Skip to content

Commit

Permalink
[dxvk] Implement draw batching via VK_EXT_multi_draw
Browse files Browse the repository at this point in the history
  • Loading branch information
doitsujin committed Feb 21, 2025
1 parent 42a2640 commit 3fba046
Show file tree
Hide file tree
Showing 6 changed files with 199 additions and 79 deletions.
15 changes: 2 additions & 13 deletions src/d3d11/d3d11_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3578,12 +3578,7 @@ namespace dxvk {

EmitCsCmd<VkDrawIndirectCommand>(D3D11CmdType::Draw, 1u,
[] (DxvkContext* ctx, const VkDrawIndirectCommand* draws, size_t count) {
for (size_t i = 0; i < count; i++) {
ctx->draw(draws[i].vertexCount,
draws[i].instanceCount,
draws[i].firstVertex,
draws[i].firstInstance);
}
ctx->draw(count, draws);
});

new (m_csData->first()) VkDrawIndirectCommand(draw);
Expand All @@ -3608,13 +3603,7 @@ namespace dxvk {

EmitCsCmd<VkDrawIndexedIndirectCommand>(D3D11CmdType::DrawIndexed, 1u,
[] (DxvkContext* ctx, const VkDrawIndexedIndirectCommand* draws, size_t count) {
for (size_t i = 0; i < count; i++) {
ctx->drawIndexed(draws[i].indexCount,
draws[i].instanceCount,
draws[i].firstIndex,
draws[i].vertexOffset,
draws[i].firstInstance);
}
ctx->drawIndexed(count, draws);
});

new (m_csData->first()) VkDrawIndexedIndirectCommand(draw);
Expand Down
6 changes: 5 additions & 1 deletion src/d3d11/d3d11_video.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1312,7 +1312,11 @@ namespace dxvk {
for (uint32_t i = 0; i < cViews.size(); i++)
ctx->bindResourceImageView(VK_SHADER_STAGE_FRAGMENT_BIT, 1 + i, Rc<DxvkImageView>(cViews[i]));

ctx->draw(3, 1, 0, 0);
VkDrawIndirectCommand draw = { };
draw.vertexCount = 3u;
draw.instanceCount = 1u;

ctx->draw(1, &draw);

for (uint32_t i = 0; i < cViews.size(); i++)
ctx->bindResourceImageView(VK_SHADER_STAGE_FRAGMENT_BIT, 1 + i, nullptr);
Expand Down
45 changes: 28 additions & 17 deletions src/d3d9/d3d9_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2890,9 +2890,12 @@ namespace dxvk {

// Tests on Windows show that D3D9 does not do non-indexed instanced draws.

ctx->draw(
vertexCount, 1,
cStartVertex, 0);
VkDrawIndirectCommand draw = { };
draw.vertexCount = vertexCount;
draw.instanceCount = 1u;
draw.firstVertex = cStartVertex;

ctx->draw(1u, &draw);
});

return D3D_OK;
Expand Down Expand Up @@ -2939,10 +2942,13 @@ namespace dxvk {

ApplyPrimitiveType(ctx, cPrimType);

ctx->drawIndexed(
drawInfo.vertexCount, drawInfo.instanceCount,
cStartIndex,
cBaseVertexIndex, 0);
VkDrawIndexedIndirectCommand draw = { };
draw.indexCount = drawInfo.vertexCount;
draw.instanceCount = drawInfo.instanceCount;
draw.firstIndex = cStartIndex;
draw.vertexOffset = cBaseVertexIndex;

ctx->drawIndexed(1u, &draw);
});

return D3D_OK;
Expand Down Expand Up @@ -2981,11 +2987,12 @@ namespace dxvk {
ApplyPrimitiveType(ctx, cPrimType);

// Tests on Windows show that D3D9 does not do non-indexed instanced draws.
VkDrawIndirectCommand draw = { };
draw.vertexCount = cVertexCount;
draw.instanceCount = 1u;

ctx->bindVertexBuffer(0, std::move(cBufferSlice), cStride);
ctx->draw(
cVertexCount, 1,
0, 0);
ctx->draw(1u, &draw);
ctx->bindVertexBuffer(0, DxvkBufferSlice(), 0);
});

Expand Down Expand Up @@ -3045,12 +3052,13 @@ namespace dxvk {

ApplyPrimitiveType(ctx, cPrimType);

VkDrawIndexedIndirectCommand draw = { };
draw.indexCount = drawInfo.vertexCount;
draw.instanceCount = drawInfo.instanceCount;

ctx->bindVertexBuffer(0, cBufferSlice.subSlice(0, cVertexSize), cStride);
ctx->bindIndexBuffer(cBufferSlice.subSlice(cVertexSize, cBufferSlice.length() - cVertexSize), cIndexType);
ctx->drawIndexed(
drawInfo.vertexCount, drawInfo.instanceCount,
0,
0, 0);
ctx->drawIndexed(1u, &draw);
ctx->bindVertexBuffer(0, DxvkBufferSlice(), 0);
ctx->bindIndexBuffer(DxvkBufferSlice(), VK_INDEX_TYPE_UINT32);
});
Expand Down Expand Up @@ -3162,11 +3170,14 @@ namespace dxvk {
// to avoid val errors / UB.
ctx->bindShader<VK_SHADER_STAGE_FRAGMENT_BIT>(nullptr);

VkDrawIndirectCommand draw = { };
draw.vertexCount = drawInfo.vertexCount;
draw.instanceCount = drawInfo.instanceCount;
draw.firstVertex = cStartIndex;

ctx->bindShader<VK_SHADER_STAGE_GEOMETRY_BIT>(std::move(shader));
ctx->bindUniformBuffer(VK_SHADER_STAGE_GEOMETRY_BIT, getSWVPBufferSlot(), std::move(cBufferSlice));
ctx->draw(
drawInfo.vertexCount, drawInfo.instanceCount,
cStartIndex, 0);
ctx->draw(1u, &draw);
ctx->bindUniformBuffer(VK_SHADER_STAGE_GEOMETRY_BIT, getSWVPBufferSlot(), DxvkBufferSlice());
ctx->bindShader<VK_SHADER_STAGE_GEOMETRY_BIT>(nullptr);
});
Expand Down
170 changes: 143 additions & 27 deletions src/dxvk/dxvk_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,11 @@ namespace dxvk {
if (m_device->features().khrMaintenance5.maintenance5)
m_features.set(DxvkContextFeature::IndexBufferRobustness);

// Check whether we can batch direct draws
if (m_device->features().extMultiDraw.multiDraw
&& m_device->properties().extMultiDraw.maxMultiDrawCount >= DirectMultiDrawBatchSize)
m_features.set(DxvkContextFeature::DirectMultiDraw);

// Add a fast path to query debug utils support
if (m_device->isDebugEnabled())
m_features.set(DxvkContextFeature::DebugUtils);
Expand Down Expand Up @@ -922,15 +927,9 @@ namespace dxvk {


void DxvkContext::draw(
uint32_t vertexCount,
uint32_t instanceCount,
uint32_t firstVertex,
uint32_t firstInstance) {
if (this->commitGraphicsState<false, false>()) {
m_cmd->cmdDraw(
vertexCount, instanceCount,
firstVertex, firstInstance);
}
uint32_t count,
const VkDrawIndirectCommand* draws) {
drawGeneric<false>(count, draws);
}


Expand All @@ -953,20 +952,12 @@ namespace dxvk {


void DxvkContext::drawIndexed(
uint32_t indexCount,
uint32_t instanceCount,
uint32_t firstIndex,
int32_t vertexOffset,
uint32_t firstInstance) {
if (this->commitGraphicsState<true, false>()) {
m_cmd->cmdDrawIndexed(
indexCount, instanceCount,
firstIndex, vertexOffset,
firstInstance);
}
uint32_t count,
const VkDrawIndexedIndirectCommand* draws) {
drawGeneric<true>(count, draws);
}


void DxvkContext::drawIndexedIndirect(
VkDeviceSize offset,
uint32_t count,
Expand Down Expand Up @@ -1689,6 +1680,114 @@ namespace dxvk {
}


template<bool Indexed, typename T>
void DxvkContext::drawGeneric(
uint32_t count,
const T* draws) {
if (this->commitGraphicsState<Indexed, false>()) {
bool needSingleDraw = count <= 1u || needsDrawBarriers();

if (needSingleDraw) {
for (uint32_t i = 0; i < count; i++) {
if (unlikely(i))
this->commitGraphicsState<Indexed, false>();

if constexpr (Indexed) {
const VkDrawIndexedIndirectCommand& draw = draws[i];

m_cmd->cmdDrawIndexed(draw.indexCount, draw.instanceCount,
draw.firstIndex, draw.vertexOffset, draw.firstInstance);
} else {
const VkDrawIndirectCommand& draw = draws[i];

m_cmd->cmdDraw(draw.vertexCount, draw.instanceCount,
draw.firstVertex, draw.firstInstance);
}
}
} else {
using MultiDrawInfo = std::conditional_t<Indexed,
VkMultiDrawIndexedInfoEXT, VkMultiDrawInfoEXT>;

// Intentially don't initialize this, we'll probably not use
// the full batch size anyway so doing so would be wasteful.
std::array<MultiDrawInfo, DirectMultiDrawBatchSize> batch;

uint32_t instanceCount = 0u;
uint32_t instanceIndex = 0u;

uint32_t batchSize = 0u;

for (uint32_t i = 0; i < count; i++) {
if (!batchSize) {
instanceCount = draws[i].instanceCount;
instanceIndex = draws[i].firstInstance;
}

if constexpr (Indexed) {
const VkDrawIndexedIndirectCommand& draw = draws[i];

auto& drawInfo = batch[batchSize++];
drawInfo.firstIndex = draw.firstIndex;
drawInfo.indexCount = draw.indexCount;
drawInfo.vertexOffset = draw.vertexOffset;
} else {
const VkDrawIndirectCommand& draw = draws[i];

auto& drawInfo = batch[batchSize++];
drawInfo.firstVertex = draw.firstVertex;
drawInfo.vertexCount = draw.vertexCount;
}

bool emitDraw = i + 1u == count || batchSize == DirectMultiDrawBatchSize;

if (!emitDraw) {
const auto& next = draws[i + 1u];

emitDraw = instanceCount != next.instanceCount
|| instanceIndex != next.firstInstance;
}

if (emitDraw) {
if (m_features.test(DxvkContextFeature::DirectMultiDraw)) {
if constexpr (Indexed) {
m_cmd->cmdDrawMultiIndexed(batchSize, batch.data(),
instanceCount, instanceIndex);
} else {
m_cmd->cmdDrawMulti(batchSize, batch.data(),
instanceCount, instanceIndex);
}
} else {
// This path only really exists for consistency reasons; all drivers
// we care about support MultiDraw natively, but debug tools may not.
if (unlikely(m_features.test(DxvkContextFeature::DebugUtils))) {
m_cmd->cmdBeginDebugUtilsLabel(DxvkCmdBuffer::ExecBuffer,
vk::makeLabel(getBatchedDrawDebugColor(), "Multidraw"));
}

for (uint32_t i = 0; i < batchSize; i++) {
const auto& entry = batch[i];

if constexpr (Indexed) {
m_cmd->cmdDrawIndexed(entry.indexCount, instanceCount,
entry.firstIndex, entry.vertexOffset, instanceIndex);
} else {
m_cmd->cmdDraw(entry.vertexCount, instanceCount,
entry.firstVertex, instanceIndex);
}
}

if (unlikely(m_features.test(DxvkContextFeature::DebugUtils)))
m_cmd->cmdEndDebugUtilsLabel(DxvkCmdBuffer::ExecBuffer);
}

batchSize = 0u;
}
}
}
}
}


template<bool Indexed>
void DxvkContext::drawIndirectGeneric(
VkDeviceSize offset,
Expand All @@ -1698,11 +1797,8 @@ namespace dxvk {
if (this->commitGraphicsState<Indexed, true>()) {
auto descriptor = m_state.id.argBuffer.getDescriptor();

if (unroll) {
// Need to do this check after initially setting up the pipeline
unroll = m_state.gp.flags.test(DxvkGraphicsPipelineFlag::UnrollMergedDraws)
&& !m_barrierControl.test(DxvkBarrierControl::GraphicsAllowReadWriteOverlap);
}
if (unroll)
unroll = needsDrawBarriers();

// If draws are merged but the pipeline has order-dependent stores, submit
// one draw at a time as well as barriers in between. Otherwise, keep the
Expand Down Expand Up @@ -2854,6 +2950,12 @@ namespace dxvk {
}


// Checks whether the bound graphics pipeline is flagged with
// UnrollMergedDraws while read-write overlap has not been
// allowed for graphics through the barrier control flags.
bool DxvkContext::needsDrawBarriers() {
  // Pipelines without the unroll flag never need this
  if (!m_state.gp.flags.test(DxvkGraphicsPipelineFlag::UnrollMergedDraws))
    return false;

  const bool overlapAllowed = m_barrierControl.test(
    DxvkBarrierControl::GraphicsAllowReadWriteOverlap);

  return !overlapAllowed;
}


void DxvkContext::beginRenderPassDebugRegion() {
bool hasColorAttachments = false;
bool hasDepthAttachment = m_state.om.renderTargets.depth.view != nullptr;
Expand Down Expand Up @@ -5862,6 +5964,20 @@ namespace dxvk {
}


// Derives the debug label color for batched draws from the graphics
// pipeline debug color by moving each of the three low 8-bit channels
// halfway towards 0xff. Bits above the low 24 are zero in the result.
uint32_t DxvkContext::getBatchedDrawDebugColor() const {
  const uint32_t baseColor = getGraphicsPipelineDebugColor();
  uint32_t brightened = 0u;

  // Walk the channels by bit offset: 0, 8 and 16
  for (uint32_t shift = 0u; shift < 24u; shift += 8u) {
    uint32_t channel = (baseColor >> shift) & 0xffu;
    channel += (0xff - channel) / 2u;
    brightened |= channel << shift;
  }

  return brightened;
}


template<VkPipelineBindPoint BindPoint>
void DxvkContext::resetSpecConstants(
uint32_t newMask) {
Expand Down
Loading

0 comments on commit 3fba046

Please sign in to comment.