Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Lazy binding for D3D11 #4699

Merged
merged 17 commits into from
Feb 23, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
641 changes: 487 additions & 154 deletions src/d3d11/d3d11_context.cpp

Large diffs are not rendered by default.

115 changes: 91 additions & 24 deletions src/d3d11/d3d11_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,11 @@ namespace dxvk {
// Use a local staging buffer to handle tiny uploads, most
// of the time we're fine with hitting the global allocator
constexpr static VkDeviceSize StagingBufferSize = 256ull << 10;

protected:
// Compile-time debug flag to force lazy binding on (True) or off (False)
constexpr static Tristate DebugLazyBinding = Tristate::Auto;

public:

D3D11CommonContext(
Expand Down Expand Up @@ -799,6 +804,30 @@ namespace dxvk {
DxvkBufferSlice AllocStagingBuffer(
VkDeviceSize Size);

void ApplyDirtyConstantBuffers(
DxbcProgramType Stage,
const DxbcBindingMask& BoundMask,
DxbcBindingMask& DirtyMask);

void ApplyDirtySamplers(
DxbcProgramType Stage,
const DxbcBindingMask& BoundMask,
DxbcBindingMask& DirtyMask);

void ApplyDirtyShaderResources(
DxbcProgramType Stage,
const DxbcBindingMask& BoundMask,
DxbcBindingMask& DirtyMask);

void ApplyDirtyUnorderedAccessViews(
DxbcProgramType Stage,
const DxbcBindingMask& BoundMask,
DxbcBindingMask& DirtyMask);

void ApplyDirtyGraphicsBindings();

void ApplyDirtyComputeBindings();

void ApplyInputLayout();

void ApplyPrimitiveTopology();
Expand Down Expand Up @@ -854,35 +883,33 @@ namespace dxvk {
D3D11Buffer* pBuffer,
UINT Offset);

template<DxbcProgramType ShaderStage>
void BindConstantBuffer(
DxbcProgramType ShaderStage,
UINT Slot,
D3D11Buffer* pBuffer,
UINT Offset,
UINT Length);

template<DxbcProgramType ShaderStage>
void BindConstantBufferRange(
DxbcProgramType ShaderStage,
UINT Slot,
UINT Offset,
UINT Length);

template<DxbcProgramType ShaderStage>
void BindSampler(
DxbcProgramType ShaderStage,
UINT Slot,
D3D11SamplerState* pSampler);

template<DxbcProgramType ShaderStage>
void BindShaderResource(
DxbcProgramType ShaderStage,
UINT Slot,
D3D11ShaderResourceView* pResource);

template<DxbcProgramType ShaderStage>
void BindUnorderedAccessView(
UINT UavSlot,
D3D11UnorderedAccessView* pUav,
UINT CtrSlot,
UINT Counter);
DxbcProgramType ShaderStage,
UINT Slot,
D3D11UnorderedAccessView* pUav);

VkClearValue ConvertColorValue(
const FLOAT Color[4],
Expand Down Expand Up @@ -911,6 +938,36 @@ namespace dxvk {
DxvkBufferSlice BufferSlice,
UINT Flags);

template<typename T>
bool DirtyBindingGeneric(
DxbcProgramType ShaderStage,
T BoundMask,
T& DirtyMask,
T DirtyBit,
bool IsNull);

bool DirtyConstantBuffer(
DxbcProgramType ShaderStage,
uint32_t Slot,
bool IsNull);

bool DirtySampler(
DxbcProgramType ShaderStage,
uint32_t Slot,
bool IsNull);

bool DirtyShaderResource(
DxbcProgramType ShaderStage,
uint32_t Slot,
bool IsNull);

bool DirtyComputeUnorderedAccessView(
uint32_t Slot,
bool IsNull);

bool DirtyGraphicsUnorderedAccessView(
uint32_t Slot);

void DiscardBuffer(
ID3D11Resource* pResource);

Expand Down Expand Up @@ -943,10 +1000,16 @@ namespace dxvk {

D3D11MaxUsedBindings GetMaxUsedBindings();

bool HasDirtyComputeBindings();

bool HasDirtyGraphicsBindings();

void ResetCommandListState();

void ResetContextState();

void ResetDirtyTracking();

void ResetStagingBuffer();

template<DxbcProgramType ShaderStage, typename T>
Expand All @@ -969,18 +1032,18 @@ namespace dxvk {

void RestoreCommandListState();

template<DxbcProgramType Stage>
void RestoreConstantBuffers();

template<DxbcProgramType Stage>
void RestoreSamplers();

template<DxbcProgramType Stage>
void RestoreShaderResources();

template<DxbcProgramType Stage>
void RestoreUnorderedAccessViews();
void RestoreConstantBuffers(
DxbcProgramType Stage);

void RestoreSamplers(
DxbcProgramType Stage);

void RestoreShaderResources(
DxbcProgramType Stage);

void RestoreUnorderedAccessViews(
DxbcProgramType Stage);

template<DxbcProgramType ShaderStage>
void SetConstantBuffers(
UINT StartSlot,
Expand Down Expand Up @@ -1063,6 +1126,10 @@ namespace dxvk {
UINT SrcDepthPitch,
UINT CopyFlags);

void UpdateUnorderedAccessViewCounter(
D3D11UnorderedAccessView* pUav,
uint32_t CounterValue);

bool ValidateRenderTargets(
UINT NumViews,
ID3D11RenderTargetView* const* ppRenderTargetViews,
Expand All @@ -1083,22 +1150,22 @@ namespace dxvk {
DxvkMultisampleState* pMsState,
UINT SampleMask);

template<bool AllowFlush = !IsDeferred, typename Cmd>
template<bool AllowFlush = true, typename Cmd>
void EmitCs(Cmd&& command) {
m_cmdData = nullptr;

if (unlikely(!m_csChunk->push(command))) {
GetTypedContext()->EmitCsChunk(std::move(m_csChunk));
m_csChunk = AllocCsChunk();

if constexpr (AllowFlush)
if constexpr (!IsDeferred && AllowFlush)
GetTypedContext()->ConsiderFlush(GpuFlushType::ImplicitWeakHint);

m_csChunk->push(command);
}
}

template<typename M, bool AllowFlush = !IsDeferred, typename Cmd, typename... Args>
template<typename M, bool AllowFlush = true, typename Cmd, typename... Args>
M* EmitCsCmd(Cmd&& command, Args&&... args) {
M* data = m_csChunk->pushCmd<M, Cmd, Args...>(
command, std::forward<Args>(args)...);
Expand All @@ -1107,7 +1174,7 @@ namespace dxvk {
GetTypedContext()->EmitCsChunk(std::move(m_csChunk));
m_csChunk = AllocCsChunk();

if constexpr (AllowFlush)
if constexpr (!IsDeferred && AllowFlush)
GetTypedContext()->ConsiderFlush(GpuFlushType::ImplicitWeakHint);

// We must record this command after the potential
Expand Down
12 changes: 12 additions & 0 deletions src/d3d11/d3d11_context_ext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ namespace dxvk {
D3D10DeviceLock lock = m_ctx->LockContext();
m_ctx->SetDrawBuffers(pBufferForArgs, nullptr);

if (unlikely(m_ctx->HasDirtyGraphicsBindings()))
m_ctx->ApplyDirtyGraphicsBindings();

m_ctx->EmitCs([
cCount = DrawCount,
cOffset = ByteOffsetForArgs,
Expand All @@ -67,6 +70,9 @@ namespace dxvk {
D3D10DeviceLock lock = m_ctx->LockContext();
m_ctx->SetDrawBuffers(pBufferForArgs, nullptr);

if (unlikely(m_ctx->HasDirtyGraphicsBindings()))
m_ctx->ApplyDirtyGraphicsBindings();

m_ctx->EmitCs([
cCount = DrawCount,
cOffset = ByteOffsetForArgs,
Expand All @@ -88,6 +94,9 @@ namespace dxvk {
D3D10DeviceLock lock = m_ctx->LockContext();
m_ctx->SetDrawBuffers(pBufferForArgs, pBufferForCount);

if (unlikely(m_ctx->HasDirtyGraphicsBindings()))
m_ctx->ApplyDirtyGraphicsBindings();

m_ctx->EmitCs([
cMaxCount = MaxDrawCount,
cArgOffset = ByteOffsetForArgs,
Expand All @@ -110,6 +119,9 @@ namespace dxvk {
D3D10DeviceLock lock = m_ctx->LockContext();
m_ctx->SetDrawBuffers(pBufferForArgs, pBufferForCount);

if (unlikely(m_ctx->HasDirtyGraphicsBindings()))
m_ctx->ApplyDirtyGraphicsBindings();

m_ctx->EmitCs([
cMaxCount = MaxDrawCount,
cArgOffset = ByteOffsetForArgs,
Expand Down
114 changes: 112 additions & 2 deletions src/d3d11/d3d11_context_imm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,12 @@ namespace dxvk {
D3D10DeviceLock lock = LockContext();

auto commandList = static_cast<D3D11CommandList*>(pCommandList);


// Reset dirty binding tracking before submitting any CS chunks.
// This is needed so that any submission that might occur during
// this call does not disrupt bindings set by the deferred context.
ResetDirtyTracking();

// Clear state so that the command list can't observe any
// current context state. The command list itself will clean
// up after execution to ensure that no state changes done
Expand Down Expand Up @@ -754,7 +759,11 @@ namespace dxvk {
if (!pState)
return;

// Reset all state affected by the current context state
// Clear dirty tracking here since all context state will be
// re-applied anyway when the context state is swapped in again.
ResetDirtyTracking();

// Reset all state affected by the current context state.
ResetCommandListState();

Com<D3D11DeviceContextState, false> oldState = std::move(m_stateObject);
Expand Down Expand Up @@ -975,8 +984,106 @@ namespace dxvk {
}


void D3D11ImmediateContext::ApplyDirtyNullBindings() {
// At the end of a submission, set all bindings that have not been applied yet
// to null on the DXVK context. This way, we avoid keeping resources alive that
// are bound to the DXVK context but not to the immediate context.
//
// Note: This requires that all methods that may modify dirty bindings on the
// DXVK context also reset the corresponding dirty bits *before* performing the
// bind operation, or otherwise an implicit flush can potentially override them.
auto& dirtyState = m_state.lazy.bindingsDirty;

EmitCs<false>([
cDirtyState = dirtyState
] (DxvkContext* ctx) {
for (uint32_t i = 0; i < uint32_t(DxbcProgramType::Count); i++) {
auto dxStage = DxbcProgramType(i);
auto vkStage = GetShaderStage(dxStage);

// Unbind all dirty constant buffers
auto cbvSlot = computeConstantBufferBinding(dxStage, 0);

for (uint32_t index : bit::BitMask(cDirtyState[dxStage].cbvMask))
ctx->bindUniformBuffer(vkStage, cbvSlot + index, DxvkBufferSlice());

// Unbind all dirty samplers
auto samplerSlot = computeSamplerBinding(dxStage, 0);

for (uint32_t index : bit::BitMask(cDirtyState[dxStage].samplerMask))
ctx->bindResourceSampler(vkStage, samplerSlot + index, nullptr);

// Unbind all dirty shader resource views
auto srvSlot = computeSrvBinding(dxStage, 0);

for (uint32_t m = 0; m < cDirtyState[dxStage].srvMask.size(); m++) {
for (uint32_t index : bit::BitMask(cDirtyState[dxStage].srvMask[m]))
ctx->bindResourceImageView(vkStage, srvSlot + index + m * 64u, nullptr);
}

// Unbind all dirty unordered access views
VkShaderStageFlags uavStages = 0u;

if (dxStage == DxbcProgramType::ComputeShader)
uavStages = VK_SHADER_STAGE_COMPUTE_BIT;
else if (dxStage == DxbcProgramType::PixelShader)
uavStages = VK_SHADER_STAGE_ALL_GRAPHICS;

if (uavStages) {
auto uavSlot = computeUavBinding(dxStage, 0);
auto ctrSlot = computeUavCounterBinding(dxStage, 0);

for (uint32_t index : bit::BitMask(cDirtyState[dxStage].uavMask)) {
ctx->bindResourceImageView(vkStage, uavSlot + index, nullptr);
ctx->bindResourceBufferView(vkStage, ctrSlot + index, nullptr);
}
}
}
});

// Since we set the DXVK context bindings to null, any bindings that are null
// on the D3D context are no longer dirty, so we can clear the respective bits.
for (uint32_t i = 0; i < uint32_t(DxbcProgramType::Count); i++) {
auto stage = DxbcProgramType(i);

for (uint32_t index : bit::BitMask(dirtyState[stage].cbvMask)) {
if (!m_state.cbv[stage].buffers[index].buffer.ptr())
dirtyState[stage].cbvMask &= ~(1u << index);
}

for (uint32_t index : bit::BitMask(dirtyState[stage].samplerMask)) {
if (!m_state.samplers[stage].samplers[index])
dirtyState[stage].samplerMask &= ~(1u << index);
}

for (uint32_t m = 0; m < dirtyState[stage].srvMask.size(); m++) {
for (uint32_t index : bit::BitMask(dirtyState[stage].srvMask[m])) {
if (!m_state.srv[stage].views[index + m * 64u].ptr())
dirtyState[stage].srvMask[m] &= ~(uint64_t(1u) << index);
}
}

if (stage == DxbcProgramType::ComputeShader || stage == DxbcProgramType::PixelShader) {
auto& uavs = stage == DxbcProgramType::ComputeShader ? m_state.uav.views : m_state.om.uavs;

for (uint32_t index : bit::BitMask(dirtyState[stage].uavMask)) {
if (!uavs[index].ptr())
dirtyState[stage].uavMask &= ~(uint64_t(1u) << index);
}
}

if (dirtyState[stage].empty())
m_state.lazy.shadersDirty.clr(stage);
}
}


void D3D11ImmediateContext::ConsiderFlush(
GpuFlushType FlushType) {
// In stress test mode, behave as if this would always flush
if (DebugLazyBinding == Tristate::True)
ApplyDirtyNullBindings();

uint64_t chunkId = GetCurrentSequenceNumber();
uint64_t submissionId = m_submissionFence->value();

Expand All @@ -998,6 +1105,9 @@ namespace dxvk {
if (!GetPendingCsChunks() && !hEvent)
return;

// Unbind unused resources
ApplyDirtyNullBindings();

// Signal the submission fence and flush the command list
uint64_t submissionId = ++m_submissionId;

Expand Down
Loading