From 2654ec890eafcf01d087f1a581c6529c0bac1bf2 Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Tue, 18 Feb 2025 18:37:18 +0100 Subject: [PATCH 01/17] [util] Generalize bit mask iterator --- src/util/util_bit.h | 56 ++++++++++++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 19 deletions(-) diff --git a/src/util/util_bit.h b/src/util/util_bit.h index 586ce83c986..6f672700503 100644 --- a/src/util/util_bit.h +++ b/src/util/util_bit.h @@ -129,6 +129,32 @@ namespace dxvk::bit { #endif } + inline uint32_t bsf(uint32_t n) { + #if (defined(__GNUC__) || defined(__clang__)) && !defined(__BMI__) && defined(DXVK_ARCH_X86) + uint32_t res; + asm ("tzcnt %1,%0" + : "=r" (res) + : "r" (n) + : "cc"); + return res; + #else + return tzcnt(n); + #endif + } + + inline uint32_t bsf(uint64_t n) { + #if (defined(__GNUC__) || defined(__clang__)) && !defined(__BMI__) && defined(DXVK_ARCH_X86_64) + uint64_t res; + asm ("tzcnt %1,%0" + : "=r" (res) + : "r" (n) + : "cc"); + return res; + #else + return tzcnt(n); + #endif + } + inline uint32_t lzcnt(uint32_t n) { #if (defined(_MSC_VER) && !defined(__clang__)) || defined(__LZCNT__) return _lzcnt_u32(n); @@ -490,6 +516,7 @@ namespace dxvk::bit { }; + template class BitMask { public: @@ -497,12 +524,12 @@ namespace dxvk::bit { class iterator { public: using iterator_category = std::input_iterator_tag; - using value_type = uint32_t; - using difference_type = uint32_t; - using pointer = const uint32_t*; - using reference = uint32_t; + using value_type = T; + using difference_type = T; + using pointer = const T*; + using reference = T; - explicit iterator(uint32_t flags) + explicit iterator(T flags) : m_mask(flags) { } iterator& operator ++ () { @@ -516,17 +543,8 @@ namespace dxvk::bit { return retval; } - uint32_t operator * () const { -#if (defined(__GNUC__) || defined(__clang__)) && !defined(__BMI__) && defined(DXVK_ARCH_X86) - uint32_t res; - asm ("tzcnt %1,%0" - : "=r" (res) - : "r" (m_mask) - : "cc"); - return res; 
-#else - return tzcnt(m_mask); -#endif + T operator * () const { + return bsf(m_mask); } bool operator == (iterator other) const { return m_mask == other.m_mask; } @@ -534,14 +552,14 @@ namespace dxvk::bit { private: - uint32_t m_mask; + T m_mask; }; BitMask() : m_mask(0) { } - BitMask(uint32_t n) + explicit BitMask(T n) : m_mask(n) { } iterator begin() { @@ -554,7 +572,7 @@ namespace dxvk::bit { private: - uint32_t m_mask; + T m_mask; }; From 0e6ff26300cc16cf6766680a0fe3f7aed4b2e5e2 Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Tue, 18 Feb 2025 00:51:04 +0100 Subject: [PATCH 02/17] [dxbc] Gather binding info during shader compilation --- src/dxbc/dxbc_analysis.cpp | 41 ++++++++++++++++++++++++++++++++++++++ src/dxbc/dxbc_analysis.h | 2 ++ src/dxbc/dxbc_common.cpp | 8 +++----- src/dxbc/dxbc_common.h | 4 ++++ src/dxbc/dxbc_compiler.cpp | 4 +++- src/dxbc/dxbc_module.cpp | 6 ++++-- src/dxbc/dxbc_module.h | 18 ++++++++++++++--- src/dxbc/dxbc_util.h | 39 +++++++++++++++++++++++++++++++++++- 8 files changed, 110 insertions(+), 12 deletions(-) diff --git a/src/dxbc/dxbc_analysis.cpp b/src/dxbc/dxbc_analysis.cpp index a5bf4fc194e..2a6747b0582 100644 --- a/src/dxbc/dxbc_analysis.cpp +++ b/src/dxbc/dxbc_analysis.cpp @@ -108,6 +108,47 @@ namespace dxvk { m_analysis->uavInfos[registerId].nonInvariantAccess = true; } break; + case DxbcInstClass::Declaration: { + switch (ins.op) { + case DxbcOpcode::DclConstantBuffer: { + uint32_t registerId = ins.dst[0].idx[0].offset; + + if (registerId < DxbcConstBufBindingCount) + m_analysis->bindings.cbvMask |= 1u << registerId; + } break; + + case DxbcOpcode::DclSampler: { + uint32_t registerId = ins.dst[0].idx[0].offset; + + if (registerId < DxbcSamplerBindingCount) + m_analysis->bindings.samplerMask |= 1u << registerId; + } break; + + case DxbcOpcode::DclResource: + case DxbcOpcode::DclResourceRaw: + case DxbcOpcode::DclResourceStructured: { + uint32_t registerId = ins.dst[0].idx[0].offset; + + uint32_t idx = registerId / 64u; + 
uint32_t bit = registerId % 64u; + + if (registerId < DxbcResourceBindingCount) + m_analysis->bindings.srvMask[idx] |= uint64_t(1u) << bit; + } break; + + case DxbcOpcode::DclUavTyped: + case DxbcOpcode::DclUavRaw: + case DxbcOpcode::DclUavStructured: { + uint32_t registerId = ins.dst[0].idx[0].offset; + + if (registerId < DxbcUavBindingCount) + m_analysis->bindings.uavMask |= uint64_t(1u) << registerId; + } break; + + default: ; + } + } break; + default: break; } diff --git a/src/dxbc/dxbc_analysis.h b/src/dxbc/dxbc_analysis.h index fa589f4fd2e..64e9870384f 100644 --- a/src/dxbc/dxbc_analysis.h +++ b/src/dxbc/dxbc_analysis.h @@ -53,6 +53,8 @@ namespace dxvk { DxbcClipCullInfo clipCullIn; DxbcClipCullInfo clipCullOut; + + DxbcBindingMask bindings = { }; bool usesDerivatives = false; bool usesKill = false; diff --git a/src/dxbc/dxbc_common.cpp b/src/dxbc/dxbc_common.cpp index d150c585b83..db3d715291e 100644 --- a/src/dxbc/dxbc_common.cpp +++ b/src/dxbc/dxbc_common.cpp @@ -10,9 +10,8 @@ namespace dxvk { case DxbcProgramType::HullShader : return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; case DxbcProgramType::DomainShader : return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT; case DxbcProgramType::ComputeShader : return VK_SHADER_STAGE_COMPUTE_BIT; + default: throw DxvkError("DxbcProgramInfo::shaderStage: Unsupported program type"); } - - throw DxvkError("DxbcProgramInfo::shaderStage: Unsupported program type"); } @@ -24,9 +23,8 @@ namespace dxvk { case DxbcProgramType::HullShader : return spv::ExecutionModelTessellationControl; case DxbcProgramType::DomainShader : return spv::ExecutionModelTessellationEvaluation; case DxbcProgramType::ComputeShader : return spv::ExecutionModelGLCompute; + default: throw DxvkError("DxbcProgramInfo::executionModel: Unsupported program type"); } - - throw DxvkError("DxbcProgramInfo::executionModel: Unsupported program type"); } -} \ No newline at end of file +} diff --git a/src/dxbc/dxbc_common.h b/src/dxbc/dxbc_common.h index 
9c490d62c68..d9d4207673b 100644 --- a/src/dxbc/dxbc_common.h +++ b/src/dxbc/dxbc_common.h @@ -17,7 +17,11 @@ namespace dxvk { HullShader = 3, DomainShader = 4, ComputeShader = 5, + + Count }; + + using DxbcProgramTypeFlags = Flags; /** diff --git a/src/dxbc/dxbc_compiler.cpp b/src/dxbc/dxbc_compiler.cpp index f0f36233461..9e58e6497fa 100644 --- a/src/dxbc/dxbc_compiler.cpp +++ b/src/dxbc/dxbc_compiler.cpp @@ -237,6 +237,7 @@ namespace dxvk { case DxbcProgramType::GeometryShader: this->emitGsFinalize(); break; case DxbcProgramType::PixelShader: this->emitPsFinalize(); break; case DxbcProgramType::ComputeShader: this->emitCsFinalize(); break; + default: throw DxvkError("Invalid shader stage"); } // Emit float control mode if the extension is supported @@ -6138,7 +6139,7 @@ namespace dxvk { case DxbcProgramType::HullShader: emitHsSystemValueStore(sv, mask, value); break; case DxbcProgramType::DomainShader: emitDsSystemValueStore(sv, mask, value); break; case DxbcProgramType::PixelShader: emitPsSystemValueStore(sv, mask, value); break; - case DxbcProgramType::ComputeShader: break; + default: break; } } } @@ -6822,6 +6823,7 @@ namespace dxvk { case DxbcProgramType::GeometryShader: emitGsInit(); break; case DxbcProgramType::PixelShader: emitPsInit(); break; case DxbcProgramType::ComputeShader: emitCsInit(); break; + default: throw DxvkError("Invalid shader stage"); } } diff --git a/src/dxbc/dxbc_module.cpp b/src/dxbc/dxbc_module.cpp index d406bf2925a..16e37ebbbd4 100644 --- a/src/dxbc/dxbc_module.cpp +++ b/src/dxbc/dxbc_module.cpp @@ -42,7 +42,7 @@ namespace dxvk { Rc DxbcModule::compile( const DxbcModuleInfo& moduleInfo, - const std::string& fileName) const { + const std::string& fileName) { if (m_shexChunk == nullptr) throw DxvkError("DxbcModule::compile: No SHDR/SHEX chunk"); @@ -54,6 +54,8 @@ namespace dxvk { m_psgnChunk, analysisInfo); this->runAnalyzer(analyzer, m_shexChunk->slice()); + + m_bindings = std::make_optional(analysisInfo.bindings); DxbcCompiler 
compiler( fileName, moduleInfo, @@ -62,7 +64,7 @@ namespace dxvk { m_psgnChunk, analysisInfo); this->runCompiler(compiler, m_shexChunk->slice()); - + return compiler.finalize(); } diff --git a/src/dxbc/dxbc_module.h b/src/dxbc/dxbc_module.h index 7609e23225e..7d6088baedf 100644 --- a/src/dxbc/dxbc_module.h +++ b/src/dxbc/dxbc_module.h @@ -7,6 +7,7 @@ #include "dxbc_header.h" #include "dxbc_modinfo.h" #include "dxbc_reader.h" +#include "dxbc_util.h" // References used for figuring out DXBC: // - https://github.com/tgjones/slimshader-cpp @@ -41,7 +42,16 @@ namespace dxvk { return m_shexChunk->programInfo(); } - + + /** + * \brief Queries shader binding mask + * + * Only valid after successfully compiling the shader. + */ + std::optional bindings() const { + return m_bindings; + } + /** * \brief Input and output signature chunks * @@ -50,7 +60,7 @@ namespace dxvk { */ Rc isgn() const { return m_isgnChunk; } Rc osgn() const { return m_osgnChunk; } - + /** * \brief Compiles DXBC shader to SPIR-V module * @@ -61,7 +71,7 @@ namespace dxvk { */ Rc compile( const DxbcModuleInfo& moduleInfo, - const std::string& fileName) const; + const std::string& fileName); /** * \brief Compiles a pass-through geometry shader @@ -85,6 +95,8 @@ namespace dxvk { Rc m_osgnChunk; Rc m_psgnChunk; Rc m_shexChunk; + + std::optional m_bindings; void runAnalyzer( DxbcAnalyzer& analyzer, diff --git a/src/dxbc/dxbc_util.h b/src/dxbc/dxbc_util.h index 04bec752a33..53394c2a99d 100644 --- a/src/dxbc/dxbc_util.h +++ b/src/dxbc/dxbc_util.h @@ -33,6 +33,43 @@ namespace dxvk { }; + /** + * \brief Shader binding mask + * + * Stores a bit masks of resource bindings + * that are accessed by any given shader. 
+ */ + struct DxbcBindingMask { + uint32_t cbvMask = 0u; + uint32_t samplerMask = 0u; + uint64_t uavMask = 0u; + std::array srvMask = { }; + + void reset() { + cbvMask = 0u; + samplerMask = 0u; + uavMask = 0u; + srvMask = { }; + } + + bool empty() const { + uint64_t mask = (uint64_t(cbvMask) | uint64_t(samplerMask) << 32u) + | (uavMask | srvMask[0] | srvMask[1]); + return !mask; + } + + DxbcBindingMask operator & (const DxbcBindingMask& other) const { + DxbcBindingMask result = *this; + result.cbvMask &= other.cbvMask; + result.samplerMask &= other.samplerMask; + result.uavMask &= other.uavMask; + result.srvMask[0] &= other.srvMask[0]; + result.srvMask[1] &= other.srvMask[1]; + return result; + } + }; + + /** * \brief Computes first binding index for a given stage * @@ -124,4 +161,4 @@ namespace dxvk { uint32_t primitiveVertexCount( DxbcPrimitive primitive); -} \ No newline at end of file +} From a319f3b01862284e36d077e69a03af02900be231 Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Tue, 18 Feb 2025 01:08:28 +0100 Subject: [PATCH 03/17] [d3d11] Store used binding mask inside shader objects --- src/d3d11/d3d11_shader.cpp | 6 ++++++ src/d3d11/d3d11_shader.h | 14 ++++++++++---- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/src/d3d11/d3d11_shader.cpp b/src/d3d11/d3d11_shader.cpp index 9560eb131d2..332d9cffdea 100644 --- a/src/d3d11/d3d11_shader.cpp +++ b/src/d3d11/d3d11_shader.cpp @@ -79,6 +79,12 @@ namespace dxvk { } pDevice->GetDXVKDevice()->registerShader(m_shader); + + // Write back binding mask + auto bindings = module.bindings(); + + if (bindings) + m_bindings = *bindings; } diff --git a/src/d3d11/d3d11_shader.h b/src/d3d11/d3d11_shader.h index 44a3bcb984d..8be4353f7c5 100644 --- a/src/d3d11/d3d11_shader.h +++ b/src/d3d11/d3d11_shader.h @@ -18,7 +18,7 @@ namespace dxvk { class D3D11Device; - + /** * \brief Common shader object * @@ -52,12 +52,18 @@ namespace dxvk { std::string GetName() const { return m_shader->debugName(); } - + + 
DxbcBindingMask GetBindingMask() const { + return m_bindings; + } + private: - + Rc m_shader; Rc m_buffer; - + + DxbcBindingMask m_bindings = { }; + }; From 3a60592699e287028a6a00c4010c6a7ebfeeb585 Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Tue, 18 Feb 2025 01:10:49 +0100 Subject: [PATCH 04/17] [d3d11] Rework DXBC shader stage to Vulkan shader stage mapping We're going to have to do this at runtime, so this needs to be fast. --- src/d3d11/d3d11_util.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/d3d11/d3d11_util.h b/src/d3d11/d3d11_util.h index 8720bbbb328..10a4a8fc5ec 100644 --- a/src/d3d11/d3d11_util.h +++ b/src/d3d11/d3d11_util.h @@ -52,15 +52,15 @@ namespace dxvk { * \returns Corresponding Vulkan shader stage */ constexpr VkShaderStageFlagBits GetShaderStage(DxbcProgramType ProgramType) { - switch (ProgramType) { - case DxbcProgramType::VertexShader: return VK_SHADER_STAGE_VERTEX_BIT; - case DxbcProgramType::HullShader: return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; - case DxbcProgramType::DomainShader: return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT; - case DxbcProgramType::GeometryShader: return VK_SHADER_STAGE_GEOMETRY_BIT; - case DxbcProgramType::PixelShader: return VK_SHADER_STAGE_FRAGMENT_BIT; - case DxbcProgramType::ComputeShader: return VK_SHADER_STAGE_COMPUTE_BIT; - default: return VkShaderStageFlagBits(0); - } + constexpr uint64_t lut + = (uint64_t(VK_SHADER_STAGE_VERTEX_BIT) << (8u * uint32_t(DxbcProgramType::VertexShader))) + | (uint64_t(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) << (8u * uint32_t(DxbcProgramType::HullShader))) + | (uint64_t(VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) << (8u * uint32_t(DxbcProgramType::DomainShader))) + | (uint64_t(VK_SHADER_STAGE_GEOMETRY_BIT) << (8u * uint32_t(DxbcProgramType::GeometryShader))) + | (uint64_t(VK_SHADER_STAGE_FRAGMENT_BIT) << (8u * uint32_t(DxbcProgramType::PixelShader))) + | (uint64_t(VK_SHADER_STAGE_COMPUTE_BIT) << (8u * 
uint32_t(DxbcProgramType::ComputeShader))); + + return VkShaderStageFlagBits((lut >> (8u * uint32_t(ProgramType))) & 0xff); } -} \ No newline at end of file +} From 9a0dacbae50946c385a72036652449b7078c3b1a Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Tue, 18 Feb 2025 22:00:20 +0100 Subject: [PATCH 05/17] [d3d11] Track shader stages with dirty bindings as well as used bindings --- src/d3d11/d3d11_context.cpp | 53 +++++++++++++++++++++++++++++++++ src/d3d11/d3d11_context.h | 2 ++ src/d3d11/d3d11_context_state.h | 28 ++++++++++++++++- 3 files changed, 82 insertions(+), 1 deletion(-) diff --git a/src/d3d11/d3d11_context.cpp b/src/d3d11/d3d11_context.cpp index 16529a2179d..8d42c283b71 100644 --- a/src/d3d11/d3d11_context.cpp +++ b/src/d3d11/d3d11_context.cpp @@ -3423,6 +3423,17 @@ namespace dxvk { if (unlikely(shader->needsLibraryCompile())) m_device->requestCompileShader(shader); + // If this shader activates any bindings that have not yet been applied, + // mark the shader stage as dirty so it gets applied on the next draw. + // Don't apply it right away since any dirty bindings are likely redundant. + m_state.lazy.shadersUsed.set(ShaderStage); + m_state.lazy.bindingsUsed[ShaderStage] = pShaderModule->GetBindingMask(); + + if (!m_state.lazy.shadersDirty.test(ShaderStage)) { + if (!(m_state.lazy.bindingsDirty[ShaderStage] & m_state.lazy.bindingsUsed[ShaderStage]).empty()) + m_state.lazy.shadersDirty.set(ShaderStage); + } + EmitCs([ cBuffer = std::move(buffer), cShader = std::move(shader) @@ -3438,6 +3449,15 @@ namespace dxvk { Forwarder::move(cBuffer)); }); } else { + // Mark shader stage as inactive and clean since we'll have no active + // bindings. This works because if the app changes any binding at all + // for this stage, it will get flagged as dirty, and if another shader + // gets bound, it will check for any dirty bindings again. 
+ m_state.lazy.shadersUsed.clr(ShaderStage); + m_state.lazy.shadersDirty.clr(ShaderStage); + + m_state.lazy.bindingsUsed[ShaderStage].reset(); + EmitCs([] (DxvkContext* ctx) { constexpr VkShaderStageFlagBits stage = GetShaderStage(ShaderStage); @@ -4516,6 +4536,9 @@ namespace dxvk { m_state.srv.reset(); m_state.uav.reset(); m_state.samplers.reset(); + + // Reset dirty tracking + m_state.lazy.reset(); } @@ -4623,6 +4646,36 @@ namespace dxvk { } + template + void D3D11CommonContext::RestoreUsedBindings() { + // Mark all bindings used since the last reset as dirty so that subsequent draws + // and dispatches will reapply them as necessary. Marking null bindings here may + // lead to some redundant CS thread traffic, but is otherwise harmless. + auto maxBindings = GetMaxUsedBindings(); + + for (uint32_t i = 0; i < uint32_t(DxbcProgramType::Count); i++) { + auto stage = DxbcProgramType(i); + auto stageInfo = maxBindings.stages[i]; + + m_state.lazy.bindingsDirty[stage].cbvMask |= (1u << stageInfo.cbvCount) - 1u; + m_state.lazy.bindingsDirty[stage].samplerMask |= (1u << stageInfo.samplerCount) - 1u; + + if (stageInfo.uavCount) + m_state.lazy.bindingsDirty[stage].uavMask |= uint64_t(-1) >> (64u - stageInfo.uavCount); + + if (stageInfo.srvCount > 64u) { + m_state.lazy.bindingsDirty[stage].srvMask[0] |= uint64_t(-1); + m_state.lazy.bindingsDirty[stage].srvMask[1] |= uint64_t(-1) >> (128u - stageInfo.srvCount); + } else if (stageInfo.srvCount) { + m_state.lazy.bindingsDirty[stage].srvMask[0] |= uint64_t(-1) >> (64u - stageInfo.srvCount); + } + + if (m_state.lazy.shadersUsed.test(stage) && !m_state.lazy.bindingsDirty[stage].empty()) + m_state.lazy.shadersDirty.set(stage); + } + } + + template void D3D11CommonContext::RestoreCommandListState() { BindFramebuffer(); diff --git a/src/d3d11/d3d11_context.h b/src/d3d11/d3d11_context.h index a2d6c365982..32f0ee77f08 100644 --- a/src/d3d11/d3d11_context.h +++ b/src/d3d11/d3d11_context.h @@ -967,6 +967,8 @@ namespace dxvk { void 
ResolveOmUavHazards( D3D11RenderTargetView* pView); + void RestoreUsedBindings(); + void RestoreCommandListState(); template diff --git a/src/d3d11/d3d11_context_state.h b/src/d3d11/d3d11_context_state.h index 9dea349375b..be42ef5991b 100644 --- a/src/d3d11/d3d11_context_state.h +++ b/src/d3d11/d3d11_context_state.h @@ -302,6 +302,30 @@ namespace dxvk { predicateValue = false; } }; + + + /** + * \brief Lazy binding state + * + * Keeps track of what state needs to be + * re-applied to the context. + */ + struct D3D11LazyBindings { + DxbcProgramTypeFlags shadersUsed = 0u; + DxbcProgramTypeFlags shadersDirty = 0u; + + D3D11ShaderStageState bindingsUsed; + D3D11ShaderStageState bindingsDirty; + + void reset() { + shadersUsed = 0u; + shadersDirty = 0u; + + bindingsUsed.reset(); + bindingsDirty.reset(); + } + }; + /** * \brief Context state @@ -325,6 +349,8 @@ namespace dxvk { D3D11SrvBindings srv; D3D11UavBindings uav; D3D11SamplerBindings samplers; + + D3D11LazyBindings lazy; }; /** @@ -342,7 +368,7 @@ namespace dxvk { * \brief Maximum used binding numbers for all context state */ struct D3D11MaxUsedBindings { - std::array stages; + std::array stages; uint32_t vbCount; uint32_t soCount; }; From 459b7840333009c031ee71535431708ca3144666 Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Wed, 19 Feb 2025 00:40:06 +0100 Subject: [PATCH 06/17] [d3d11] Change AllowFlush behaviour No functional change, just makes it less annoying to use in methods that can be called from both immediate and deferred contexts- --- src/d3d11/d3d11_context.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/d3d11/d3d11_context.h b/src/d3d11/d3d11_context.h index 32f0ee77f08..32fed700c8b 100644 --- a/src/d3d11/d3d11_context.h +++ b/src/d3d11/d3d11_context.h @@ -1085,7 +1085,7 @@ namespace dxvk { DxvkMultisampleState* pMsState, UINT SampleMask); - template + template void EmitCs(Cmd&& command) { m_cmdData = nullptr; @@ -1093,14 +1093,14 @@ namespace dxvk { 
GetTypedContext()->EmitCsChunk(std::move(m_csChunk)); m_csChunk = AllocCsChunk(); - if constexpr (AllowFlush) + if constexpr (!IsDeferred && AllowFlush) GetTypedContext()->ConsiderFlush(GpuFlushType::ImplicitWeakHint); m_csChunk->push(command); } } - template + template M* EmitCsCmd(Cmd&& command, Args&&... args) { M* data = m_csChunk->pushCmd( command, std::forward(args)...); @@ -1109,7 +1109,7 @@ namespace dxvk { GetTypedContext()->EmitCsChunk(std::move(m_csChunk)); m_csChunk = AllocCsChunk(); - if constexpr (AllowFlush) + if constexpr (!IsDeferred && AllowFlush) GetTypedContext()->ConsiderFlush(GpuFlushType::ImplicitWeakHint); // We must record this command after the potential From b337a27e9705a13580f3cb8ae4fb80ddf1343eb0 Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Wed, 19 Feb 2025 01:49:45 +0100 Subject: [PATCH 07/17] [d3d11] Lazy-bind constant buffers --- src/d3d11/d3d11_context.cpp | 213 ++++++++++++++++++++++++-------- src/d3d11/d3d11_context.h | 32 ++++- src/d3d11/d3d11_context_ext.cpp | 12 ++ 3 files changed, 201 insertions(+), 56 deletions(-) diff --git a/src/d3d11/d3d11_context.cpp b/src/d3d11/d3d11_context.cpp index 8d42c283b71..16383768996 100644 --- a/src/d3d11/d3d11_context.cpp +++ b/src/d3d11/d3d11_context.cpp @@ -1009,6 +1009,9 @@ namespace dxvk { if (!ctrBuf.defined()) return; + if (unlikely(HasDirtyGraphicsBindings())) + ApplyDirtyGraphicsBindings(); + // We bind the SO counter as an indirect count buffer, // so reset any tracking we may have been doing here. 
m_state.id.reset(); @@ -1035,6 +1038,9 @@ namespace dxvk { UINT StartVertexLocation) { D3D10DeviceLock lock = LockContext(); + if (unlikely(HasDirtyGraphicsBindings())) + ApplyDirtyGraphicsBindings(); + EmitCs([=] (DxvkContext* ctx) { ctx->draw( VertexCount, 1, @@ -1050,6 +1056,9 @@ namespace dxvk { INT BaseVertexLocation) { D3D10DeviceLock lock = LockContext(); + if (unlikely(HasDirtyGraphicsBindings())) + ApplyDirtyGraphicsBindings(); + EmitCs([=] (DxvkContext* ctx) { ctx->drawIndexed( IndexCount, 1, @@ -1067,6 +1076,9 @@ namespace dxvk { UINT StartInstanceLocation) { D3D10DeviceLock lock = LockContext(); + if (unlikely(HasDirtyGraphicsBindings())) + ApplyDirtyGraphicsBindings(); + EmitCs([=] (DxvkContext* ctx) { ctx->draw( VertexCountPerInstance, @@ -1086,6 +1098,9 @@ namespace dxvk { UINT StartInstanceLocation) { D3D10DeviceLock lock = LockContext(); + if (unlikely(HasDirtyGraphicsBindings())) + ApplyDirtyGraphicsBindings(); + EmitCs([=] (DxvkContext* ctx) { ctx->drawIndexed( IndexCountPerInstance, @@ -1107,6 +1122,9 @@ namespace dxvk { if (!ValidateDrawBufferSize(pBufferForArgs, AlignedByteOffsetForArgs, sizeof(VkDrawIndexedIndirectCommand))) return; + if (unlikely(HasDirtyGraphicsBindings())) + ApplyDirtyGraphicsBindings(); + // If possible, batch up multiple indirect draw calls of // the same type into one single multiDrawIndirect call auto cmdData = static_cast(m_cmdData); @@ -1142,6 +1160,9 @@ namespace dxvk { if (!ValidateDrawBufferSize(pBufferForArgs, AlignedByteOffsetForArgs, sizeof(VkDrawIndirectCommand))) return; + if (unlikely(HasDirtyGraphicsBindings())) + ApplyDirtyGraphicsBindings(); + // If possible, batch up multiple indirect draw calls of // the same type into one single multiDrawIndirect call auto cmdData = static_cast(m_cmdData); @@ -1174,6 +1195,9 @@ namespace dxvk { UINT ThreadGroupCountZ) { D3D10DeviceLock lock = LockContext(); + if (unlikely(HasDirtyComputeBindings())) + ApplyDirtyComputeBindings(); + EmitCs([=] (DxvkContext* ctx) { 
ctx->dispatch( ThreadGroupCountX, @@ -1193,6 +1217,9 @@ namespace dxvk { if (!ValidateDrawBufferSize(pBufferForArgs, AlignedByteOffsetForArgs, sizeof(VkDispatchIndirectCommand))) return; + if (unlikely(HasDirtyComputeBindings())) + ApplyDirtyComputeBindings(); + EmitCs([cOffset = AlignedByteOffsetForArgs] (DxvkContext* ctx) { ctx->dispatchIndirect(cOffset); @@ -3157,6 +3184,59 @@ namespace dxvk { } + template + void D3D11CommonContext::ApplyDirtyConstantBuffers( + DxbcProgramType Stage, + const DxbcBindingMask& BoundMask, + DxbcBindingMask& DirtyMask) { + uint32_t bindMask = BoundMask.cbvMask & DirtyMask.cbvMask; + + if (!bindMask) + return; + + const auto& state = m_state.cbv[Stage]; + DirtyMask.cbvMask -= bindMask; + + for (uint32_t slot : bit::BitMask(bindMask)) { + const auto& cbv = state.buffers[slot]; + + BindConstantBuffer(Stage, slot, cbv.buffer.ptr(), + cbv.constantOffset, cbv.constantBound); + } + } + + + template + void D3D11CommonContext::ApplyDirtyGraphicsBindings() { + auto dirtyMask = m_state.lazy.shadersDirty & m_state.lazy.shadersUsed; + dirtyMask.clr(DxbcProgramType::ComputeShader); + + for (uint32_t stageIndex : bit::BitMask(uint32_t(dirtyMask.raw()))) { + DxbcProgramType stage = DxbcProgramType(stageIndex); + + auto& boundMask = m_state.lazy.bindingsUsed[stage]; + auto& dirtyMask = m_state.lazy.bindingsDirty[stage]; + + ApplyDirtyConstantBuffers(stage, boundMask, dirtyMask); + + m_state.lazy.shadersDirty.clr(stage); + } + } + + + template + void D3D11CommonContext::ApplyDirtyComputeBindings() { + DxbcProgramType stage = DxbcProgramType::ComputeShader; + + auto& boundMask = m_state.lazy.bindingsUsed[stage]; + auto& dirtyMask = m_state.lazy.bindingsDirty[stage]; + + ApplyDirtyConstantBuffers(stage, boundMask, dirtyMask); + + m_state.lazy.shadersDirty.clr(stage); + } + + template void D3D11CommonContext::ApplyInputLayout() { auto inputLayout = m_state.ia.inputLayout.prvRef(); @@ -3686,45 +3766,49 @@ namespace dxvk { template - template void 
D3D11CommonContext::BindConstantBuffer( + DxbcProgramType ShaderStage, UINT Slot, D3D11Buffer* pBuffer, UINT Offset, UINT Length) { + uint32_t slotId = computeConstantBufferBinding(ShaderStage, Slot); + if (pBuffer) { EmitCs([ - cSlotId = Slot, + cSlotId = slotId, + cStage = GetShaderStage(ShaderStage), cBufferSlice = pBuffer->GetBufferSlice(16 * Offset, 16 * Length) ] (DxvkContext* ctx) mutable { - VkShaderStageFlagBits stage = GetShaderStage(ShaderStage); - ctx->bindUniformBuffer(stage, cSlotId, + ctx->bindUniformBuffer(cStage, cSlotId, Forwarder::move(cBufferSlice)); }); } else { EmitCs([ - cSlotId = Slot + cSlotId = slotId, + cStage = GetShaderStage(ShaderStage) ] (DxvkContext* ctx) { - VkShaderStageFlagBits stage = GetShaderStage(ShaderStage); - ctx->bindUniformBuffer(stage, cSlotId, DxvkBufferSlice()); + ctx->bindUniformBuffer(cStage, cSlotId, DxvkBufferSlice()); }); } } template - template void D3D11CommonContext::BindConstantBufferRange( + DxbcProgramType ShaderStage, UINT Slot, UINT Offset, UINT Length) { + uint32_t slotId = computeConstantBufferBinding(ShaderStage, Slot); + EmitCs([ - cSlotId = Slot, - cOffset = 16 * Offset, - cLength = 16 * Length + cSlotId = slotId, + cStage = GetShaderStage(ShaderStage), + cOffset = 16u * Offset, + cLength = 16u * Length ] (DxvkContext* ctx) { - VkShaderStageFlagBits stage = GetShaderStage(ShaderStage); - ctx->bindUniformBufferRange(stage, cSlotId, cOffset, cLength); + ctx->bindUniformBufferRange(cStage, cSlotId, cOffset, cLength); }); } @@ -4236,6 +4320,48 @@ namespace dxvk { } + template + template + bool D3D11CommonContext::DirtyBindingGeneric( + DxbcProgramType ShaderStage, + T BoundMask, + T& DirtyMask, + T DirtyBit, + bool IsNull) { + if ((BoundMask & ~DirtyMask) & DirtyBit) { + // If we're binding a non-null resource to an active slot that has not been + // marked for lazy binding yet, forward the call immediately in order to + // avoid tracking overhead. This is by far the most common case. 
+ if (likely(!IsNull)) + return false; + + // If we are binding a null resource to an active slot, the app will likely + // either bind something else or bind a shader that does not use this slot. + // In that case, avoid likely redundant CS traffic and apply the binding on + // the next draw. + m_state.lazy.shadersDirty.set(ShaderStage); + } + + // Binding is either inactive or already dirty. In the inactive case, there + // is no need to mark the shader stage as dirty since binding a shader that + // activates the binding will implicitly do so. + DirtyMask |= DirtyBit; + return true; + } + + + template + bool D3D11CommonContext::DirtyConstantBuffer( + DxbcProgramType ShaderStage, + uint32_t Slot, + bool IsNull) { + return DirtyBindingGeneric(ShaderStage, + m_state.lazy.bindingsUsed[ShaderStage].cbvMask, + m_state.lazy.bindingsDirty[ShaderStage].cbvMask, + 1u << Slot, IsNull); + } + + template void D3D11CommonContext::DiscardBuffer( ID3D11Resource* pResource) { @@ -4398,6 +4524,21 @@ namespace dxvk { } + template + bool D3D11CommonContext::HasDirtyComputeBindings() { + return m_state.lazy.shadersDirty.test(DxbcProgramType::ComputeShader); + } + + + template + bool D3D11CommonContext::HasDirtyGraphicsBindings() { + return (m_state.lazy.shadersDirty & m_state.lazy.shadersUsed).any( + DxbcProgramType::VertexShader, DxbcProgramType::GeometryShader, + DxbcProgramType::HullShader, DxbcProgramType::DomainShader, + DxbcProgramType::PixelShader); + } + + template void D3D11CommonContext::ResetCommandListState() { EmitCs([ @@ -4646,36 +4787,6 @@ namespace dxvk { } - template - void D3D11CommonContext::RestoreUsedBindings() { - // Mark all bindings used since the last reset as dirty so that subsequent draws - // and dispatches will reapply them as necessary. Marking null bindings here may - // lead to some redundant CS thread traffic, but is otherwise harmless. 
- auto maxBindings = GetMaxUsedBindings(); - - for (uint32_t i = 0; i < uint32_t(DxbcProgramType::Count); i++) { - auto stage = DxbcProgramType(i); - auto stageInfo = maxBindings.stages[i]; - - m_state.lazy.bindingsDirty[stage].cbvMask |= (1u << stageInfo.cbvCount) - 1u; - m_state.lazy.bindingsDirty[stage].samplerMask |= (1u << stageInfo.samplerCount) - 1u; - - if (stageInfo.uavCount) - m_state.lazy.bindingsDirty[stage].uavMask |= uint64_t(-1) >> (64u - stageInfo.uavCount); - - if (stageInfo.srvCount > 64u) { - m_state.lazy.bindingsDirty[stage].srvMask[0] |= uint64_t(-1); - m_state.lazy.bindingsDirty[stage].srvMask[1] |= uint64_t(-1) >> (128u - stageInfo.srvCount); - } else if (stageInfo.srvCount) { - m_state.lazy.bindingsDirty[stage].srvMask[0] |= uint64_t(-1) >> (64u - stageInfo.srvCount); - } - - if (m_state.lazy.shadersUsed.test(stage) && !m_state.lazy.bindingsDirty[stage].empty()) - m_state.lazy.shadersDirty.set(stage); - } - } - - template void D3D11CommonContext::RestoreCommandListState() { BindFramebuffer(); @@ -4747,10 +4858,8 @@ namespace dxvk { template void D3D11CommonContext::RestoreConstantBuffers() { const auto& bindings = m_state.cbv[Stage]; - uint32_t slotId = computeConstantBufferBinding(Stage, 0); - for (uint32_t i = 0; i < bindings.maxCount; i++) { - BindConstantBuffer(slotId + i, bindings.buffers[i].buffer.ptr(), + BindConstantBuffer(Stage, i, bindings.buffers[i].buffer.ptr(), bindings.buffers[i].constantOffset, bindings.buffers[i].constantBound); } } @@ -4807,7 +4916,6 @@ namespace dxvk { UINT NumBuffers, ID3D11Buffer* const* ppConstantBuffers) { auto& bindings = m_state.cbv[ShaderStage]; - uint32_t slotId = computeConstantBufferBinding(ShaderStage, StartSlot); for (uint32_t i = 0; i < NumBuffers; i++) { auto newBuffer = static_cast(ppConstantBuffers[i]); @@ -4824,7 +4932,8 @@ namespace dxvk { bindings.buffers[StartSlot + i].constantCount = constantCount; bindings.buffers[StartSlot + i].constantBound = constantCount; - 
BindConstantBuffer(slotId + i, newBuffer, 0, constantCount); + if (!DirtyConstantBuffer(ShaderStage, StartSlot + i, !newBuffer)) + BindConstantBuffer(ShaderStage, StartSlot + i, newBuffer, 0, constantCount); } } @@ -4843,8 +4952,6 @@ namespace dxvk { const UINT* pNumConstants) { auto& bindings = m_state.cbv[ShaderStage]; - uint32_t slotId = computeConstantBufferBinding(ShaderStage, StartSlot); - for (uint32_t i = 0; i < NumBuffers; i++) { auto newBuffer = static_cast(ppConstantBuffers[i]); @@ -4883,14 +4990,16 @@ namespace dxvk { bindings.buffers[StartSlot + i].constantCount = constantCount; bindings.buffers[StartSlot + i].constantBound = constantBound; - BindConstantBuffer(slotId + i, newBuffer, constantOffset, constantBound); + if (!DirtyConstantBuffer(ShaderStage, StartSlot + i, !newBuffer)) + BindConstantBuffer(ShaderStage, StartSlot + i, newBuffer, constantOffset, constantBound); } else if (bindings.buffers[StartSlot + i].constantOffset != constantOffset || bindings.buffers[StartSlot + i].constantCount != constantCount) { bindings.buffers[StartSlot + i].constantOffset = constantOffset; bindings.buffers[StartSlot + i].constantCount = constantCount; bindings.buffers[StartSlot + i].constantBound = constantBound; - BindConstantBufferRange(slotId + i, constantOffset, constantBound); + if (!DirtyConstantBuffer(ShaderStage, StartSlot + i, !newBuffer)) + BindConstantBufferRange(ShaderStage, StartSlot + i, constantOffset, constantBound); } } diff --git a/src/d3d11/d3d11_context.h b/src/d3d11/d3d11_context.h index 32fed700c8b..734cede69a5 100644 --- a/src/d3d11/d3d11_context.h +++ b/src/d3d11/d3d11_context.h @@ -799,6 +799,15 @@ namespace dxvk { DxvkBufferSlice AllocStagingBuffer( VkDeviceSize Size); + void ApplyDirtyConstantBuffers( + DxbcProgramType Stage, + const DxbcBindingMask& BoundMask, + DxbcBindingMask& DirtyMask); + + void ApplyDirtyGraphicsBindings(); + + void ApplyDirtyComputeBindings(); + void ApplyInputLayout(); void ApplyPrimitiveTopology(); @@ -854,15 
+863,15 @@ namespace dxvk { D3D11Buffer* pBuffer, UINT Offset); - template void BindConstantBuffer( + DxbcProgramType ShaderStage, UINT Slot, D3D11Buffer* pBuffer, UINT Offset, UINT Length); - template void BindConstantBufferRange( + DxbcProgramType ShaderStage, UINT Slot, UINT Offset, UINT Length); @@ -911,6 +920,19 @@ namespace dxvk { DxvkBufferSlice BufferSlice, UINT Flags); + template + bool DirtyBindingGeneric( + DxbcProgramType ShaderStage, + T BoundMask, + T& DirtyMask, + T DirtyBit, + bool IsNull); + + bool DirtyConstantBuffer( + DxbcProgramType ShaderStage, + uint32_t Slot, + bool IsNull); + void DiscardBuffer( ID3D11Resource* pResource); @@ -943,6 +965,10 @@ namespace dxvk { D3D11MaxUsedBindings GetMaxUsedBindings(); + bool HasDirtyComputeBindings(); + + bool HasDirtyGraphicsBindings(); + void ResetCommandListState(); void ResetContextState(); @@ -967,8 +993,6 @@ namespace dxvk { void ResolveOmUavHazards( D3D11RenderTargetView* pView); - void RestoreUsedBindings(); - void RestoreCommandListState(); template diff --git a/src/d3d11/d3d11_context_ext.cpp b/src/d3d11/d3d11_context_ext.cpp index 5254f480f43..c933f571d80 100644 --- a/src/d3d11/d3d11_context_ext.cpp +++ b/src/d3d11/d3d11_context_ext.cpp @@ -48,6 +48,9 @@ namespace dxvk { D3D10DeviceLock lock = m_ctx->LockContext(); m_ctx->SetDrawBuffers(pBufferForArgs, nullptr); + if (unlikely(m_ctx->HasDirtyGraphicsBindings())) + m_ctx->ApplyDirtyGraphicsBindings(); + m_ctx->EmitCs([ cCount = DrawCount, cOffset = ByteOffsetForArgs, @@ -67,6 +70,9 @@ namespace dxvk { D3D10DeviceLock lock = m_ctx->LockContext(); m_ctx->SetDrawBuffers(pBufferForArgs, nullptr); + if (unlikely(m_ctx->HasDirtyGraphicsBindings())) + m_ctx->ApplyDirtyGraphicsBindings(); + m_ctx->EmitCs([ cCount = DrawCount, cOffset = ByteOffsetForArgs, @@ -88,6 +94,9 @@ namespace dxvk { D3D10DeviceLock lock = m_ctx->LockContext(); m_ctx->SetDrawBuffers(pBufferForArgs, pBufferForCount); + if (unlikely(m_ctx->HasDirtyGraphicsBindings())) + 
m_ctx->ApplyDirtyGraphicsBindings(); + m_ctx->EmitCs([ cMaxCount = MaxDrawCount, cArgOffset = ByteOffsetForArgs, @@ -110,6 +119,9 @@ namespace dxvk { D3D10DeviceLock lock = m_ctx->LockContext(); m_ctx->SetDrawBuffers(pBufferForArgs, pBufferForCount); + if (unlikely(m_ctx->HasDirtyGraphicsBindings())) + m_ctx->ApplyDirtyGraphicsBindings(); + m_ctx->EmitCs([ cMaxCount = MaxDrawCount, cArgOffset = ByteOffsetForArgs, From 5409af074a1b3f9427d0f702952a21ecaeb54116 Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Wed, 19 Feb 2025 02:22:23 +0100 Subject: [PATCH 08/17] [d3d11] Lazy-bind shader resources --- src/d3d11/d3d11_context.cpp | 74 ++++++++++++++++++++++++++++--------- src/d3d11/d3d11_context.h | 12 +++++- 2 files changed, 67 insertions(+), 19 deletions(-) diff --git a/src/d3d11/d3d11_context.cpp b/src/d3d11/d3d11_context.cpp index 16383768996..58a110048e6 100644 --- a/src/d3d11/d3d11_context.cpp +++ b/src/d3d11/d3d11_context.cpp @@ -3206,6 +3206,28 @@ namespace dxvk { } + template + void D3D11CommonContext::ApplyDirtyShaderResources( + DxbcProgramType Stage, + const DxbcBindingMask& BoundMask, + DxbcBindingMask& DirtyMask) { + const auto& state = m_state.srv[Stage]; + + for (uint32_t i = 0; i < state.maxCount; i += 64u) { + uint32_t maskIndex = i / 64u; + uint64_t bindMask = BoundMask.srvMask[maskIndex] & DirtyMask.srvMask[maskIndex]; + + if (!bindMask) + continue; + + DirtyMask.srvMask[maskIndex] -= bindMask; + + for (uint32_t slot : bit::BitMask(bindMask)) + BindShaderResource(Stage, slot + i, state.views[slot + i].ptr()); + } + } + + template void D3D11CommonContext::ApplyDirtyGraphicsBindings() { auto dirtyMask = m_state.lazy.shadersDirty & m_state.lazy.shadersUsed; @@ -3218,6 +3240,7 @@ namespace dxvk { auto& dirtyMask = m_state.lazy.bindingsDirty[stage]; ApplyDirtyConstantBuffers(stage, boundMask, dirtyMask); + ApplyDirtyShaderResources(stage, boundMask, dirtyMask); m_state.lazy.shadersDirty.clr(stage); } @@ -3232,6 +3255,7 @@ namespace dxvk { auto& 
dirtyMask = m_state.lazy.bindingsDirty[stage]; ApplyDirtyConstantBuffers(stage, boundMask, dirtyMask); + ApplyDirtyShaderResources(stage, boundMask, dirtyMask); m_state.lazy.shadersDirty.clr(stage); } @@ -3839,36 +3863,38 @@ namespace dxvk { template - template void D3D11CommonContext::BindShaderResource( + DxbcProgramType ShaderStage, UINT Slot, D3D11ShaderResourceView* pResource) { + uint32_t slotId = computeSrvBinding(ShaderStage, Slot); + if (pResource) { if (pResource->GetViewInfo().Dimension != D3D11_RESOURCE_DIMENSION_BUFFER) { EmitCs([ - cSlotId = Slot, + cSlotId = slotId, + cStage = GetShaderStage(ShaderStage), cView = pResource->GetImageView() ] (DxvkContext* ctx) mutable { - VkShaderStageFlagBits stage = GetShaderStage(ShaderStage); - ctx->bindResourceImageView(stage, cSlotId, + ctx->bindResourceImageView(cStage, cSlotId, Forwarder::move(cView)); }); } else { EmitCs([ - cSlotId = Slot, + cSlotId = slotId, + cStage = GetShaderStage(ShaderStage), cView = pResource->GetBufferView() ] (DxvkContext* ctx) mutable { - VkShaderStageFlagBits stage = GetShaderStage(ShaderStage); - ctx->bindResourceBufferView(stage, cSlotId, + ctx->bindResourceBufferView(cStage, cSlotId, Forwarder::move(cView)); }); } } else { EmitCs([ - cSlotId = Slot + cSlotId = slotId, + cStage = GetShaderStage(ShaderStage) ] (DxvkContext* ctx) { - VkShaderStageFlagBits stage = GetShaderStage(ShaderStage); - ctx->bindResourceImageView(stage, cSlotId, nullptr); + ctx->bindResourceImageView(cStage, cSlotId, nullptr); }); } } @@ -4362,6 +4388,20 @@ namespace dxvk { } + template + bool D3D11CommonContext::DirtyShaderResource( + DxbcProgramType ShaderStage, + uint32_t Slot, + bool IsNull) { + uint32_t idx = Slot / 64u; + + return DirtyBindingGeneric(ShaderStage, + m_state.lazy.bindingsUsed[ShaderStage].srvMask[idx], + m_state.lazy.bindingsDirty[ShaderStage].srvMask[idx], + uint64_t(1u) << Slot, IsNull); + } + + template void D3D11CommonContext::DiscardBuffer( ID3D11Resource* pResource) { @@ -4694,8 
+4734,6 @@ namespace dxvk { void D3D11CommonContext::ResolveSrvHazards( T* pView) { auto& bindings = m_state.srv[ShaderStage]; - - uint32_t slotId = computeSrvBinding(ShaderStage, 0); int32_t srvId = bindings.hazardous.findNext(0); while (srvId >= 0) { @@ -4708,7 +4746,8 @@ namespace dxvk { bindings.views[srvId] = nullptr; bindings.hazardous.clr(srvId); - BindShaderResource(slotId + srvId, nullptr); + if (!DirtyShaderResource(ShaderStage, srvId, true)) + BindShaderResource(ShaderStage, srvId, nullptr); } } else { // Avoid further redundant iterations @@ -4880,10 +4919,8 @@ namespace dxvk { template void D3D11CommonContext::RestoreShaderResources() { const auto& bindings = m_state.srv[Stage]; - uint32_t slotId = computeSrvBinding(Stage, 0); - for (uint32_t i = 0; i < bindings.maxCount; i++) - BindShaderResource(slotId + i, bindings.views[i].ptr()); + BindShaderResource(Stage, i, bindings.views[i].ptr()); } @@ -5015,7 +5052,6 @@ namespace dxvk { UINT NumResources, ID3D11ShaderResourceView* const* ppResources) { auto& bindings = m_state.srv[ShaderStage]; - uint32_t slotId = computeSrvBinding(ShaderStage, StartSlot); for (uint32_t i = 0; i < NumResources; i++) { auto resView = static_cast(ppResources[i]); @@ -5034,7 +5070,9 @@ namespace dxvk { } bindings.views[StartSlot + i] = resView; - BindShaderResource(slotId + i, resView); + + if (!DirtyShaderResource(ShaderStage, StartSlot + i, !resView)) + BindShaderResource(ShaderStage, StartSlot + i, resView); } } diff --git a/src/d3d11/d3d11_context.h b/src/d3d11/d3d11_context.h index 734cede69a5..6a0b9f7fb0b 100644 --- a/src/d3d11/d3d11_context.h +++ b/src/d3d11/d3d11_context.h @@ -804,6 +804,11 @@ namespace dxvk { const DxbcBindingMask& BoundMask, DxbcBindingMask& DirtyMask); + void ApplyDirtyShaderResources( + DxbcProgramType Stage, + const DxbcBindingMask& BoundMask, + DxbcBindingMask& DirtyMask); + void ApplyDirtyGraphicsBindings(); void ApplyDirtyComputeBindings(); @@ -881,8 +886,8 @@ namespace dxvk { UINT Slot, 
D3D11SamplerState* pSampler); - template void BindShaderResource( + DxbcProgramType ShaderStage, UINT Slot, D3D11ShaderResourceView* pResource); @@ -933,6 +938,11 @@ namespace dxvk { uint32_t Slot, bool IsNull); + bool DirtyShaderResource( + DxbcProgramType ShaderStage, + uint32_t Slot, + bool IsNull); + void DiscardBuffer( ID3D11Resource* pResource); From ffe721c49e65aecab3da543234fd17b10b34b536 Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Wed, 19 Feb 2025 02:36:45 +0100 Subject: [PATCH 09/17] [d3d11] Lazy-bind samplers --- src/d3d11/d3d11_context.cpp | 57 +++++++++++++++++++++++++++++-------- src/d3d11/d3d11_context.h | 12 +++++++- 2 files changed, 56 insertions(+), 13 deletions(-) diff --git a/src/d3d11/d3d11_context.cpp b/src/d3d11/d3d11_context.cpp index 58a110048e6..e61fb1a0eed 100644 --- a/src/d3d11/d3d11_context.cpp +++ b/src/d3d11/d3d11_context.cpp @@ -3206,6 +3206,24 @@ namespace dxvk { } + template + void D3D11CommonContext::ApplyDirtySamplers( + DxbcProgramType Stage, + const DxbcBindingMask& BoundMask, + DxbcBindingMask& DirtyMask) { + uint32_t bindMask = BoundMask.samplerMask & DirtyMask.samplerMask; + + if (!bindMask) + return; + + const auto& state = m_state.samplers[Stage]; + DirtyMask.samplerMask -= bindMask; + + for (uint32_t slot : bit::BitMask(bindMask)) + BindSampler(Stage, slot, state.samplers[slot]); + } + + template void D3D11CommonContext::ApplyDirtyShaderResources( DxbcProgramType Stage, @@ -3239,6 +3257,7 @@ namespace dxvk { auto& boundMask = m_state.lazy.bindingsUsed[stage]; auto& dirtyMask = m_state.lazy.bindingsDirty[stage]; + ApplyDirtySamplers(stage, boundMask, dirtyMask); ApplyDirtyConstantBuffers(stage, boundMask, dirtyMask); ApplyDirtyShaderResources(stage, boundMask, dirtyMask); @@ -3254,6 +3273,7 @@ namespace dxvk { auto& boundMask = m_state.lazy.bindingsUsed[stage]; auto& dirtyMask = m_state.lazy.bindingsDirty[stage]; + ApplyDirtySamplers(stage, boundMask, dirtyMask); ApplyDirtyConstantBuffers(stage, boundMask, 
dirtyMask); ApplyDirtyShaderResources(stage, boundMask, dirtyMask); @@ -3838,25 +3858,27 @@ namespace dxvk { template - template void D3D11CommonContext::BindSampler( + DxbcProgramType ShaderStage, UINT Slot, D3D11SamplerState* pSampler) { + uint32_t slotId = computeSamplerBinding(ShaderStage, Slot); + if (pSampler) { EmitCs([ - cSlotId = Slot, + cSlotId = slotId, + cStage = GetShaderStage(ShaderStage), cSampler = pSampler->GetDXVKSampler() ] (DxvkContext* ctx) mutable { - VkShaderStageFlagBits stage = GetShaderStage(ShaderStage); - ctx->bindResourceSampler(stage, cSlotId, + ctx->bindResourceSampler(cStage, cSlotId, Forwarder::move(cSampler)); }); } else { EmitCs([ - cSlotId = Slot + cSlotId = slotId, + cStage = GetShaderStage(ShaderStage) ] (DxvkContext* ctx) { - VkShaderStageFlagBits stage = GetShaderStage(ShaderStage); - ctx->bindResourceSampler(stage, cSlotId, nullptr); + ctx->bindResourceSampler(cStage, cSlotId, nullptr); }); } } @@ -4388,6 +4410,18 @@ namespace dxvk { } + template + bool D3D11CommonContext::DirtySampler( + DxbcProgramType ShaderStage, + uint32_t Slot, + bool IsNull) { + return DirtyBindingGeneric(ShaderStage, + m_state.lazy.bindingsUsed[ShaderStage].samplerMask, + m_state.lazy.bindingsDirty[ShaderStage].samplerMask, + 1u << Slot, IsNull); + } + + template bool D3D11CommonContext::DirtyShaderResource( DxbcProgramType ShaderStage, @@ -4908,10 +4942,8 @@ namespace dxvk { template void D3D11CommonContext::RestoreSamplers() { const auto& bindings = m_state.samplers[Stage]; - uint32_t slotId = computeSamplerBinding(Stage, 0); - for (uint32_t i = 0; i < bindings.maxCount; i++) - BindSampler(slotId + i, bindings.samplers[i]); + BindSampler(Stage, i, bindings.samplers[i]); } @@ -5088,14 +5120,15 @@ namespace dxvk { UINT NumSamplers, ID3D11SamplerState* const* ppSamplers) { auto& bindings = m_state.samplers[ShaderStage]; - uint32_t slotId = computeSamplerBinding(ShaderStage, StartSlot); for (uint32_t i = 0; i < NumSamplers; i++) { auto sampler = 
static_cast(ppSamplers[i]); if (bindings.samplers[StartSlot + i] != sampler) { bindings.samplers[StartSlot + i] = sampler; - BindSampler(slotId + i, sampler); + + if (!DirtySampler(ShaderStage, StartSlot + i, !sampler)) + BindSampler(ShaderStage, StartSlot + i, sampler); } } diff --git a/src/d3d11/d3d11_context.h b/src/d3d11/d3d11_context.h index 6a0b9f7fb0b..62fb05ac07c 100644 --- a/src/d3d11/d3d11_context.h +++ b/src/d3d11/d3d11_context.h @@ -804,6 +804,11 @@ namespace dxvk { const DxbcBindingMask& BoundMask, DxbcBindingMask& DirtyMask); + void ApplyDirtySamplers( + DxbcProgramType Stage, + const DxbcBindingMask& BoundMask, + DxbcBindingMask& DirtyMask); + void ApplyDirtyShaderResources( DxbcProgramType Stage, const DxbcBindingMask& BoundMask, @@ -881,8 +886,8 @@ namespace dxvk { UINT Offset, UINT Length); - template void BindSampler( + DxbcProgramType ShaderStage, UINT Slot, D3D11SamplerState* pSampler); @@ -938,6 +943,11 @@ namespace dxvk { uint32_t Slot, bool IsNull); + bool DirtySampler( + DxbcProgramType ShaderStage, + uint32_t Slot, + bool IsNull); + bool DirtyShaderResource( DxbcProgramType ShaderStage, uint32_t Slot, From 6fb6252e1821f9aabff66a23cd520b613442441a Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Wed, 19 Feb 2025 13:11:26 +0100 Subject: [PATCH 10/17] [d3d11] Refactor BindUnorderedAccessView We won't do lazy bindings for UAVs, but at least bring this function in line with the rest of the binding functions. 
--- src/d3d11/d3d11_context.cpp | 88 +++++++++++++------------------------ src/d3d11/d3d11_context.h | 5 +-- 2 files changed, 32 insertions(+), 61 deletions(-) diff --git a/src/d3d11/d3d11_context.cpp b/src/d3d11/d3d11_context.cpp index e61fb1a0eed..7c310411b18 100644 --- a/src/d3d11/d3d11_context.cpp +++ b/src/d3d11/d3d11_context.cpp @@ -2158,9 +2158,6 @@ namespace dxvk { return; // Unbind previously bound conflicting UAVs - uint32_t uavSlotId = computeUavBinding (DxbcProgramType::ComputeShader, 0); - uint32_t ctrSlotId = computeUavCounterBinding(DxbcProgramType::ComputeShader, 0); - int32_t uavId = m_state.uav.mask.findNext(0); while (uavId >= 0) { @@ -2172,9 +2169,7 @@ namespace dxvk { m_state.uav.views[uavId] = nullptr; m_state.uav.mask.clr(uavId); - BindUnorderedAccessView( - uavSlotId + uavId, nullptr, - ctrSlotId + uavId, ~0u); + BindUnorderedAccessView(DxbcProgramType::ComputeShader, uavId, nullptr, ~0u); } } @@ -2193,10 +2188,7 @@ namespace dxvk { m_state.uav.views[StartSlot + i] = uav; m_state.uav.mask.set(StartSlot + i, uav != nullptr); - BindUnorderedAccessView( - uavSlotId + StartSlot + i, uav, - ctrSlotId + StartSlot + i, ctr); - + BindUnorderedAccessView(DxbcProgramType::ComputeShader, StartSlot + i, uav, ctr); ResolveCsSrvHazards(uav); } } @@ -3923,25 +3915,28 @@ namespace dxvk { template - template void D3D11CommonContext::BindUnorderedAccessView( - UINT UavSlot, + DxbcProgramType ShaderStage, + UINT Slot, D3D11UnorderedAccessView* pUav, - UINT CtrSlot, UINT Counter) { + uint32_t uavSlotId = computeUavBinding(ShaderStage, Slot); + uint32_t ctrSlotId = computeUavCounterBinding(ShaderStage, Slot); + + VkShaderStageFlags stages = ShaderStage == DxbcProgramType::ComputeShader + ? 
VK_SHADER_STAGE_COMPUTE_BIT + : VK_SHADER_STAGE_ALL_GRAPHICS; + if (pUav) { if (pUav->GetViewInfo().Dimension == D3D11_RESOURCE_DIMENSION_BUFFER) { EmitCs([ - cUavSlotId = UavSlot, - cCtrSlotId = CtrSlot, + cUavSlotId = uavSlotId, + cCtrSlotId = ctrSlotId, + cStages = stages, cBufferView = pUav->GetBufferView(), cCounterView = pUav->GetCounterView(), cCounterValue = Counter ] (DxvkContext* ctx) mutable { - VkShaderStageFlags stages = ShaderStage == DxbcProgramType::ComputeShader - ? VK_SHADER_STAGE_COMPUTE_BIT - : VK_SHADER_STAGE_ALL_GRAPHICS; - if (cCounterView != nullptr && cCounterValue != ~0u) { DxvkBufferSlice counterSlice(cCounterView); @@ -3952,37 +3947,31 @@ namespace dxvk { &cCounterValue); } - ctx->bindResourceBufferView(stages, cUavSlotId, + ctx->bindResourceBufferView(cStages, cUavSlotId, Forwarder::move(cBufferView)); - ctx->bindResourceBufferView(stages, cCtrSlotId, + ctx->bindResourceBufferView(cStages, cCtrSlotId, Forwarder::move(cCounterView)); }); } else { EmitCs([ - cUavSlotId = UavSlot, - cCtrSlotId = CtrSlot, + cUavSlotId = uavSlotId, + cCtrSlotId = ctrSlotId, + cStages = stages, cImageView = pUav->GetImageView() ] (DxvkContext* ctx) mutable { - VkShaderStageFlags stages = ShaderStage == DxbcProgramType::ComputeShader - ? VK_SHADER_STAGE_COMPUTE_BIT - : VK_SHADER_STAGE_ALL_GRAPHICS; - - ctx->bindResourceImageView(stages, cUavSlotId, + ctx->bindResourceImageView(cStages, cUavSlotId, Forwarder::move(cImageView)); - ctx->bindResourceBufferView(stages, cCtrSlotId, nullptr); + ctx->bindResourceBufferView(cStages, cCtrSlotId, nullptr); }); } } else { EmitCs([ - cUavSlotId = UavSlot, - cCtrSlotId = CtrSlot + cUavSlotId = uavSlotId, + cCtrSlotId = ctrSlotId, + cStages = stages ] (DxvkContext* ctx) { - VkShaderStageFlags stages = ShaderStage == DxbcProgramType::ComputeShader - ? 
VK_SHADER_STAGE_COMPUTE_BIT - : VK_SHADER_STAGE_ALL_GRAPHICS; - - ctx->bindResourceImageView(stages, cUavSlotId, nullptr); - ctx->bindResourceBufferView(stages, cCtrSlotId, nullptr); + ctx->bindResourceImageView(cStages, cUavSlotId, nullptr); + ctx->bindResourceBufferView(cStages, cCtrSlotId, nullptr); }); } } @@ -4845,16 +4834,11 @@ namespace dxvk { if (!pView || !pView->HasBindFlag(D3D11_BIND_UNORDERED_ACCESS)) return; - uint32_t uavSlotId = computeUavBinding (DxbcProgramType::PixelShader, 0); - uint32_t ctrSlotId = computeUavCounterBinding(DxbcProgramType::PixelShader, 0); - for (uint32_t i = 0; i < m_state.om.maxUav; i++) { if (CheckViewOverlap(pView, m_state.om.uavs[i].ptr())) { m_state.om.uavs[i] = nullptr; - BindUnorderedAccessView( - uavSlotId + i, nullptr, - ctrSlotId + i, ~0u); + BindUnorderedAccessView(DxbcProgramType::PixelShader, i, nullptr, ~0u); } } } @@ -4967,14 +4951,8 @@ namespace dxvk { ? m_state.uav.maxCount : m_state.om.maxUav; - uint32_t uavSlotId = computeUavBinding(Stage, 0); - uint32_t ctrSlotId = computeUavCounterBinding(Stage, 0); - - for (uint32_t i = 0; i < maxCount; i++) { - BindUnorderedAccessView( - uavSlotId + i, views[i].ptr(), - ctrSlotId + i, ~0u); - } + for (uint32_t i = 0; i < maxCount; i++) + BindUnorderedAccessView(Stage, i, views[i].ptr(), ~0u); } @@ -5191,9 +5169,6 @@ namespace dxvk { } if (unlikely(NumUAVs || m_state.om.maxUav)) { - uint32_t uavSlotId = computeUavBinding (DxbcProgramType::PixelShader, 0); - uint32_t ctrSlotId = computeUavCounterBinding(DxbcProgramType::PixelShader, 0); - if (likely(NumUAVs != D3D11_KEEP_UNORDERED_ACCESS_VIEWS)) { uint32_t newMaxUav = NumUAVs ? 
UAVStartSlot + NumUAVs : 0; uint32_t oldMaxUav = std::exchange(m_state.om.maxUav, newMaxUav); @@ -5210,10 +5185,7 @@ namespace dxvk { if (m_state.om.uavs[i] != uav || ctr != ~0u) { m_state.om.uavs[i] = uav; - BindUnorderedAccessView( - uavSlotId + i, uav, - ctrSlotId + i, ctr); - + BindUnorderedAccessView(DxbcProgramType::PixelShader, i, uav, ctr); ResolveOmSrvHazards(uav); if (NumRTVs == D3D11_KEEP_RENDER_TARGETS_AND_DEPTH_STENCIL) diff --git a/src/d3d11/d3d11_context.h b/src/d3d11/d3d11_context.h index 62fb05ac07c..8196de0bd48 100644 --- a/src/d3d11/d3d11_context.h +++ b/src/d3d11/d3d11_context.h @@ -896,11 +896,10 @@ namespace dxvk { UINT Slot, D3D11ShaderResourceView* pResource); - template void BindUnorderedAccessView( - UINT UavSlot, + DxbcProgramType ShaderStage, + UINT Slot, D3D11UnorderedAccessView* pUav, - UINT CtrSlot, UINT Counter); VkClearValue ConvertColorValue( From fe6226229eef495f60365e41efe308c649128100 Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Wed, 19 Feb 2025 13:36:37 +0100 Subject: [PATCH 11/17] [d3d11] Don't template methods that restore shader bindings This was only needed because Bind* methods were also templated. 
--- src/d3d11/d3d11_context.cpp | 52 +++++++++++++++---------------------- src/d3d11/d3d11_context.h | 22 ++++++++-------- 2 files changed, 32 insertions(+), 42 deletions(-) diff --git a/src/d3d11/d3d11_context.cpp b/src/d3d11/d3d11_context.cpp index 7c310411b18..054560088a9 100644 --- a/src/d3d11/d3d11_context.cpp +++ b/src/d3d11/d3d11_context.cpp @@ -4880,29 +4880,16 @@ namespace dxvk { for (uint32_t i = 0; i < m_state.so.targets.size(); i++) BindXfbBuffer(i, m_state.so.targets[i].buffer.ptr(), ~0u); - RestoreConstantBuffers(); - RestoreConstantBuffers(); - RestoreConstantBuffers(); - RestoreConstantBuffers(); - RestoreConstantBuffers(); - RestoreConstantBuffers(); - - RestoreShaderResources(); - RestoreShaderResources(); - RestoreShaderResources(); - RestoreShaderResources(); - RestoreShaderResources(); - RestoreShaderResources(); - - RestoreUnorderedAccessViews(); - RestoreUnorderedAccessViews(); - - RestoreSamplers(); - RestoreSamplers(); - RestoreSamplers(); - RestoreSamplers(); - RestoreSamplers(); - RestoreSamplers(); + for (uint32_t i = 0; i < uint32_t(DxbcProgramType::Count); i++) { + auto stage = DxbcProgramType(i); + + RestoreConstantBuffers(stage); + RestoreShaderResources(stage); + RestoreSamplers(stage); + } + + RestoreUnorderedAccessViews(DxbcProgramType::PixelShader); + RestoreUnorderedAccessViews(DxbcProgramType::ComputeShader); // Draw buffer bindings aren't persistent at the API level, and // we can't meaningfully track them. 
Just reset this state here @@ -4912,9 +4899,10 @@ namespace dxvk { template - template - void D3D11CommonContext::RestoreConstantBuffers() { + void D3D11CommonContext::RestoreConstantBuffers( + DxbcProgramType Stage) { const auto& bindings = m_state.cbv[Stage]; + for (uint32_t i = 0; i < bindings.maxCount; i++) { BindConstantBuffer(Stage, i, bindings.buffers[i].buffer.ptr(), bindings.buffers[i].constantOffset, bindings.buffers[i].constantBound); @@ -4923,26 +4911,28 @@ namespace dxvk { template - template - void D3D11CommonContext::RestoreSamplers() { + void D3D11CommonContext::RestoreSamplers( + DxbcProgramType Stage) { const auto& bindings = m_state.samplers[Stage]; + for (uint32_t i = 0; i < bindings.maxCount; i++) BindSampler(Stage, i, bindings.samplers[i]); } template - template - void D3D11CommonContext::RestoreShaderResources() { + void D3D11CommonContext::RestoreShaderResources( + DxbcProgramType Stage) { const auto& bindings = m_state.srv[Stage]; + for (uint32_t i = 0; i < bindings.maxCount; i++) BindShaderResource(Stage, i, bindings.views[i].ptr()); } template - template - void D3D11CommonContext::RestoreUnorderedAccessViews() { + void D3D11CommonContext::RestoreUnorderedAccessViews( + DxbcProgramType Stage) { const auto& views = Stage == DxbcProgramType::ComputeShader ? 
m_state.uav.views : m_state.om.uavs; diff --git a/src/d3d11/d3d11_context.h b/src/d3d11/d3d11_context.h index 8196de0bd48..0f95ed41411 100644 --- a/src/d3d11/d3d11_context.h +++ b/src/d3d11/d3d11_context.h @@ -1014,18 +1014,18 @@ namespace dxvk { void RestoreCommandListState(); - template - void RestoreConstantBuffers(); - - template - void RestoreSamplers(); - - template - void RestoreShaderResources(); - - template - void RestoreUnorderedAccessViews(); + void RestoreConstantBuffers( + DxbcProgramType Stage); + void RestoreSamplers( + DxbcProgramType Stage); + + void RestoreShaderResources( + DxbcProgramType Stage); + + void RestoreUnorderedAccessViews( + DxbcProgramType Stage); + template void SetConstantBuffers( UINT StartSlot, From f0db8720fa70d2ac85085fdc1eb5a3f285c2f895 Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Wed, 19 Feb 2025 15:40:47 +0100 Subject: [PATCH 12/17] [d3d11] Reset dirty tracking when re-applying context state --- src/d3d11/d3d11_context.cpp | 11 +++++++++++ src/d3d11/d3d11_context.h | 2 ++ src/d3d11/d3d11_context_imm.cpp | 6 +++++- 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/src/d3d11/d3d11_context.cpp b/src/d3d11/d3d11_context.cpp index 054560088a9..ca2c9a3169c 100644 --- a/src/d3d11/d3d11_context.cpp +++ b/src/d3d11/d3d11_context.cpp @@ -4746,6 +4746,15 @@ namespace dxvk { } + template + void D3D11CommonContext::ResetDirtyTracking() { + // Must only be called when all bindings are guaranteed to get applied + // to the DXVK context before the next draw or dispatch command. 
+ m_state.lazy.bindingsDirty.reset(); + m_state.lazy.shadersDirty = 0u; + } + + template void D3D11CommonContext::ResetStagingBuffer() { m_staging.reset(); @@ -4880,6 +4889,8 @@ namespace dxvk { for (uint32_t i = 0; i < m_state.so.targets.size(); i++) BindXfbBuffer(i, m_state.so.targets[i].buffer.ptr(), ~0u); + ResetDirtyTracking(); + for (uint32_t i = 0; i < uint32_t(DxbcProgramType::Count); i++) { auto stage = DxbcProgramType(i); diff --git a/src/d3d11/d3d11_context.h b/src/d3d11/d3d11_context.h index 0f95ed41411..0577c314d5b 100644 --- a/src/d3d11/d3d11_context.h +++ b/src/d3d11/d3d11_context.h @@ -992,6 +992,8 @@ namespace dxvk { void ResetContextState(); + void ResetDirtyTracking(); + void ResetStagingBuffer(); template diff --git a/src/d3d11/d3d11_context_imm.cpp b/src/d3d11/d3d11_context_imm.cpp index e22201afaf2..44d9d7ce3cc 100644 --- a/src/d3d11/d3d11_context_imm.cpp +++ b/src/d3d11/d3d11_context_imm.cpp @@ -754,7 +754,11 @@ namespace dxvk { if (!pState) return; - // Reset all state affected by the current context state + // Clear dirty tracking here since all context state will be + // re-applied anyway when the context state is swapped in again. + ResetDirtyTracking(); + + // Reset all state affected by the current context state. 
ResetCommandListState(); Com oldState = std::move(m_stateObject); From 0a101cdb50e4f0961ef9dc864f46be1e5d74b777 Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Wed, 19 Feb 2025 15:41:39 +0100 Subject: [PATCH 13/17] [d3d11] Reset dirty bindings on command submission --- src/d3d11/d3d11_context.cpp | 5 +++ src/d3d11/d3d11_context_imm.cpp | 77 ++++++++++++++++++++++++++++++++- src/d3d11/d3d11_context_imm.h | 2 + src/d3d11/d3d11_video.cpp | 3 ++ 4 files changed, 86 insertions(+), 1 deletion(-) diff --git a/src/d3d11/d3d11_context.cpp b/src/d3d11/d3d11_context.cpp index ca2c9a3169c..59d5b72c4b7 100644 --- a/src/d3d11/d3d11_context.cpp +++ b/src/d3d11/d3d11_context.cpp @@ -3186,6 +3186,7 @@ namespace dxvk { if (!bindMask) return; + // Need to clear dirty bits before binding const auto& state = m_state.cbv[Stage]; DirtyMask.cbvMask -= bindMask; @@ -3208,6 +3209,7 @@ namespace dxvk { if (!bindMask) return; + // Need to clear dirty bits before binding const auto& state = m_state.samplers[Stage]; DirtyMask.samplerMask -= bindMask; @@ -3230,6 +3232,7 @@ namespace dxvk { if (!bindMask) continue; + // Need to clear dirty bits before binding DirtyMask.srvMask[maskIndex] -= bindMask; for (uint32_t slot : bit::BitMask(bindMask)) @@ -4889,6 +4892,8 @@ namespace dxvk { for (uint32_t i = 0; i < m_state.so.targets.size(); i++) BindXfbBuffer(i, m_state.so.targets[i].buffer.ptr(), ~0u); + // Reset dirty binding and shader masks before applying + // bindings to avoid implicit null binding overrides. ResetDirtyTracking(); for (uint32_t i = 0; i < uint32_t(DxbcProgramType::Count); i++) { diff --git a/src/d3d11/d3d11_context_imm.cpp b/src/d3d11/d3d11_context_imm.cpp index 44d9d7ce3cc..e6f6cd0da15 100644 --- a/src/d3d11/d3d11_context_imm.cpp +++ b/src/d3d11/d3d11_context_imm.cpp @@ -218,7 +218,12 @@ namespace dxvk { D3D10DeviceLock lock = LockContext(); auto commandList = static_cast(pCommandList); - + + // Reset dirty binding tracking before submitting any CS chunks. 
+ // This is needed so that any submission that might occur during + // this call does not disrupt bindings set by the deferred context. + ResetDirtyTracking(); + // Clear state so that the command list can't observe any // current context state. The command list itself will clean // up after execution to ensure that no state changes done @@ -979,6 +984,73 @@ namespace dxvk { } + void D3D11ImmediateContext::ApplyDirtyNullBindings() { + // At the end of a submission, set all bindings that have not been applied yet + // to null on the DXVK context. This way, we avoid keeping resources alive that + // are bound to the DXVK context but not to the immediate context. + // + // Note: This requires that all methods that may modify dirty bindings on the + // DXVK context also reset the corresponding dirty bits *before* performing the + // bind operation, or otherwise an implicit flush can potentially override them. + auto& dirtyState = m_state.lazy.bindingsDirty; + + EmitCs([ + cDirtyState = dirtyState + ] (DxvkContext* ctx) { + for (uint32_t i = 0; i < uint32_t(DxbcProgramType::Count); i++) { + auto dxStage = DxbcProgramType(i); + auto vkStage = GetShaderStage(dxStage); + + // Unbind all dirty constant buffers + auto cbvSlot = computeConstantBufferBinding(dxStage, 0); + + for (uint32_t index : bit::BitMask(cDirtyState[dxStage].cbvMask)) + ctx->bindUniformBuffer(vkStage, cbvSlot + index, DxvkBufferSlice()); + + // Unbind all dirty samplers + auto samplerSlot = computeSamplerBinding(dxStage, 0); + + for (uint32_t index : bit::BitMask(cDirtyState[dxStage].samplerMask)) + ctx->bindResourceSampler(vkStage, samplerSlot + index, nullptr); + + // Unbind all dirty shader resource views + auto srvSlot = computeSrvBinding(dxStage, 0); + + for (uint32_t m = 0; m < cDirtyState[dxStage].srvMask.size(); m++) { + for (uint32_t index : bit::BitMask(cDirtyState[dxStage].srvMask[m])) + ctx->bindResourceImageView(vkStage, srvSlot + index + m * 64u, nullptr); + } + } + }); + + // Since we set 
the DXVK context bindings to null, any bindings that are null + // on the D3D context are no longer dirty, so we can clear the respective bits. + for (uint32_t i = 0; i < uint32_t(DxbcProgramType::Count); i++) { + auto stage = DxbcProgramType(i); + + for (uint32_t index : bit::BitMask(dirtyState[stage].cbvMask)) { + if (!m_state.cbv[stage].buffers[index].buffer.ptr()) + dirtyState[stage].cbvMask &= ~(1u << index); + } + + for (uint32_t index : bit::BitMask(dirtyState[stage].samplerMask)) { + if (!m_state.samplers[stage].samplers[index]) + dirtyState[stage].samplerMask &= ~(1u << index); + } + + for (uint32_t m = 0; m < dirtyState[stage].srvMask.size(); m++) { + for (uint32_t index : bit::BitMask(dirtyState[stage].srvMask[m])) { + if (!m_state.srv[stage].views[index + m * 64u].ptr()) + dirtyState[stage].srvMask[m] &= ~(uint64_t(1u) << index); + } + } + + if (dirtyState[stage].empty()) + m_state.lazy.shadersDirty.clr(stage); + } + } + + void D3D11ImmediateContext::ConsiderFlush( GpuFlushType FlushType) { uint64_t chunkId = GetCurrentSequenceNumber(); @@ -1002,6 +1074,9 @@ namespace dxvk { if (!GetPendingCsChunks() && !hEvent) return; + // Unbind unused resources + ApplyDirtyNullBindings(); + // Signal the submission fence and flush the command list uint64_t submissionId = ++m_submissionId; diff --git a/src/d3d11/d3d11_context_imm.h b/src/d3d11/d3d11_context_imm.h index a73dfe6d053..faf61842381 100644 --- a/src/d3d11/d3d11_context_imm.h +++ b/src/d3d11/d3d11_context_imm.h @@ -195,6 +195,8 @@ namespace dxvk { uint64_t GetPendingCsChunks(); + void ApplyDirtyNullBindings(); + void ConsiderFlush( GpuFlushType FlushType); diff --git a/src/d3d11/d3d11_video.cpp b/src/d3d11/d3d11_video.cpp index 1be4802a8cb..a5b20351dec 100644 --- a/src/d3d11/d3d11_video.cpp +++ b/src/d3d11/d3d11_video.cpp @@ -1037,7 +1037,9 @@ namespace dxvk { continue; if (!hasStreamsEnabled) { + m_ctx->ResetDirtyTracking(); m_ctx->ResetCommandListState(); + BindOutputView(pOutputView); hasStreamsEnabled = 
true; } @@ -1047,6 +1049,7 @@ namespace dxvk { if (hasStreamsEnabled) { UnbindResources(); + m_ctx->RestoreCommandListState(); } From aa25e4207061ce73f09de3e4889499b904e25970 Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Wed, 19 Feb 2025 16:16:13 +0100 Subject: [PATCH 14/17] [d3d11] Add compile-time debug flag for lazy binding --- src/d3d11/d3d11_context.cpp | 8 ++++++-- src/d3d11/d3d11_context.h | 5 +++++ src/d3d11/d3d11_context_imm.cpp | 4 ++++ 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/d3d11/d3d11_context.cpp b/src/d3d11/d3d11_context.cpp index 59d5b72c4b7..f722211d92a 100644 --- a/src/d3d11/d3d11_context.cpp +++ b/src/d3d11/d3d11_context.cpp @@ -3548,7 +3548,7 @@ namespace dxvk { m_state.lazy.shadersUsed.set(ShaderStage); m_state.lazy.bindingsUsed[ShaderStage] = pShaderModule->GetBindingMask(); - if (!m_state.lazy.shadersDirty.test(ShaderStage)) { + if (!m_state.lazy.shadersDirty.test(ShaderStage) && (DebugLazyBinding != Tristate::False)) { if (!(m_state.lazy.bindingsDirty[ShaderStage] & m_state.lazy.bindingsUsed[ShaderStage]).empty()) m_state.lazy.shadersDirty.set(ShaderStage); } @@ -4368,11 +4368,15 @@ namespace dxvk { T& DirtyMask, T DirtyBit, bool IsNull) { + // Forward immediately if lazy binding is forced off + if (DebugLazyBinding == Tristate::False) + return false; + if ((BoundMask & ~DirtyMask) & DirtyBit) { // If we're binding a non-null resource to an active slot that has not been // marked for lazy binding yet, forward the call immediately in order to // avoid tracking overhead. This is by far the most common case. 
- if (likely(!IsNull)) + if (likely(!IsNull && DebugLazyBinding != Tristate::True)) return false; // If we are binding a null resource to an active slot, the app will likely diff --git a/src/d3d11/d3d11_context.h b/src/d3d11/d3d11_context.h index 0577c314d5b..5c376d195ed 100644 --- a/src/d3d11/d3d11_context.h +++ b/src/d3d11/d3d11_context.h @@ -75,6 +75,11 @@ namespace dxvk { // Use a local staging buffer to handle tiny uploads, most // of the time we're fine with hitting the global allocator constexpr static VkDeviceSize StagingBufferSize = 256ull << 10; + + protected: + // Compile-time debug flag to force lazy binding on (True) or off (False) + constexpr static Tristate DebugLazyBinding = Tristate::Auto; + public: D3D11CommonContext( diff --git a/src/d3d11/d3d11_context_imm.cpp b/src/d3d11/d3d11_context_imm.cpp index e6f6cd0da15..538e3789c73 100644 --- a/src/d3d11/d3d11_context_imm.cpp +++ b/src/d3d11/d3d11_context_imm.cpp @@ -1053,6 +1053,10 @@ namespace dxvk { void D3D11ImmediateContext::ConsiderFlush( GpuFlushType FlushType) { + // In stress test mode, behave as if this would always flush + if (DebugLazyBinding == Tristate::True) + ApplyDirtyNullBindings(); + uint64_t chunkId = GetCurrentSequenceNumber(); uint64_t submissionId = m_submissionFence->value(); From c2da20c15fbd8037c321c442b2d98e4369c0b27a Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Thu, 20 Feb 2025 15:09:35 +0100 Subject: [PATCH 15/17] [d3d11] Lazy-bind compute shader UAVs And factor UAV counter updates out of binding. 
--- src/d3d11/d3d11_context.cpp | 89 +++++++++++++++++++++++++-------- src/d3d11/d3d11_context.h | 16 +++++- src/d3d11/d3d11_context_imm.cpp | 19 +++++++ src/d3d11/d3d11_view_uav.h | 4 ++ 4 files changed, 105 insertions(+), 23 deletions(-) diff --git a/src/d3d11/d3d11_context.cpp b/src/d3d11/d3d11_context.cpp index f722211d92a..8a324395465 100644 --- a/src/d3d11/d3d11_context.cpp +++ b/src/d3d11/d3d11_context.cpp @@ -2169,7 +2169,8 @@ namespace dxvk { m_state.uav.views[uavId] = nullptr; m_state.uav.mask.clr(uavId); - BindUnorderedAccessView(DxbcProgramType::ComputeShader, uavId, nullptr, ~0u); + if (!DirtyComputeUnorderedAccessView(uavId, true)) + BindUnorderedAccessView(DxbcProgramType::ComputeShader, uavId, nullptr); } } @@ -2184,11 +2185,16 @@ namespace dxvk { auto uav = static_cast(ppUnorderedAccessViews[i]); auto ctr = pUAVInitialCounts ? pUAVInitialCounts[i] : ~0u; - if (m_state.uav.views[StartSlot + i] != uav || ctr != ~0u) { + if (ctr != ~0u && uav && uav->HasCounter()) + UpdateUnorderedAccessViewCounter(uav, ctr); + + if (m_state.uav.views[StartSlot + i] != uav) { m_state.uav.views[StartSlot + i] = uav; m_state.uav.mask.set(StartSlot + i, uav != nullptr); - BindUnorderedAccessView(DxbcProgramType::ComputeShader, StartSlot + i, uav, ctr); + if (!DirtyComputeUnorderedAccessView(StartSlot + i, !uav)) + BindUnorderedAccessView(DxbcProgramType::ComputeShader, StartSlot + i, uav); + ResolveCsSrvHazards(uav); } } @@ -3241,6 +3247,28 @@ namespace dxvk { } + template + void D3D11CommonContext::ApplyDirtyUnorderedAccessViews( + DxbcProgramType Stage, + const DxbcBindingMask& BoundMask, + DxbcBindingMask& DirtyMask) { + uint64_t bindMask = BoundMask.uavMask & DirtyMask.uavMask; + + if (!bindMask) + return; + + const auto& views = Stage == DxbcProgramType::ComputeShader + ? 
m_state.uav.views + : m_state.om.uavs; + + // Need to clear dirty bits before binding + DirtyMask.uavMask -= bindMask; + + for (uint32_t slot : bit::BitMask(bindMask)) + BindUnorderedAccessView(Stage, slot, views[slot].ptr()); + } + + template void D3D11CommonContext::ApplyDirtyGraphicsBindings() { auto dirtyMask = m_state.lazy.shadersDirty & m_state.lazy.shadersUsed; @@ -3271,6 +3299,7 @@ namespace dxvk { ApplyDirtySamplers(stage, boundMask, dirtyMask); ApplyDirtyConstantBuffers(stage, boundMask, dirtyMask); ApplyDirtyShaderResources(stage, boundMask, dirtyMask); + ApplyDirtyUnorderedAccessViews(stage, boundMask, dirtyMask); m_state.lazy.shadersDirty.clr(stage); } @@ -3921,8 +3950,7 @@ namespace dxvk { void D3D11CommonContext::BindUnorderedAccessView( DxbcProgramType ShaderStage, UINT Slot, - D3D11UnorderedAccessView* pUav, - UINT Counter) { + D3D11UnorderedAccessView* pUav) { uint32_t uavSlotId = computeUavBinding(ShaderStage, Slot); uint32_t ctrSlotId = computeUavCounterBinding(ShaderStage, Slot); @@ -3937,19 +3965,8 @@ namespace dxvk { cCtrSlotId = ctrSlotId, cStages = stages, cBufferView = pUav->GetBufferView(), - cCounterView = pUav->GetCounterView(), - cCounterValue = Counter + cCounterView = pUav->GetCounterView() ] (DxvkContext* ctx) mutable { - if (cCounterView != nullptr && cCounterValue != ~0u) { - DxvkBufferSlice counterSlice(cCounterView); - - ctx->updateBuffer( - counterSlice.buffer(), - counterSlice.offset(), - sizeof(uint32_t), - &cCounterValue); - } - ctx->bindResourceBufferView(cStages, cUavSlotId, Forwarder::move(cBufferView)); ctx->bindResourceBufferView(cStages, cCtrSlotId, @@ -4432,6 +4449,19 @@ namespace dxvk { } + template + bool D3D11CommonContext::DirtyComputeUnorderedAccessView( + uint32_t Slot, + bool IsNull) { + constexpr DxbcProgramType ShaderStage = DxbcProgramType::ComputeShader; + + return DirtyBindingGeneric(ShaderStage, + m_state.lazy.bindingsUsed[ShaderStage].uavMask, + m_state.lazy.bindingsDirty[ShaderStage].uavMask, + 
uint64_t(1u) << Slot, IsNull); + } + + template void D3D11CommonContext::DiscardBuffer( ID3D11Resource* pResource) { @@ -4854,7 +4884,7 @@ namespace dxvk { if (CheckViewOverlap(pView, m_state.om.uavs[i].ptr())) { m_state.om.uavs[i] = nullptr; - BindUnorderedAccessView(DxbcProgramType::PixelShader, i, nullptr, ~0u); + BindUnorderedAccessView(DxbcProgramType::PixelShader, i, nullptr); } } } @@ -4962,7 +4992,7 @@ namespace dxvk { : m_state.om.maxUav; for (uint32_t i = 0; i < maxCount; i++) - BindUnorderedAccessView(Stage, i, views[i].ptr(), ~0u); + BindUnorderedAccessView(Stage, i, views[i].ptr()); } @@ -5192,10 +5222,13 @@ namespace dxvk { ctr = pUAVInitialCounts ? pUAVInitialCounts[i - UAVStartSlot] : ~0u; } - if (m_state.om.uavs[i] != uav || ctr != ~0u) { + if (ctr != ~0u && uav && uav->HasCounter()) + UpdateUnorderedAccessViewCounter(uav, ctr); + + if (m_state.om.uavs[i] != uav) { m_state.om.uavs[i] = uav; - BindUnorderedAccessView(DxbcProgramType::PixelShader, i, uav, ctr); + BindUnorderedAccessView(DxbcProgramType::PixelShader, i, uav); ResolveOmSrvHazards(uav); if (NumRTVs == D3D11_KEEP_RENDER_TARGETS_AND_DEPTH_STENCIL) @@ -5590,6 +5623,20 @@ namespace dxvk { } + template + void D3D11CommonContext::UpdateUnorderedAccessViewCounter( + D3D11UnorderedAccessView* pUav, + uint32_t CounterValue) { + EmitCs([ + cView = pUav->GetCounterView(), + cCounter = CounterValue + ] (DxvkContext* ctx) { + ctx->updateBuffer(cView->buffer(), + cView->info().offset, sizeof(cCounter), &cCounter); + }); + } + + template bool D3D11CommonContext::ValidateRenderTargets( UINT NumViews, diff --git a/src/d3d11/d3d11_context.h b/src/d3d11/d3d11_context.h index 5c376d195ed..91530c8d120 100644 --- a/src/d3d11/d3d11_context.h +++ b/src/d3d11/d3d11_context.h @@ -819,6 +819,11 @@ namespace dxvk { const DxbcBindingMask& BoundMask, DxbcBindingMask& DirtyMask); + void ApplyDirtyUnorderedAccessViews( + DxbcProgramType Stage, + const DxbcBindingMask& BoundMask, + DxbcBindingMask& DirtyMask); + void 
ApplyDirtyGraphicsBindings(); void ApplyDirtyComputeBindings(); @@ -904,8 +909,7 @@ namespace dxvk { void BindUnorderedAccessView( DxbcProgramType ShaderStage, UINT Slot, - D3D11UnorderedAccessView* pUav, - UINT Counter); + D3D11UnorderedAccessView* pUav); VkClearValue ConvertColorValue( const FLOAT Color[4], @@ -957,6 +961,10 @@ namespace dxvk { uint32_t Slot, bool IsNull); + bool DirtyComputeUnorderedAccessView( + uint32_t Slot, + bool IsNull); + void DiscardBuffer( ID3D11Resource* pResource); @@ -1115,6 +1123,10 @@ namespace dxvk { UINT SrcDepthPitch, UINT CopyFlags); + void UpdateUnorderedAccessViewCounter( + D3D11UnorderedAccessView* pUav, + uint32_t CounterValue); + bool ValidateRenderTargets( UINT NumViews, ID3D11RenderTargetView* const* ppRenderTargetViews, diff --git a/src/d3d11/d3d11_context_imm.cpp b/src/d3d11/d3d11_context_imm.cpp index 538e3789c73..8f85f082e49 100644 --- a/src/d3d11/d3d11_context_imm.cpp +++ b/src/d3d11/d3d11_context_imm.cpp @@ -1020,6 +1020,18 @@ namespace dxvk { for (uint32_t index : bit::BitMask(cDirtyState[dxStage].srvMask[m])) ctx->bindResourceImageView(vkStage, srvSlot + index + m * 64u, nullptr); } + + // Unbind all dirty unordered access views. Only consider compute + // here since we don't actually lazy-bind graphics UAVs. 
+ if (dxStage == DxbcProgramType::ComputeShader) { + auto uavSlot = computeUavBinding(dxStage, 0); + auto ctrSlot = computeUavCounterBinding(dxStage, 0); + + for (uint32_t index : bit::BitMask(cDirtyState[dxStage].uavMask)) { + ctx->bindResourceImageView(vkStage, uavSlot + index, nullptr); + ctx->bindResourceBufferView(vkStage, ctrSlot + index, nullptr); + } + } } }); @@ -1045,6 +1057,13 @@ namespace dxvk { } } + if (stage == DxbcProgramType::ComputeShader) { + for (uint32_t index : bit::BitMask(dirtyState[stage].uavMask)) { + if (!m_state.uav.views[index].ptr()) + dirtyState[stage].uavMask &= ~(uint64_t(1u) << index); + } + } + if (dirtyState[stage].empty()) m_state.lazy.shadersDirty.clr(stage); } diff --git a/src/d3d11/d3d11_view_uav.h b/src/d3d11/d3d11_view_uav.h index c86eba5fa88..8396fb8a1eb 100644 --- a/src/d3d11/d3d11_view_uav.h +++ b/src/d3d11/d3d11_view_uav.h @@ -43,6 +43,10 @@ namespace dxvk { return m_info.BindFlags & Flags; } + BOOL HasCounter() const { + return m_counterView != nullptr; + } + D3D11_RESOURCE_DIMENSION GetResourceType() const { D3D11_RESOURCE_DIMENSION type; m_resource->GetType(&type); From d48cc1be4dff2391a1bc808d541fad27873aa75f Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Thu, 20 Feb 2025 17:05:16 +0100 Subject: [PATCH 16/17] [d3d11] Lazy-bind pixel shader UAVs Moderately cursed because PS UAVs are also available to other graphics stages. 
--- src/d3d11/d3d11_context.cpp | 71 ++++++++++++++++++++++++++++++++- src/d3d11/d3d11_context.h | 3 ++ src/d3d11/d3d11_context_imm.cpp | 18 ++++++--- src/d3d11/d3d11_context_state.h | 2 + 4 files changed, 87 insertions(+), 7 deletions(-) diff --git a/src/d3d11/d3d11_context.cpp b/src/d3d11/d3d11_context.cpp index 8a324395465..1e5ec512a7c 100644 --- a/src/d3d11/d3d11_context.cpp +++ b/src/d3d11/d3d11_context.cpp @@ -3274,6 +3274,15 @@ namespace dxvk { auto dirtyMask = m_state.lazy.shadersDirty & m_state.lazy.shadersUsed; dirtyMask.clr(DxbcProgramType::ComputeShader); + if (unlikely(!(dirtyMask & m_state.lazy.graphicsUavShaders).isClear())) { + DxbcProgramType stage = DxbcProgramType::PixelShader; + + auto& boundMask = m_state.lazy.bindingsUsed[stage]; + auto& dirtyMask = m_state.lazy.bindingsDirty[stage]; + + ApplyDirtyUnorderedAccessViews(stage, boundMask, dirtyMask); + } + for (uint32_t stageIndex : bit::BitMask(uint32_t(dirtyMask.raw()))) { DxbcProgramType stage = DxbcProgramType(stageIndex); @@ -3564,6 +3573,8 @@ namespace dxvk { template void D3D11CommonContext::BindShader( const D3D11CommonShader* pShaderModule) { + uint64_t oldUavMask = m_state.lazy.bindingsUsed[ShaderStage].uavMask; + if (pShaderModule) { auto buffer = pShaderModule->GetIcb(); auto shader = pShaderModule->GetShader(); @@ -3616,6 +3627,33 @@ namespace dxvk { ctx->bindUniformBuffer(stage, slotId, DxvkBufferSlice()); }); } + + // On graphics, UAVs are available to all stages, but we treat them as part + // of the pixel shader binding set. Re-compute the active UAV mask. We don't + // need to set the PS as active or dirty here though since the UAV update + // code will mark all other stages that access UAVs as dirty, too. 
+ uint64_t newUavMask = m_state.lazy.bindingsUsed[ShaderStage].uavMask; + + if (ShaderStage != DxbcProgramType::ComputeShader && oldUavMask != newUavMask) { + constexpr DxbcProgramType ps = DxbcProgramType::PixelShader; + + // Since dirty UAVs are only tracked on the PS mask, we need to mark the + // stage as dirty if any of the used UAVs overlap with the dirty PS mask. + if (m_state.lazy.bindingsDirty[ps].uavMask & newUavMask) + m_state.lazy.shadersDirty.set(ShaderStage); + + // Accumulate graphics UAV mask and write it back to the pixel shader mask. + m_state.lazy.graphicsUavShaders.clr(ShaderStage); + + for (uint32_t stageIndex : bit::BitMask(uint32_t(m_state.lazy.graphicsUavShaders.raw()))) + newUavMask |= m_state.lazy.bindingsUsed[DxbcProgramType(stageIndex)].uavMask; + + m_state.lazy.bindingsUsed[ps].uavMask = newUavMask; + + // Update bit mask of shaders actively accessing graphics UAVs + if (newUavMask) + m_state.lazy.graphicsUavShaders.set(ShaderStage); + } } @@ -4462,6 +4500,32 @@ namespace dxvk { } + template + bool D3D11CommonContext::DirtyGraphicsUnorderedAccessView( + uint32_t Slot) { + constexpr DxbcProgramType ShaderStage = DxbcProgramType::PixelShader; + + if (DebugLazyBinding == Tristate::False) + return false; + + // Use different logic here and always use lazy binding for graphics UAVs. + // Since graphics UAVs are generally bound together with render targets, + // looking at the active binding mask doesn't really help us here. + uint64_t dirtyBit = uint64_t(1u) << Slot; + + if (m_state.lazy.bindingsUsed[ShaderStage].uavMask & dirtyBit) { + // Need to mark all graphics stages that use UAVs as dirty here to + // make sure that bindings actually get reapplied properly. There + // may be no pixel shader bound in this case, even though we do + // all the tracking on the pixel shader bit mask. 
+ m_state.lazy.shadersDirty.set(m_state.lazy.graphicsUavShaders); + } + + m_state.lazy.bindingsDirty[ShaderStage].uavMask |= dirtyBit; + return true; + } + + template void D3D11CommonContext::DiscardBuffer( ID3D11Resource* pResource) { @@ -4884,7 +4948,8 @@ namespace dxvk { if (CheckViewOverlap(pView, m_state.om.uavs[i].ptr())) { m_state.om.uavs[i] = nullptr; - BindUnorderedAccessView(DxbcProgramType::PixelShader, i, nullptr); + if (!DirtyGraphicsUnorderedAccessView(i)) + BindUnorderedAccessView(DxbcProgramType::PixelShader, i, nullptr); } } } @@ -5228,7 +5293,9 @@ namespace dxvk { if (m_state.om.uavs[i] != uav) { m_state.om.uavs[i] = uav; - BindUnorderedAccessView(DxbcProgramType::PixelShader, i, uav); + if (!DirtyGraphicsUnorderedAccessView(i)) + BindUnorderedAccessView(DxbcProgramType::PixelShader, i, uav); + ResolveOmSrvHazards(uav); if (NumRTVs == D3D11_KEEP_RENDER_TARGETS_AND_DEPTH_STENCIL) diff --git a/src/d3d11/d3d11_context.h b/src/d3d11/d3d11_context.h index 91530c8d120..d4b06e2d8e0 100644 --- a/src/d3d11/d3d11_context.h +++ b/src/d3d11/d3d11_context.h @@ -965,6 +965,9 @@ namespace dxvk { uint32_t Slot, bool IsNull); + bool DirtyGraphicsUnorderedAccessView( + uint32_t Slot); + void DiscardBuffer( ID3D11Resource* pResource); diff --git a/src/d3d11/d3d11_context_imm.cpp b/src/d3d11/d3d11_context_imm.cpp index 8f85f082e49..15005f40f01 100644 --- a/src/d3d11/d3d11_context_imm.cpp +++ b/src/d3d11/d3d11_context_imm.cpp @@ -1021,9 +1021,15 @@ namespace dxvk { ctx->bindResourceImageView(vkStage, srvSlot + index + m * 64u, nullptr); } - // Unbind all dirty unordered access views. Only consider compute - // here since we don't actually lazy-bind graphics UAVs. 
- if (dxStage == DxbcProgramType::ComputeShader) { + // Unbind all dirty unordered access views + VkShaderStageFlags uavStages = 0u; + + if (dxStage == DxbcProgramType::ComputeShader) + uavStages = VK_SHADER_STAGE_COMPUTE_BIT; + else if (dxStage == DxbcProgramType::PixelShader) + uavStages = VK_SHADER_STAGE_ALL_GRAPHICS; + + if (uavStages) { auto uavSlot = computeUavBinding(dxStage, 0); auto ctrSlot = computeUavCounterBinding(dxStage, 0); @@ -1057,9 +1063,11 @@ namespace dxvk { } } - if (stage == DxbcProgramType::ComputeShader) { + if (stage == DxbcProgramType::ComputeShader || stage == DxbcProgramType::PixelShader) { + auto& uavs = stage == DxbcProgramType::ComputeShader ? m_state.uav.views : m_state.om.uavs; + for (uint32_t index : bit::BitMask(dirtyState[stage].uavMask)) { - if (!m_state.uav.views[index].ptr()) + if (!uavs[index].ptr()) dirtyState[stage].uavMask &= ~(uint64_t(1u) << index); } } diff --git a/src/d3d11/d3d11_context_state.h b/src/d3d11/d3d11_context_state.h index be42ef5991b..7e56258fb40 100644 --- a/src/d3d11/d3d11_context_state.h +++ b/src/d3d11/d3d11_context_state.h @@ -313,6 +313,7 @@ namespace dxvk { struct D3D11LazyBindings { DxbcProgramTypeFlags shadersUsed = 0u; DxbcProgramTypeFlags shadersDirty = 0u; + DxbcProgramTypeFlags graphicsUavShaders = 0u; D3D11ShaderStageState bindingsUsed; D3D11ShaderStageState bindingsDirty; @@ -320,6 +321,7 @@ namespace dxvk { void reset() { shadersUsed = 0u; shadersDirty = 0u; + graphicsUavShaders = 0u; bindingsUsed.reset(); bindingsDirty.reset(); From 5490c64398bdb2d7ffc540795219a792e7af796c Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Thu, 20 Feb 2025 17:26:02 +0100 Subject: [PATCH 17/17] [d3d11] Skip unnecessary iterations when binding graphics UAVs Some games will unconditionally use a high index for UAVStartSlot. 
--- src/d3d11/d3d11_context.cpp | 8 ++++++-- src/d3d11/d3d11_context_state.h | 1 + 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/d3d11/d3d11_context.cpp b/src/d3d11/d3d11_context.cpp index 1e5ec512a7c..ae2edcc499f 100644 --- a/src/d3d11/d3d11_context.cpp +++ b/src/d3d11/d3d11_context.cpp @@ -5275,10 +5275,14 @@ namespace dxvk { if (unlikely(NumUAVs || m_state.om.maxUav)) { if (likely(NumUAVs != D3D11_KEEP_UNORDERED_ACCESS_VIEWS)) { - uint32_t newMaxUav = NumUAVs ? UAVStartSlot + NumUAVs : 0; + uint32_t newMinUav = NumUAVs ? UAVStartSlot : D3D11_1_UAV_SLOT_COUNT; + uint32_t newMaxUav = NumUAVs ? UAVStartSlot + NumUAVs : 0u; + + uint32_t oldMinUav = std::exchange(m_state.om.minUav, newMinUav); uint32_t oldMaxUav = std::exchange(m_state.om.maxUav, newMaxUav); - for (uint32_t i = 0; i < std::max(oldMaxUav, newMaxUav); i++) { + for (uint32_t i = std::min(oldMinUav, newMinUav); + i < std::max(oldMaxUav, newMaxUav); i++) { D3D11UnorderedAccessView* uav = nullptr; uint32_t ctr = ~0u; diff --git a/src/d3d11/d3d11_context_state.h b/src/d3d11/d3d11_context_state.h index 7e56258fb40..6886dd99d84 100644 --- a/src/d3d11/d3d11_context_state.h +++ b/src/d3d11/d3d11_context_state.h @@ -199,6 +199,7 @@ namespace dxvk { UINT stencilRef = D3D11_DEFAULT_STENCIL_REFERENCE; UINT maxRtv = 0u; + UINT minUav = 0u; UINT maxUav = 0u; void reset() {