From be76735a73d94e6cbd72c4af13aacbd5a33ccb09 Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Sat, 15 Feb 2025 22:16:03 +0100 Subject: [PATCH] [dxvk] Track atomic stores in barrier tracker This elides barriers between draws or dispatches if we can prove order-invariance through atomic operations. --- src/dxvk/dxvk_barrier.cpp | 47 ++++++++++++++++++++++++++++++++------- src/dxvk/dxvk_barrier.h | 24 ++++++++++++++++---- src/dxvk/dxvk_context.cpp | 18 +++++++-------- 3 files changed, 68 insertions(+), 21 deletions(-) diff --git a/src/dxvk/dxvk_barrier.cpp b/src/dxvk/dxvk_barrier.cpp index 287a0ff2fcc4..36b2f0a20737 100644 --- a/src/dxvk/dxvk_barrier.cpp +++ b/src/dxvk/dxvk_barrier.cpp @@ -20,20 +20,36 @@ namespace dxvk { bool DxvkBarrierTracker::findRange( const DxvkAddressRange& range, - DxvkAccess accessType) const { + DxvkAccess accessType, + DxvkAtomicStore storeOp) const { uint32_t rootIndex = computeRootIndex(range, accessType); - return findNode(range, rootIndex); + uint32_t nodeIndex = findNode(range, rootIndex); + + if (!nodeIndex || !storeOp) + return nodeIndex; + + // If we are checking for a specific order-invariant store + // op, report a hazard if either the found range does not + // cover the entire range to look up, or if the store op + // does not match previous stores. + auto& node = m_nodes[nodeIndex]; + + return node.payload.storeOp != storeOp + || !node.addressRange.contains(range); } void DxvkBarrierTracker::insertRange( const DxvkAddressRange& range, - DxvkAccess accessType) { - uint32_t rootIndex = computeRootIndex(range, accessType); + DxvkAccess accessType, + DxvkAtomicStore storeOp) { + DxvkBarrierPayload payload = { }; + payload.storeOp = storeOp; // If we can just insert the node with no conflicts, // we don't have to do anything. 
- uint32_t nodeIndex = insertNode(range, rootIndex); + uint32_t rootIndex = computeRootIndex(range, accessType); + uint32_t nodeIndex = insertNode(range, rootIndex, payload); if (likely(!nodeIndex)) return; @@ -42,6 +58,11 @@ namespace dxvk { // range we want to add already, also don't do anything. auto& node = m_nodes[nodeIndex]; + // If we merge ranges and the store ops don't match, clear any + // store op info so that accesses will have to be ordered. + if (node.payload.storeOp != payload.storeOp) + node.payload.storeOp = DxvkAtomicStore::None; + if (node.addressRange.contains(range)) return; @@ -82,12 +103,15 @@ namespace dxvk { mergedRange.rangeStart = std::min(mergedRange.rangeStart, node.addressRange.rangeStart); mergedRange.rangeEnd = std::max(mergedRange.rangeEnd, node.addressRange.rangeEnd); + if (node.payload.storeOp != payload.storeOp) + payload.storeOp = DxvkAtomicStore::None; + removeNode(nodeIndex, rootIndex); nodeIndex = findNode(range, rootIndex); } - insertNode(mergedRange, rootIndex); + insertNode(mergedRange, rootIndex, payload); } @@ -166,7 +190,8 @@ namespace dxvk { uint32_t DxvkBarrierTracker::insertNode( const DxvkAddressRange& range, - uint32_t rootIndex) { + uint32_t rootIndex, + DxvkBarrierPayload payload) { // Check if the given root is valid at all uint64_t rootBit = uint64_t(1u) << (rootIndex - 1u); @@ -178,6 +203,7 @@ namespace dxvk { auto& node = m_nodes[rootIndex]; node.header = 0; node.addressRange = range; + node.payload = payload; return 0; } else { // Traverse tree and abort if we find any range @@ -209,6 +235,7 @@ namespace dxvk { node.setRed(true); node.setParent(parentIndex); node.addressRange = range; + node.payload = payload; // Only do the fixup to maintain red-black properties if // we haven't marked the root node as red in a deletion. 
@@ -238,6 +265,7 @@ namespace dxvk { childIndex = m_nodes[childIndex].child(0); node.addressRange = m_nodes[childIndex].addressRange; + node.payload = m_nodes[childIndex].payload; removeNode(childIndex, rootIndex); } else { // Deletion is expected to be exceptionally rare, to the point of @@ -268,6 +296,7 @@ namespace dxvk { node.setRed(child.isRed()); node.addressRange = child.addressRange; + node.payload = child.payload; if (cl) m_nodes[cl].setParent(nodeIndex); if (cr) m_nodes[cr].setParent(nodeIndex); @@ -378,6 +407,7 @@ namespace dxvk { node.setChild(1, rr); std::swap(node.addressRange, m_nodes[r].addressRange); + std::swap(node.payload, m_nodes[r].payload); } @@ -406,6 +436,7 @@ namespace dxvk { node.setChild(1, l); std::swap(node.addressRange, m_nodes[l].addressRange); + std::swap(node.payload, m_nodes[l].payload); } @@ -498,4 +529,4 @@ namespace dxvk { flush(list); } -} \ No newline at end of file +} diff --git a/src/dxvk/dxvk_barrier.h b/src/dxvk/dxvk_barrier.h index 3b4cdf9c598f..a75a33c50b81 100644 --- a/src/dxvk/dxvk_barrier.h +++ b/src/dxvk/dxvk_barrier.h @@ -42,6 +42,14 @@ namespace dxvk { }; + /** + * \brief Barrier node payload + */ + struct DxvkBarrierPayload { + DxvkAtomicStore storeOp = DxvkAtomicStore::None; + }; + + /** * \brief Barrier tree node * @@ -62,6 +70,9 @@ namespace dxvk { // Address range of the node DxvkAddressRange addressRange = { }; + // Node payload + DxvkBarrierPayload payload = { }; + void setRed(bool red) { header &= ~uint64_t(1u); header |= uint64_t(red); @@ -117,21 +128,25 @@ namespace dxvk { * * \param [in] range Resource range * \param [in] accessType Access type + * \param [in] storeOp Invariant store op * \returns \c true if the range has a pending access */ bool findRange( const DxvkAddressRange& range, - DxvkAccess accessType) const; + DxvkAccess accessType, + DxvkAtomicStore storeOp) const; /** * \brief Inserts address range for a given access type * * \param [in] range Resource range * \param [in] accessType Access 
type + * \param [in] storeOp Invariant store op */ void insertRange( const DxvkAddressRange& range, - DxvkAccess accessType); + DxvkAccess accessType, + DxvkAtomicStore storeOp); /** * \brief Clears the entire structure @@ -166,7 +181,8 @@ namespace dxvk { uint32_t insertNode( const DxvkAddressRange& range, - uint32_t rootIndex); + uint32_t rootIndex, + DxvkBarrierPayload payload); void removeNode( uint32_t nodeIndex, @@ -285,4 +301,4 @@ namespace dxvk { }; -} \ No newline at end of file +} diff --git a/src/dxvk/dxvk_context.cpp b/src/dxvk/dxvk_context.cpp index e448d639dd69..ee6c269268b5 100644 --- a/src/dxvk/dxvk_context.cpp +++ b/src/dxvk/dxvk_context.cpp @@ -7723,9 +7723,9 @@ namespace dxvk { + (subresources.baseArrayLayer + subresources.layerCount - 1u); if (hasWrite) - m_barrierTracker.insertRange(range, DxvkAccess::Write); + m_barrierTracker.insertRange(range, DxvkAccess::Write, storeOp); if (hasRead) - m_barrierTracker.insertRange(range, DxvkAccess::Read); + m_barrierTracker.insertRange(range, DxvkAccess::Read, storeOp); } else { DxvkAddressRange range; range.resource = image.getResourceId(); @@ -7735,9 +7735,9 @@ namespace dxvk { range.rangeEnd = range.rangeStart + subresources.layerCount - 1u; if (hasWrite) - m_barrierTracker.insertRange(range, DxvkAccess::Write); + m_barrierTracker.insertRange(range, DxvkAccess::Write, storeOp); if (hasRead) - m_barrierTracker.insertRange(range, DxvkAccess::Read); + m_barrierTracker.insertRange(range, DxvkAccess::Read, storeOp); } } } @@ -7790,9 +7790,9 @@ namespace dxvk { range.rangeEnd = offset + size - 1; if (srcAccess & vk::AccessWriteMask) - m_barrierTracker.insertRange(range, DxvkAccess::Write); + m_barrierTracker.insertRange(range, DxvkAccess::Write, storeOp); if (srcAccess & vk::AccessReadMask) - m_barrierTracker.insertRange(range, DxvkAccess::Read); + m_barrierTracker.insertRange(range, DxvkAccess::Read, storeOp); } } @@ -7954,7 +7954,7 @@ namespace dxvk { range.rangeStart = offset; range.rangeEnd = offset + 
size - 1; - return m_barrierTracker.findRange(range, access); + return m_barrierTracker.findRange(range, access, storeOp); } @@ -7988,7 +7988,7 @@ namespace dxvk { // Probe all subresources first, only check individual mip levels // if there are overlaps and if we are checking a subset of array // layers of multiple mips. - bool dirty = m_barrierTracker.findRange(range, access); + bool dirty = m_barrierTracker.findRange(range, access, storeOp); if (!dirty || subresources.levelCount == 1u || subresources.layerCount == layerCount) return dirty; @@ -7997,7 +7997,7 @@ namespace dxvk { range.rangeStart = i * layerCount + subresources.baseArrayLayer; range.rangeEnd = range.rangeStart + subresources.layerCount - 1u; - dirty = m_barrierTracker.findRange(range, access); + dirty = m_barrierTracker.findRange(range, access, storeOp); } return dirty;