d3d9: Only upload the parts of dynamic sysmem buffers that a draw uses

When every bound vertex buffer (and, for indexed draws, the index buffer)
is a D3DPOOL_SYSTEMMEM + D3DUSAGE_DYNAMIC buffer, DrawPrimitive and
DrawIndexedPrimitive copy only the range the draw reads into a temporary
upload buffer and bind that instead. The draw then sets the new
DirtyVertexBuffers / DirtyIndexBuffer flags so that the application's own
bindings are re-applied later. D3D9VertexDecl now tracks one size per stream.

diff --git a/src/d3d9/d3d9_common_buffer.h b/src/d3d9/d3d9_common_buffer.h
index 745961980aaa..91dbbc9d945b 100644
--- a/src/d3d9/d3d9_common_buffer.h
+++ b/src/d3d9/d3d9_common_buffer.h
@@ -211,6 +211,16 @@ namespace dxvk {
         : DxvkCsThread::SynchronizeAll;
     }
 
+    /**
+     * \brief Checks for a dynamic system memory buffer
+     *
+     * D3DUSAGE_* values are bit flags, so the dynamic bit is tested
+     * rather than comparing the whole usage mask.
+     */
+    bool IsSysmemDynamic() const {
+      return m_desc.Pool == D3DPOOL_SYSTEMMEM && (m_desc.Usage & D3DUSAGE_DYNAMIC) != 0;
+    }
+
   private:
 
     Rc<DxvkBuffer> CreateBuffer() const;
diff --git a/src/d3d9/d3d9_device.cpp b/src/d3d9/d3d9_device.cpp
index e4525eba67ab..e431af6ee3b6 100644
--- a/src/d3d9/d3d9_device.cpp
+++ b/src/d3d9/d3d9_device.cpp
@@ -2615,6 +2615,65 @@ namespace dxvk {
     if (unlikely(!PrimitiveCount))
       return S_OK;
 
+    bool promoteToUPDraw = true;
+    for (uint32_t i = 0; i < caps::MaxStreams && promoteToUPDraw; i++) {
+      auto* vbo = GetCommonBuffer(m_state.vertexBuffers[i].vertexBuffer);
+      promoteToUPDraw &= vbo == nullptr || vbo->IsSysmemDynamic();
+    }
+    if (unlikely(promoteToUPDraw)) {
+      // If this draw only uses D3DPOOL_SYSTEMMEM + D3DUSAGE_DYNAMIC buffers,
+      // we only upload the bits of data that this specific draw actually uses.
+      // Similar to DrawPrimitiveUP
+
+      PrepareDraw(PrimitiveType, true);
+      uint32_t vertexCount = GetVertexCount(PrimitiveType, PrimitiveCount);
+
+      for (uint32_t i = 0; i < caps::MaxStreams; i++) {
+        auto* vbo = GetCommonBuffer(m_state.vertexBuffers[i].vertexBuffer);
+        // The stream's own stride and offset describe the layout of the data;
+        // the declaration size only covers the bytes of a vertex that get read.
+        const uint32_t vertexStride = m_state.vertexBuffers[i].stride;
+        uint32_t offset = m_state.vertexBuffers[i].offset + StartVertex * vertexStride;
+        const uint32_t vertexBufferSize = vbo ? vbo->Desc()->Size : 0;
+        if (vbo == nullptr || offset >= vertexBufferSize) {
+          EmitCs([
+            cStream = i
+          ](DxvkContext* ctx) {
+            ctx->bindVertexBuffer(cStream, DxvkBufferSlice(), 0);
+          });
+          continue;
+        }
+
+        const uint32_t vertexSize = m_state.vertexDecl->GetSize(i);
+        const uint32_t vertexDataSize = std::min(
+          (vertexCount - 1) * vertexStride + std::max(vertexSize, vertexStride),
+          vertexBufferSize - offset);
+
+        auto upSlice = AllocUPBuffer(vertexDataSize);
+        uint8_t* data = reinterpret_cast<uint8_t*>(upSlice.mapPtr);
+        uint8_t* src = reinterpret_cast<uint8_t*>(vbo->GetMappedSlice().mapPtr) + offset;
+        std::memcpy(data, src, vertexDataSize);
+
+        EmitCs([
+          cStream = i,
+          cBufferSlice = std::move(upSlice.slice),
+          cStride = vertexStride
+        ](DxvkContext* ctx) mutable {
+          ctx->bindVertexBuffer(cStream, std::move(cBufferSlice), cStride);
+        });
+      }
+
+      EmitCs([
+        cVertexCount = vertexCount
+      ](DxvkContext* ctx) {
+        ctx->draw(cVertexCount, 1, 0, 0);
+      });
+
+      m_flags.set(D3D9DeviceFlag::DirtyVertexBuffers);
+
+      return D3D_OK;
+    }
+
     PrepareDraw(PrimitiveType, false);
 
     EmitCs([this,
@@ -2652,6 +2711,99 @@ namespace dxvk {
     if (unlikely(!PrimitiveCount))
       return S_OK;
 
+    bool promoteToUPDraw = true;
+    for (uint32_t i = 0; i < caps::MaxStreams && promoteToUPDraw; i++) {
+      auto* vbo = GetCommonBuffer(m_state.vertexBuffers[i].vertexBuffer);
+      promoteToUPDraw &= vbo == nullptr || vbo->IsSysmemDynamic();
+    }
+    D3D9CommonBuffer* ibo = GetCommonBuffer(m_state.indices);
+    promoteToUPDraw &= ibo != nullptr && ibo->IsSysmemDynamic();
+    if (unlikely(promoteToUPDraw)) {
+      // If this draw only uses D3DPOOL_SYSTEMMEM + D3DUSAGE_DYNAMIC buffers,
+      // we only upload the bits of data that this specific draw actually uses.
+      // Similar to DrawPrimitiveUP
+
+      PrepareDraw(PrimitiveType, true);
+
+      for (uint32_t i = 0; i < caps::MaxStreams; i++) {
+        auto* vbo = GetCommonBuffer(m_state.vertexBuffers[i].vertexBuffer);
+        // The stream's own stride and offset describe the layout of the data;
+        // the declaration size only covers the bytes of a vertex that get read.
+        const uint32_t vertexStride = m_state.vertexBuffers[i].stride;
+        uint32_t offset = m_state.vertexBuffers[i].offset + (BaseVertexIndex + MinVertexIndex) * vertexStride;
+        const uint32_t vertexBufferSize = vbo ? vbo->Desc()->Size : 0;
+        if (vbo == nullptr || offset >= vertexBufferSize) {
+          EmitCs([
+            cStream = i
+          ](DxvkContext* ctx) {
+            ctx->bindVertexBuffer(cStream, DxvkBufferSlice(), 0);
+          });
+          continue;
+        }
+
+        const uint32_t vertexSize = m_state.vertexDecl->GetSize(i);
+        const uint32_t vertexDataSize = std::min(
+          (NumVertices - 1) * vertexStride + std::max(vertexSize, vertexStride),
+          vertexBufferSize - offset);
+
+        auto upSlice = AllocUPBuffer(vertexDataSize);
+        uint8_t* data = reinterpret_cast<uint8_t*>(upSlice.mapPtr);
+        uint8_t* src = reinterpret_cast<uint8_t*>(vbo->GetMappedSlice().mapPtr) + offset;
+        std::memcpy(data, src, vertexDataSize);
+
+        EmitCs([
+          cStream = i,
+          cBufferSlice = std::move(upSlice.slice),
+          cStride = vertexStride
+        ](DxvkContext* ctx) mutable {
+          ctx->bindVertexBuffer(cStream, std::move(cBufferSlice), cStride);
+        });
+      }
+
+      uint32_t indexStride = ibo && ibo->Desc()->Format == D3D9Format::INDEX16 ? 2 : 4;
+      VkIndexType indexType = ibo && ibo->Desc()->Format == D3D9Format::INDEX16 ? VK_INDEX_TYPE_UINT16 : VK_INDEX_TYPE_UINT32;
+      uint32_t offset = indexStride * StartIndex;
+      uint32_t indexBufferSize = ibo ? ibo->Desc()->Size : 0;
+      uint32_t vertexCount = GetVertexCount(PrimitiveType, PrimitiveCount);
+      if (ibo == nullptr || offset >= indexBufferSize) {
+        EmitCs([
+          cIndexType = indexType
+        ](DxvkContext* ctx) {
+          ctx->bindIndexBuffer(DxvkBufferSlice(), cIndexType);
+        });
+      } else {
+        // Never read past the end of the index buffer
+        uint32_t indexDataSize = std::min(vertexCount * indexStride, indexBufferSize - offset);
+
+        auto upSlice = AllocUPBuffer(indexDataSize);
+        uint8_t* data = reinterpret_cast<uint8_t*>(upSlice.mapPtr);
+        uint8_t* src = reinterpret_cast<uint8_t*>(ibo->GetMappedSlice().mapPtr) + offset;
+
+        std::memcpy(data, src, indexDataSize);
+        EmitCs([
+          cBufferSlice = std::move(upSlice.slice),
+          cIndexType = indexType
+        ](DxvkContext* ctx) mutable {
+          ctx->bindIndexBuffer(std::move(cBufferSlice), cIndexType);
+        });
+      }
+
+      // The uploaded vertex data starts at MinVertexIndex, so index values
+      // have to be rebased by that amount when fetching vertices.
+      EmitCs([
+        cVertexCount = vertexCount,
+        cInstanceCount = GetInstanceCount(),
+        cVertexOffset = -int32_t(MinVertexIndex)
+      ](DxvkContext* ctx) {
+        ctx->drawIndexed(cVertexCount, cInstanceCount, 0, cVertexOffset, 0);
+      });
+
+      m_flags.set(D3D9DeviceFlag::DirtyVertexBuffers);
+      m_flags.set(D3D9DeviceFlag::DirtyIndexBuffer);
+
+      return D3D_OK;
+    }
+
     PrepareDraw(PrimitiveType, false);
 
     EmitCs([this,
@@ -2842,7 +2994,7 @@ namespace dxvk {
       decl = iter->second.ptr();
     }
 
-    uint32_t offset = DestIndex * decl->GetSize();
+    uint32_t offset = DestIndex * decl->GetSize(0);
 
     auto slice = dst->GetBufferSlice();
          slice = slice.subSlice(offset, slice.length() - offset);
@@ -2889,7 +3041,7 @@ namespace dxvk {
     }
 
     if (dst->GetMapMode() == D3D9_COMMON_BUFFER_MAP_MODE_BUFFER) {
-      uint32_t copySize = VertexCount * decl->GetSize();
+      uint32_t copySize = VertexCount * decl->GetSize(0);
 
       EmitCs([
         cSrcBuffer = dst->GetBuffer(),
@@ -6616,6 +6768,19 @@ namespace dxvk {
     }
 
     BindSpecConstants();
+
+    if (unlikely(m_flags.test(D3D9DeviceFlag::DirtyVertexBuffers))) {
+      for (uint32_t i = 0; i < caps::MaxStreams; i++) {
+        const D3D9VBO& vbo = m_state.vertexBuffers[i];
+        BindVertexBuffer(i, vbo.vertexBuffer.ptr(), vbo.offset, vbo.stride);
+      }
+      m_flags.clr(D3D9DeviceFlag::DirtyVertexBuffers);
+    }
+
+    if (unlikely(m_flags.test(D3D9DeviceFlag::DirtyIndexBuffer))) {
+      BindIndices();
+      m_flags.clr(D3D9DeviceFlag::DirtyIndexBuffer);
+    }
   }
 
 
diff --git a/src/d3d9/d3d9_device.h b/src/d3d9/d3d9_device.h
index fccba4c717a5..15151e1e38b7 100644
--- a/src/d3d9/d3d9_device.h
+++ b/src/d3d9/d3d9_device.h
@@ -66,6 +66,8 @@ namespace dxvk {
     DirtyInputLayout,
     DirtyViewportScissor,
     DirtyMultiSampleState,
+    DirtyVertexBuffers,
+    DirtyIndexBuffer,
 
     DirtyFogState,
     DirtyFogColor,
@@ -1063,7 +1065,7 @@ namespace dxvk {
     }
 
     inline uint32_t GetUPBufferSize(uint32_t vertexCount, uint32_t stride) {
-      return (vertexCount - 1) * stride + std::max(m_state.vertexDecl->GetSize(), stride);
+      return (vertexCount - 1) * stride + std::max(m_state.vertexDecl->GetSize(0), stride);
     }
 
     inline void FillUPVertexBuffer(void* buffer, const void* userData, uint32_t dataSize, uint32_t bufferSize) {
diff --git a/src/d3d9/d3d9_swvp_emu.cpp b/src/d3d9/d3d9_swvp_emu.cpp
index 210edf01f1f6..213bd2790847 100644
--- a/src/d3d9/d3d9_swvp_emu.cpp
+++ b/src/d3d9/d3d9_swvp_emu.cpp
@@ -144,7 +144,7 @@ namespace dxvk {
       uint32_t primitiveId = m_module.opLoad(uint_t, primitiveIdPtr);
 
       // The size of any given vertex
-      uint32_t vertexSize = m_module.constu32(pDecl->GetSize() / sizeof(uint32_t));
+      uint32_t vertexSize = m_module.constu32(pDecl->GetSize(0) / sizeof(uint32_t));
 
       //The offset of this vertex from the beginning of the buffer
       uint32_t thisVertexOffset = m_module.opIMul(uint_t, vertexSize, primitiveId);
diff --git a/src/d3d9/d3d9_vertex_declaration.cpp b/src/d3d9/d3d9_vertex_declaration.cpp
index 58c43be43ac0..be9cd25e80e7 100644
--- a/src/d3d9/d3d9_vertex_declaration.cpp
+++ b/src/d3d9/d3d9_vertex_declaration.cpp
@@ -354,8 +354,8 @@ namespace dxvk {
 
   void D3D9VertexDecl::Classify() {
     for (const auto& element : m_elements) {
-      if (element.Stream == 0 && element.Type != D3DDECLTYPE_UNUSED)
-        m_size = std::max(m_size, element.Offset + GetDecltypeSize(D3DDECLTYPE(element.Type)));
+      if (element.Stream < m_sizes.size() && element.Type != D3DDECLTYPE_UNUSED)
+        m_sizes[element.Stream] = std::max(m_sizes[element.Stream], element.Offset + GetDecltypeSize(D3DDECLTYPE(element.Type)));
 
       if (element.Usage == D3DDECLUSAGE_COLOR && element.UsageIndex == 0)
         m_flags.set(D3D9VertexDeclFlag::HasColor0);
diff --git a/src/d3d9/d3d9_vertex_declaration.h b/src/d3d9/d3d9_vertex_declaration.h
index 82b182756fdf..404555cc8905 100644
--- a/src/d3d9/d3d9_vertex_declaration.h
+++ b/src/d3d9/d3d9_vertex_declaration.h
@@ -50,8 +50,8 @@ namespace dxvk {
       return m_elements;
     }
 
-    UINT GetSize() const {
-      return m_size;
+    UINT GetSize(UINT Stream) const {
+      return m_sizes[Stream];
     }
 
     bool TestFlag(D3D9VertexDeclFlag flag) const {
@@ -94,8 +94,7 @@ namespace dxvk {
 
     uint32_t                       m_texcoordMask = 0;
 
-    // The size of Stream 0. That's all we care about.
-    uint32_t                       m_size = 0;
+    std::array<uint32_t, caps::MaxStreams> m_sizes = {};
 
   };
 