diff --git a/src/core/g_palSettings.cpp b/src/core/g_palSettings.cpp index d7668139f..82a8675e1 100644 --- a/src/core/g_palSettings.cpp +++ b/src/core/g_palSettings.cpp @@ -162,6 +162,7 @@ void SettingsLoader::SetupDefaults() m_settings.overlayReportMes = true; m_settings.mipGenUseFastPath = false; m_settings.useFp16GenMips = false; + m_settings.maxMappedPoolsSize = 0; m_settings.tmzEnabled = true; #if PAL_DEVELOPER_BUILD m_settings.dbgHelperBits = 0x0; #endif @@ -610,6 +611,11 @@ void SettingsLoader::ReadSettings() &m_settings.useFp16GenMips, InternalSettingScope::PrivatePalKey); + static_cast<Pal::Device*>(m_pDevice)->ReadSetting(pmaxMappedPoolsSizeStr, + Util::ValueType::Uint64, + &m_settings.maxMappedPoolsSize, + InternalSettingScope::PrivatePalKey); + static_cast<Pal::Device*>(m_pDevice)->ReadSetting(pTmzEnabledStr, Util::ValueType::Boolean, &m_settings.tmzEnabled, InternalSettingScope::PrivatePalKey); @@ -655,6 +661,11 @@ void SettingsLoader::RereadSettings() &m_settings.useFp16GenMips, InternalSettingScope::PrivatePalKey); + static_cast<Pal::Device*>(m_pDevice)->ReadSetting(pmaxMappedPoolsSizeStr, + Util::ValueType::Uint64, + &m_settings.maxMappedPoolsSize, + InternalSettingScope::PrivatePalKey); + static_cast<Pal::Device*>(m_pDevice)->ReadSetting(pUseDccStr, Util::ValueType::Uint, &m_settings.useDcc, InternalSettingScope::PrivatePalKey); @@ -1100,6 +1111,11 @@ void SettingsLoader::InitSettingsInfo() info.valueSize = sizeof(m_settings.useFp16GenMips); m_settingsInfoMap.Insert(192229910, info); + info.type = SettingType::Uint64; + info.pValuePtr = &m_settings.maxMappedPoolsSize; + info.valueSize = sizeof(m_settings.maxMappedPoolsSize); + m_settingsInfoMap.Insert(3814409436, info); + info.type = SettingType::Boolean; info.pValuePtr = &m_settings.tmzEnabled; info.valueSize = sizeof(m_settings.tmzEnabled); diff --git a/src/core/g_palSettings.h b/src/core/g_palSettings.h index 021077102..c2b208488 100644 --- a/src/core/g_palSettings.h +++ b/src/core/g_palSettings.h @@ -279,6 +279,7 @@ struct PalSettings : public Pal::DriverSettings bool overlayReportMes; bool mipGenUseFastPath; bool useFp16GenMips; 
+ gpusize maxMappedPoolsSize; bool tmzEnabled; #if PAL_DEVELOPER_BUILD uint64 dbgHelperBits; #endif @@ -378,6 +379,7 @@ static const char* pDebugForceResourceAdditionalPaddingStr = "#3601080919"; static const char* pOverlayReportMesStr = "#1685803860"; static const char* pMipGenUseFastPathStr = "#3353227045"; static const char* pUseFp16GenMipsStr = "#192229910"; +static const char* pmaxMappedPoolsSizeStr = "#3814409436"; static const char* pTmzEnabledStr = "#2606194033"; #if PAL_DEVELOPER_BUILD static const char* pDbgHelperBitsStr = "#3894710420"; diff --git a/src/core/hw/gfxip/pipeline.cpp b/src/core/hw/gfxip/pipeline.cpp index cb6a49620..142d890f3 100644 --- a/src/core/hw/gfxip/pipeline.cpp +++ b/src/core/hw/gfxip/pipeline.cpp @@ -881,7 +881,7 @@ Result PipelineUploader::UploadUsingCpu( const SectionAddressCalculator& addressCalc, void** ppMappedPtr) { - Result result = m_pGpuMemory->Map(&m_pMappedPtr); + Result result = m_pDevice->MemMgr()->Map(m_pGpuMemory, &m_pMappedPtr); if (result == Result::Success) { m_pMappedPtr = VoidPtrInc(m_pMappedPtr, static_cast<size_t>(m_baseOffset)); @@ -1104,7 +1104,7 @@ Result PipelineUploader::End( else { PAL_ASSERT(m_pMappedPtr != nullptr); - result = m_pGpuMemory->Unmap(); + result = m_pDevice->MemMgr()->Unmap(m_pGpuMemory); } m_pMappedPtr = nullptr; diff --git a/src/core/internalMemMgr.cpp b/src/core/internalMemMgr.cpp index d39ebdeef..0919eab6d 100644 --- a/src/core/internalMemMgr.cpp +++ b/src/core/internalMemMgr.cpp @@ -129,6 +129,8 @@ InternalMemMgr::InternalMemMgr( : m_pDevice(pDevice), m_poolList(pDevice->GetPlatform()), + m_unusedMappedPoolList(pDevice->GetPlatform()), m_references(pDevice->GetPlatform()), - m_referenceWatermark(0) + m_referenceWatermark(0), + m_totalSizeMappedPools(0) { @@ -138,6 +139,24 @@ // Explicitly frees all GPU memory allocations.
void InternalMemMgr::FreeAllocations() { + + for (auto it = m_poolList.Begin(); it.Get() != nullptr; it.Next()) + { + PAL_ASSERT((it.Get() != nullptr) && (it.Get()->pBuddyAllocator != nullptr)); + + if ((it.Get()->pData != nullptr) && (it.Get()->pGpuMemory != nullptr)) + { + it.Get()->pGpuMemory->Unmap(); + it.Get()->pData = nullptr; + } + } + + while (m_unusedMappedPoolList.NumElements() > 0) + { + auto it = m_unusedMappedPoolList.Begin(); + m_unusedMappedPoolList.Erase(&it); + } + // Delete the GPU memory objects using the references list while (m_references.NumElements() != 0) { @@ -563,4 +582,129 @@ uint32 InternalMemMgr::GetReferencesCount() { return static_cast<uint32>(m_references.NumElements()); } +// ===================================================================================================================== +// Map the GPU memory allocation for CPU access +Result InternalMemMgr::Map( + GpuMemory* pGpuMemory, + void** ppData) +{ + PAL_ASSERT(pGpuMemory != nullptr); + Result result = Result::ErrorInvalidValue; + if (pGpuMemory->WasBuddyAllocated()) + { + Util::MutexAuto allocatorLock(&m_allocatorLock); // Ensure thread-safety using the lock + // Try to find the allocation in the pool list + for (auto it = m_poolList.Begin(); it.Get() != nullptr; it.Next()) + { + GpuMemoryPool* pPool = it.Get(); + + PAL_ASSERT((pPool->pGpuMemory != nullptr) && (pPool->pBuddyAllocator != nullptr)); + + if (pPool->pGpuMemory == pGpuMemory) + { + if (pPool->pData == nullptr) + { + result = pPool->pGpuMemory->Map(&pPool->pData); + if (result != Result::Success) + { + pPool->pData = nullptr; + break; + } + m_totalSizeMappedPools += pPool->pGpuMemory->Desc().size; + CheckMappedPoolLimit(); + } + else if (pPool->refCount == 0) + { + // should be in unused list, remove it from there. 
+ for (auto it2 = m_unusedMappedPoolList.Begin(); it2.Get() != nullptr; it2.Next()) + { + if (*(it2.Get()) == pPool) + { + m_unusedMappedPoolList.Erase(&it2); + break; + } + } + } + pPool->refCount++; + *ppData = pPool->pData; + result = Result::Success; + break; + } + } + + // If we didn't find the allocation in the pool list then something went wrong with the allocation scheme + PAL_ASSERT(result == Result::Success); + } + else + { + result = pGpuMemory->Map(ppData); + } + + return result; +} + +// ===================================================================================================================== +// Unmap the GPU memory allocation from CPU address space +Result InternalMemMgr::Unmap( + GpuMemory* pGpuMemory) +{ + PAL_ASSERT(pGpuMemory != nullptr); + if (pGpuMemory->WasBuddyAllocated()) + { + Util::MutexAuto allocatorLock(&m_allocatorLock); // Ensure thread-safety using the lock + // Try to find the allocation in the pool list + for (auto it = m_poolList.Begin(); it.Get() != nullptr; it.Next()) + { + GpuMemoryPool* pPool = it.Get(); + + PAL_ASSERT((pPool->pGpuMemory != nullptr) && (pPool->pBuddyAllocator != nullptr)); + if (pPool->pGpuMemory == pGpuMemory) + { + if (pPool->pData != nullptr) + { + pPool->refCount--; + if (pPool->refCount == 0) + { + m_unusedMappedPoolList.PushBack(pPool); + CheckMappedPoolLimit(); + } + } + break; + } + } + } + else + { + pGpuMemory->Unmap(); + } + + return Result::Success; +} + +// ===================================================================================================================== +// Check the total size of the mapped pools; if it is greater than the maximum limit, unmap the least recently used pools +void InternalMemMgr::CheckMappedPoolLimit() +{ + // Note: maxMappedPoolsSize is unsigned, so a ">= 0" guard would be a tautology; the while condition below already covers every case. With the default limit of 0 any unused mapped pool exceeds the budget and is unmapped as soon as it becomes unused, which preserves the legacy unmap-after-use behavior. + { + while ((m_totalSizeMappedPools > m_pDevice->Settings().maxMappedPoolsSize) + && (m_unusedMappedPoolList.NumElements() > 0)) + { + auto it = m_unusedMappedPoolList.Begin(); + GpuMemoryPool* pPool = *it.Get(); + + PAL_ASSERT(pPool->pBuddyAllocator != nullptr); + if ((pPool->pData != nullptr) && (pPool->pGpuMemory != nullptr)) + { + pPool->pGpuMemory->Unmap(); + pPool->pData = nullptr; + } + m_unusedMappedPoolList.Erase(&it); + PAL_ASSERT(m_totalSizeMappedPools >= pPool->pGpuMemory->Desc().size); + m_totalSizeMappedPools -= pPool->pGpuMemory->Desc().size; + } + } +} + + } // Pal diff --git a/src/core/internalMemMgr.h b/src/core/internalMemMgr.h index 2c9541d8f..aa27be1e2 100644 --- a/src/core/internalMemMgr.h +++ b/src/core/internalMemMgr.h @@ -56,6 +56,8 @@ struct GpuMemoryPool uint64 pagingFenceVal; // Paging fence value Util::BuddyAllocator* pBuddyAllocator; // Buddy allocator used for the suballocation + void* pData; // CPU address of the pool's mapping (nullptr while the pool is unmapped) + size_t refCount; // Number of outstanding Map() calls currently using this mapping }; // ===================================================================================================================== @@ -77,6 +79,7 @@ class InternalMemMgr typedef Util::ListIterator<GpuMemory*, Platform> GpuMemoryListIterator; typedef Util::List<GpuMemoryPool, Platform> GpuMemoryPoolList; + typedef Util::List<GpuMemoryPool*, Platform> GpuMemoryPoolRefList; explicit InternalMemMgr(Device* pDevice); ~InternalMemMgr() { FreeAllocations(); } @@ -115,6 +118,17 @@ class InternalMemMgr // Number of all allocations in the reference list. Note that this function takes the reference list lock. uint32 GetReferencesCount(); + Result Map( + GpuMemory* pGpuMemory, + void** ppData); + + Result Unmap( + GpuMemory* pGpuMemory); + + // If the total size of the mapped pools is more than the maximum limit, unmap the least recently used pool. 
+ void CheckMappedPoolLimit(); + + private: Result AllocateBaseGpuMem( const GpuMemoryCreateInfo& createInfo, @@ -133,6 +147,9 @@ class InternalMemMgr // Maintain a list of GPU memory objects that are sub-allocated GpuMemoryPoolList m_poolList; + // Maintain a list of GPU memory objects that are sub-allocated and mapped but unused + GpuMemoryPoolRefList m_unusedMappedPoolList; + // Maintain a list of internal GPU memory references GpuMemoryList m_references; @@ -142,6 +159,9 @@ class InternalMemMgr // Ever-incrementing watermark to signal changes to the internal memory reference list uint32 m_referenceWatermark; + // Total size of mapped pools + gpusize m_totalSizeMappedPools; + PAL_DISALLOW_COPY_AND_ASSIGN(InternalMemMgr); PAL_DISALLOW_DEFAULT_CTOR(InternalMemMgr); }; diff --git a/src/core/settings_core.json b/src/core/settings_core.json index 14242a5bd..2abc96079 100644 --- a/src/core/settings_core.json +++ b/src/core/settings_core.json @@ -1881,6 +1881,23 @@ "VariableName": "useFp16GenMips", "Description": "If mipGenUseFastPath == true and this is true - use the fp16 single-pass GenMips compute pass." }, + { + "Name": "maxMappedPoolsSize", + "Tags": [ + "Resource Settings", + "Performance" + ], + "Defaults": { + "Default": 0 + }, + "Flags": { + "RereadSetting": true + }, + "Scope": "PrivatePalKey", + "Type": "gpusize", + "VariableName": "maxMappedPoolsSize", + "Description": "If maxMappedPoolsSize > 0 the mapped gpu memory for pipeline creation will not be unmapped. If the total size of mapped pools grows greater than maxMappedPoolsSize, then the least recently used pools will be unmapped." + }, { "Name": "TmzEnabled", "Tags": [ @@ -2019,4 +2036,4 @@ "Description": "Maximum string length for a miscellaneous string setting" } ] -} \ No newline at end of file +}