From 73236efe5887ea6644572fd651d930c8b408ea9f Mon Sep 17 00:00:00 2001 From: kd-11 <15904127+kd-11@users.noreply.github.com> Date: Thu, 12 Dec 2019 16:29:55 +0300 Subject: [PATCH 01/21] vk: Remove some outdated code (#7060) --- rpcs3/Emu/RSX/VK/VKHelpers.h | 69 ++++-------------------------------- 1 file changed, 7 insertions(+), 62 deletions(-) diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index a688041dc53c..dc0def2eadcb 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -1735,58 +1735,9 @@ namespace vk VkDevice m_device; }; - class swapchain_image_WSI + struct swapchain_image_WSI { - VkImageView view = nullptr; - VkImage image = nullptr; - VkFormat internal_format; - vk::render_device *owner = nullptr; - - public: - swapchain_image_WSI() = default; - - void create(vk::render_device &dev, VkImage &swap_image, VkFormat format) - { - VkImageViewCreateInfo color_image_view = {}; - - color_image_view.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; - color_image_view.format = format; - - color_image_view.components.r = VK_COMPONENT_SWIZZLE_R; - color_image_view.components.g = VK_COMPONENT_SWIZZLE_G; - color_image_view.components.b = VK_COMPONENT_SWIZZLE_B; - color_image_view.components.a = VK_COMPONENT_SWIZZLE_A; - - color_image_view.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - color_image_view.subresourceRange.baseMipLevel = 0; - color_image_view.subresourceRange.levelCount = 1; - color_image_view.subresourceRange.baseArrayLayer = 0; - color_image_view.subresourceRange.layerCount = 1; - - color_image_view.viewType = VK_IMAGE_VIEW_TYPE_2D; - - color_image_view.image = swap_image; - vkCreateImageView(dev, &color_image_view, nullptr, &view); - - image = swap_image; - internal_format = format; - owner = &dev; - } - - void discard(vk::render_device &dev) - { - vkDestroyImageView(dev, view, nullptr); - } - - operator VkImage&() - { - return image; - } - - operator VkImageView&() - { - return view; - } + VkImage value = VK_NULL_HANDLE; }; class swapchain_image_RPCS3 : public image @@ -1878,7 +1829,7 @@ namespace vk virtual bool init() = 0; virtual u32 get_swap_image_count() const = 0; - virtual VkImage& get_image(u32 index) = 0; + virtual VkImage get_image(u32 index) = 0; virtual VkResult acquire_next_swapchain_image(VkSemaphore semaphore, u64 timeout, u32* result) = 0; virtual void end_frame(command_buffer& cmd, u32 index) = 0; virtual VkResult present(VkSemaphore semaphore, u32 index) = 0; @@ -2210,7 +2161,7 @@ namespace vk swapchain_images[index].second->do_dma_transfer(cmd); } - VkImage& get_image(u32 index) override + VkImage get_image(u32 index) override { return swapchain_images[index].second->value; } @@ -2261,7 +2212,7 @@ namespace vk swapchain_images.resize(nb_swap_images); for (u32 i = 0; i < nb_swap_images; ++i) { - swapchain_images[i].create(dev, vk_images[i], m_surface_format); + swapchain_images[i].value = vk_images[i]; } } @@ -2309,9 +2260,6 @@ namespace vk { if (m_vk_swapchain) { - for (auto &img : swapchain_images) - img.discard(dev); - destroySwapchainKHR(pdev, m_vk_swapchain, nullptr); } @@ -2444,9 +2392,6 @@ namespace vk { if (!swapchain_images.empty()) { - for (auto &img : swapchain_images) - img.discard(dev); - swapchain_images.clear(); } @@ -2485,9 +2430,9 @@ namespace vk return queuePresentKHR(vk_present_queue, &present); } - VkImage& get_image(u32 index) override + VkImage get_image(u32 index) override { - return static_cast(swapchain_images[index]); + return swapchain_images[index].value; } 
VkImageLayout get_optimal_present_layout() override From 69e81da57b06e7d80ad3526c7b184972e08cab62 Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Tue, 10 Dec 2019 19:17:03 +0300 Subject: [PATCH 02/21] Fix some warnings --- Utilities/Thread.cpp | 7 ++++--- rpcs3/Emu/Cell/Modules/cellSysutil.cpp | 8 ++++++-- rpcs3/cmake_modules/ConfigureCompiler.cmake | 2 -- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/Utilities/Thread.cpp b/Utilities/Thread.cpp index da5f79e36eb1..0ec3aa70c539 100644 --- a/Utilities/Thread.cpp +++ b/Utilities/Thread.cpp @@ -1323,8 +1323,8 @@ bool handle_access_violation(u32 addr, bool is_writing, x64_context* context) { const auto& spu = static_cast(*cpu); - const u64 type = spu.offset < RAW_SPU_BASE_ADDR ? - SYS_MEMORY_PAGE_FAULT_TYPE_SPU_THREAD : + const u64 type = spu.offset < RAW_SPU_BASE_ADDR ? + SYS_MEMORY_PAGE_FAULT_TYPE_SPU_THREAD : SYS_MEMORY_PAGE_FAULT_TYPE_RAW_SPU; data2 = (type << 32) | spu.lv2_id; @@ -1807,7 +1807,8 @@ void thread_ctrl::_wait_for(u64 usec, bool alert /* true */) timeout.it_interval.tv_sec = 0; timeout.it_interval.tv_nsec = 0; timerfd_settime(_this->m_timer, 0, &timeout, NULL); - read(_this->m_timer, &missed, sizeof(missed)); + if (read(_this->m_timer, &missed, sizeof(missed)) != sizeof(missed)) + LOG_ERROR(GENERAL, "timerfd: read() failed"); return; } #endif diff --git a/rpcs3/Emu/Cell/Modules/cellSysutil.cpp b/rpcs3/Emu/Cell/Modules/cellSysutil.cpp index 18da8df6f0c2..d78b3cdc7786 100644 --- a/rpcs3/Emu/Cell/Modules/cellSysutil.cpp +++ b/rpcs3/Emu/Cell/Modules/cellSysutil.cpp @@ -129,6 +129,10 @@ void fmt_class_string::format(std::string& out, u64 arg) case CELL_SYSUTIL_SYSTEMPARAM_ID_MAGNETOMETER: return "ID_MAGNETOMETER"; case CELL_SYSUTIL_SYSTEMPARAM_ID_NICKNAME: return "ID_NICKNAME"; case CELL_SYSUTIL_SYSTEMPARAM_ID_CURRENT_USERNAME: return "ID_CURRENT_USERNAME"; + case CELL_SYSUTIL_SYSTEMPARAM_ID_x1008: return "ID_x1008"; + case CELL_SYSUTIL_SYSTEMPARAM_ID_x1011: return "ID_x1011"; + case CELL_SYSUTIL_SYSTEMPARAM_ID_x1012: return "ID_x1012"; + case CELL_SYSUTIL_SYSTEMPARAM_ID_x1024: return "ID_x1024"; } return unknown; @@ -143,7 +147,7 @@ s32 sysutil_check_name_string(const char* src, s32 minlen, s32 maxlen) if (g_ps3_process_info.sdk_ver > 0x36FFFF) { // Limit null terminator boundary to before buffer max size - lastpos = std::max(maxlen - 1, 0); + lastpos = std::max(maxlen - 1, 0); } else { @@ -307,7 +311,7 @@ error_code cellSysutilGetSystemParamString(CellSysutilParamId id, vm::ptr case CELL_SYSUTIL_SYSTEMPARAM_ID_CURRENT_USERNAME: { const fs::file username(vfs::get(fmt::format("/dev_hdd0/home/%08u/localusername", Emu.GetUsrId()))); - + if (!username) { cellSysutil.error("cellSysutilGetSystemParamString(): Username for user %08u doesn't exist. 
Did you delete the username file?", Emu.GetUsrId()); diff --git a/rpcs3/cmake_modules/ConfigureCompiler.cmake b/rpcs3/cmake_modules/ConfigureCompiler.cmake index e17580d787ef..1cfd3682f0be 100644 --- a/rpcs3/cmake_modules/ConfigureCompiler.cmake +++ b/rpcs3/cmake_modules/ConfigureCompiler.cmake @@ -39,8 +39,6 @@ else() add_compile_options(-Wno-attributes) add_compile_options(-Wno-enum-compare) add_compile_options(-Wno-comment) - add_compile_options(-Wno-overloaded-virtual) - add_compile_options(-Wno-missing-braces) add_compile_options(-Wno-sign-compare) if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") From dbece4d00fc8b6d815f33e596f0bd81bf35e4937 Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Fri, 13 Dec 2019 02:11:21 +0300 Subject: [PATCH 03/21] LLVM: add workaround for cascadelake CPU Need to investigate it later. --- Utilities/JIT.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Utilities/JIT.cpp b/Utilities/JIT.cpp index e8ebed49cb2e..1f7f9c19be27 100644 --- a/Utilities/JIT.cpp +++ b/Utilities/JIT.cpp @@ -982,6 +982,10 @@ std::string jit_compiler::cpu(const std::string& _cpu) { m_cpu = "skylake"; } + else + { + m_cpu = "skylake-avx512"; + } } if (m_cpu == "znver1" && utils::has_clwb()) From 321f7e71977f9f7d32766e159a67debe7ebc2c36 Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Fri, 13 Dec 2019 03:21:43 +0300 Subject: [PATCH 04/21] Fix missing-braces warnings --- rpcs3/Emu/Cell/Modules/cellNetCtl.cpp | 6 +++--- rpcs3/Emu/Cell/Modules/cellSpurs.cpp | 8 ++++---- rpcs3/Emu/Cell/Modules/cellSync.cpp | 5 ++--- rpcs3/Emu/RSX/VK/VKRenderTargets.h | 2 +- 4 files changed, 10 insertions(+), 11 deletions(-) diff --git a/rpcs3/Emu/Cell/Modules/cellNetCtl.cpp b/rpcs3/Emu/Cell/Modules/cellNetCtl.cpp index dcf88f79b0fa..5b13477abb51 100644 --- a/rpcs3/Emu/Cell/Modules/cellNetCtl.cpp +++ b/rpcs3/Emu/Cell/Modules/cellNetCtl.cpp @@ -415,7 +415,7 @@ error_code cellGameUpdateCheckStartAsyncEx(vm::cptr param, cellNetCtl.todo("cellGameUpdateCheckStartAsyncEx(param=*0x%x, cb_func=*0x%x, userdata=*0x%x)", param, cb_func, userdata); sysutil_register_cb([=](ppu_thread& ppu) -> s32 { - cb_func(ppu, vm::make_var(CellGameUpdateResult{ CELL_GAMEUPDATE_RESULT_STATUS_NO_UPDATE, CELL_OK, 0x0, 0x0}), userdata); + cb_func(ppu, vm::make_var(CellGameUpdateResult{CELL_GAMEUPDATE_RESULT_STATUS_NO_UPDATE, CELL_OK}), userdata); return CELL_OK; }); return CELL_OK; @@ -427,7 +427,7 @@ error_code cellGameUpdateCheckFinishAsyncEx(vm::ptr cb const s32 PROCESSING_COMPLETE = 5; sysutil_register_cb([=](ppu_thread& ppu) -> s32 { - cb_func(ppu, vm::make_var(CellGameUpdateResult{ CELL_GAMEUPDATE_RESULT_STATUS_FINISHED, CELL_OK, 0x0, 0x0}), userdata); + cb_func(ppu, vm::make_var(CellGameUpdateResult{CELL_GAMEUPDATE_RESULT_STATUS_FINISHED, CELL_OK}), userdata); return CELL_OK; }); return CELL_OK; @@ -438,7 +438,7 @@ error_code cellGameUpdateCheckStartWithoutDialogAsyncEx(vm::ptr s32 { - cb_func(ppu, vm::make_var(CellGameUpdateResult{ CELL_GAMEUPDATE_RESULT_STATUS_NO_UPDATE, CELL_OK, 0x0, 0x0}), userdata); + cb_func(ppu, vm::make_var(CellGameUpdateResult{CELL_GAMEUPDATE_RESULT_STATUS_NO_UPDATE, CELL_OK}), userdata); return CELL_OK; }); return CELL_OK; diff --git a/rpcs3/Emu/Cell/Modules/cellSpurs.cpp b/rpcs3/Emu/Cell/Modules/cellSpurs.cpp index e57e879a1a27..012f9829d9de 100644 --- a/rpcs3/Emu/Cell/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/Cell/Modules/cellSpurs.cpp @@ -765,7 +765,7 @@ void _spurs::event_helper_entry(ppu_thread& ppu, vm::ptr spurs) s32 _spurs::create_event_helper(ppu_thread& ppu, vm::ptr spurs, u32 ppuPriority) { - if (s32 
rc = _spurs::create_lv2_eq(ppu, spurs, spurs.ptr(&CellSpurs::eventQueue), spurs.ptr(&CellSpurs::spuPort), 0x2A, sys_event_queue_attribute_t{ SYS_SYNC_PRIORITY, SYS_PPU_QUEUE, "_spuPrv\0"_u64 })) + if (s32 rc = _spurs::create_lv2_eq(ppu, spurs, spurs.ptr(&CellSpurs::eventQueue), spurs.ptr(&CellSpurs::spuPort), 0x2A, sys_event_queue_attribute_t{SYS_SYNC_PRIORITY, SYS_PPU_QUEUE, {"_spuPrv\0"_u64}})) { return rc; } @@ -1142,14 +1142,14 @@ s32 _spurs::initialize(ppu_thread& ppu, vm::ptr spurs, u32 revision, const auto lwCond = spurs.ptr(&CellSpurs::cond); // Create a mutex to protect access to SPURS handler thread data - if (s32 rc = sys_lwmutex_create(ppu, lwMutex, vm::make_var(sys_lwmutex_attribute_t{ SYS_SYNC_PRIORITY, SYS_SYNC_NOT_RECURSIVE, "_spuPrv\0"_u64 }))) + if (s32 rc = sys_lwmutex_create(ppu, lwMutex, vm::make_var(sys_lwmutex_attribute_t{SYS_SYNC_PRIORITY, SYS_SYNC_NOT_RECURSIVE, {"_spuPrv\0"_u64}}))) { _spurs::finalize_spu(ppu, spurs); return rollback(), rc; } // Create condition variable to signal the SPURS handler thread - if (s32 rc = sys_lwcond_create(ppu, lwCond, lwMutex, vm::make_var(sys_lwcond_attribute_t{ "_spuPrv\0"_u64 }))) + if (s32 rc = sys_lwcond_create(ppu, lwCond, lwMutex, vm::make_var(sys_lwcond_attribute_t{"_spuPrv\0"_u64}))) { sys_lwmutex_destroy(ppu, lwMutex); _spurs::finalize_spu(ppu, spurs); @@ -3046,7 +3046,7 @@ s32 cellSpursEventFlagAttachLv2EventQueue(ppu_thread& ppu, vm::ptr queue, vm::ptr buf } // clear sync var, write size, depth, buffer addr and sync - queue->ctrl.store({ 0, 0 }); + queue->ctrl.store({}); queue->size = size; queue->depth = depth; queue->buffer = buffer; @@ -722,8 +722,7 @@ error_code cellSyncQueueClear(ppu_thread& ppu, vm::ptr queue) } } - queue->ctrl.exchange({ 0, 0 }); - + queue->ctrl.store({}); return CELL_OK; } diff --git a/rpcs3/Emu/RSX/VK/VKRenderTargets.h b/rpcs3/Emu/RSX/VK/VKRenderTargets.h index 6f14697c6ce2..adb6eb22540a 100644 --- a/rpcs3/Emu/RSX/VK/VKRenderTargets.h +++ b/rpcs3/Emu/RSX/VK/VKRenderTargets.h @@ -208,7 +208,7 @@ namespace vk VkImageSubresourceRange range{ surface->aspect(), 0, 1, 0, 1 }; if (surface->aspect() & VK_IMAGE_ASPECT_COLOR_BIT) { - VkClearColorValue color = { 0.f, 0.f, 0.f, 1.f }; + VkClearColorValue color = {{0.f, 0.f, 0.f, 1.f}}; vkCmdClearColorImage(cmd, surface->value, surface->current_layout, &color, 1, &range); } else From e3e97da7bb6c4b58dc89da78223efeb208efb76f Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Fri, 13 Dec 2019 16:19:08 +0300 Subject: [PATCH 05/21] LLVM: remove cascadelake workaround and update LLVM fork VPTERNLOG fix may be necessary. --- Utilities/JIT.cpp | 4 ---- llvm | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/Utilities/JIT.cpp b/Utilities/JIT.cpp index 1f7f9c19be27..e8ebed49cb2e 100644 --- a/Utilities/JIT.cpp +++ b/Utilities/JIT.cpp @@ -982,10 +982,6 @@ std::string jit_compiler::cpu(const std::string& _cpu) { m_cpu = "skylake"; } - else - { - m_cpu = "skylake-avx512"; - } } if (m_cpu == "znver1" && utils::has_clwb()) diff --git a/llvm b/llvm index 9836c299733e..2e038bff1082 160000 --- a/llvm +++ b/llvm @@ -1 +1 @@ -Subproject commit 9836c299733ee5ef14760cd5bdae27e8233d2393 +Subproject commit 2e038bff1082175b510a2e8336edf897af9b87a3 From 364ba95d261616e16a73e21e2f4d03af7da23297 Mon Sep 17 00:00:00 2001 From: Jayveer Date: Wed, 4 Dec 2019 19:55:40 +0000 Subject: [PATCH 06/21] Updated sys_net to use WSAPoll on win32 as this fixes connection issues in Metal Gear Online. 
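For reference, WSAPoll() (Winsock 2, Windows Vista and later) accepts the same pollfd array, event flags and timeout semantics as POSIX poll(), which is what lets the Windows path share the generic ::pollfd polling loop instead of the WSAEventSelect/WSAEnumNetworkEvents machinery removed below. A minimal sketch of the shared pattern — poll_sockets() is an illustrative helper, not a function that exists in sys_net.cpp:

#ifdef _WIN32
#include <winsock2.h>            // WSAPoll, struct pollfd (WSAPOLLFD)
#pragma comment(lib, "ws2_32.lib")
#else
#include <poll.h>                // poll, struct pollfd, nfds_t
#endif
#include <vector>

// Wait up to timeout_ms for the requested events (POLLIN/POLLOUT/...) on a set
// of sockets; both branches use the same pollfd layout and return convention.
inline int poll_sockets(std::vector<pollfd>& fds, int timeout_ms)
{
#ifdef _WIN32
	return ::WSAPoll(fds.data(), static_cast<ULONG>(fds.size()), timeout_ms);
#else
	return ::poll(fds.data(), static_cast<nfds_t>(fds.size()), timeout_ms);
#endif
}

One caveat worth keeping in mind: older Windows releases are known not to report a failed non-blocking connect() through WSAPoll the way poll() does via POLLERR, so connect error handling may still need platform-specific care.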
Also aligns with implementations on non win32 platforms --- rpcs3/Emu/Cell/Modules/cellNetCtl.cpp | 8 ++- rpcs3/Emu/Cell/lv2/sys_net.cpp | 89 +++------------------------ 2 files changed, 16 insertions(+), 81 deletions(-) diff --git a/rpcs3/Emu/Cell/Modules/cellNetCtl.cpp b/rpcs3/Emu/Cell/Modules/cellNetCtl.cpp index 5b13477abb51..a3537b569cdc 100644 --- a/rpcs3/Emu/Cell/Modules/cellNetCtl.cpp +++ b/rpcs3/Emu/Cell/Modules/cellNetCtl.cpp @@ -280,10 +280,16 @@ error_code cellNetCtlNetStartDialogUnloadAsync(vm::ptrsize != 8) { + result->result = CELL_NET_CTL_ERROR_INVALID_SIZE; return CELL_NET_CTL_ERROR_INVALID_SIZE; } - result->result = CELL_NET_CTL_ERROR_DIALOG_CANCELED; + if (g_cfg.net.net_status == CELL_NET_CTL_STATE_Disconnected) + { + result->result = CELL_NET_CTL_ERROR_NET_NOT_CONNECTED; + return CELL_NET_CTL_ERROR_NET_NOT_CONNECTED; + } + sysutil_send_system_cmd(CELL_SYSUTIL_NET_CTL_NETSTART_UNLOADED, 0); return CELL_OK; diff --git a/rpcs3/Emu/Cell/lv2/sys_net.cpp b/rpcs3/Emu/Cell/lv2/sys_net.cpp index a9a627dc7cfb..c4f7d8497c55 100644 --- a/rpcs3/Emu/Cell/lv2/sys_net.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_net.cpp @@ -175,14 +175,9 @@ struct network_thread { #ifdef _WIN32 WSACleanup(); - CloseHandle(_eventh); #endif } -#ifdef _WIN32 - HANDLE _eventh = CreateEventW(nullptr, false, false, nullptr); -#endif - void operator()() { std::vector> socklist; @@ -190,16 +185,13 @@ struct network_thread s_to_awake.clear(); -#ifdef _WIN32 -#else ::pollfd fds[lv2_socket::id_count]{}; -#endif while (thread_ctrl::state() != thread_state::aborting) { // Wait with 1ms timeout #ifdef _WIN32 - WaitForSingleObjectEx(_eventh, 1, false); + ::WSAPoll(fds, socklist.size(), 1); #else ::poll(fds, socklist.size(), 1); #endif @@ -212,40 +204,12 @@ struct network_thread lv2_socket& sock = *socklist[i]; -#ifdef _WIN32 - WSANETWORKEVENTS nwe; - if (WSAEnumNetworkEvents(sock.socket, nullptr, &nwe) == 0) - { - sock.ev_set |= nwe.lNetworkEvents; - - if (sock.ev_set & (FD_READ | FD_ACCEPT | FD_CLOSE) && sock.events.test_and_reset(lv2_socket::poll::read)) - events += lv2_socket::poll::read; - if (sock.ev_set & (FD_WRITE | FD_CONNECT) && sock.events.test_and_reset(lv2_socket::poll::write)) - events += lv2_socket::poll::write; - - if ((nwe.lNetworkEvents & FD_READ && nwe.iErrorCode[FD_READ_BIT]) || - (nwe.lNetworkEvents & FD_ACCEPT && nwe.iErrorCode[FD_ACCEPT_BIT]) || - (nwe.lNetworkEvents & FD_CLOSE && nwe.iErrorCode[FD_CLOSE_BIT]) || - (nwe.lNetworkEvents & FD_WRITE && nwe.iErrorCode[FD_WRITE_BIT]) || - (nwe.lNetworkEvents & FD_CONNECT && nwe.iErrorCode[FD_CONNECT_BIT])) - { - // TODO - if (sock.events.test_and_reset(lv2_socket::poll::error)) - events += lv2_socket::poll::error; - } - } - else - { - sys_net.error("WSAEnumNetworkEvents() failed (s=%d)", i); - } -#else if (fds[i].revents & (POLLIN | POLLHUP) && socklist[i]->events.test_and_reset(lv2_socket::poll::read)) events += lv2_socket::poll::read; if (fds[i].revents & POLLOUT && socklist[i]->events.test_and_reset(lv2_socket::poll::write)) events += lv2_socket::poll::write; if (fds[i].revents & POLLERR && socklist[i]->events.test_and_reset(lv2_socket::poll::error)) events += lv2_socket::poll::error; -#endif if (events) { @@ -295,16 +259,12 @@ struct network_thread { auto events = socklist[i]->events.load(); -#ifdef _WIN32 - verify(HERE), 0 == WSAEventSelect(socklist[i]->socket, _eventh, FD_READ | FD_ACCEPT | FD_CLOSE | FD_WRITE | FD_CONNECT); -#else fds[i].fd = events ? socklist[i]->socket : -1; fds[i].events = (events & lv2_socket::poll::read ? 
POLLIN : 0) | (events & lv2_socket::poll::write ? POLLOUT : 0) | 0; fds[i].revents = 0; -#endif } } } @@ -1616,15 +1576,11 @@ error_code sys_net_bnet_poll(ppu_thread& ppu, vm::ptr fds, s32 n reader_lock lock(id_manager::g_mutex); -#ifndef _WIN32 ::pollfd _fds[1024]{}; -#endif for (s32 i = 0; i < nfds; i++) { -#ifndef _WIN32 _fds[i].fd = -1; -#endif fds[i].revents = 0; if (fds[i].fd < 0) @@ -1636,23 +1592,11 @@ error_code sys_net_bnet_poll(ppu_thread& ppu, vm::ptr fds, s32 n { if (fds[i].events & ~(SYS_NET_POLLIN | SYS_NET_POLLOUT)) sys_net.error("sys_net_bnet_poll(fd=%d): events=0x%x", fds[i].fd, fds[i].events); -#ifdef _WIN32 - if (fds[i].events & SYS_NET_POLLIN && sock->ev_set & (FD_READ | FD_ACCEPT | FD_CLOSE)) - fds[i].revents |= SYS_NET_POLLIN; - if (fds[i].events & SYS_NET_POLLOUT && sock->ev_set & (FD_WRITE | FD_CONNECT)) - fds[i].revents |= SYS_NET_POLLOUT; - - if (fds[i].revents) - { - signaled++; - } -#else _fds[i].fd = sock->socket; if (fds[i].events & SYS_NET_POLLIN) _fds[i].events |= POLLIN; if (fds[i].events & SYS_NET_POLLOUT) _fds[i].events |= POLLOUT; -#endif } else { @@ -1661,9 +1605,11 @@ error_code sys_net_bnet_poll(ppu_thread& ppu, vm::ptr fds, s32 n } } -#ifndef _WIN32 +#ifdef _WIN32 + ::WSAPoll(_fds, nfds, 0); +#else ::poll(_fds, nfds, 0); - +#endif for (s32 i = 0; i < nfds; i++) { if (_fds[i].revents & (POLLIN | POLLHUP)) @@ -1678,7 +1624,6 @@ error_code sys_net_bnet_poll(ppu_thread& ppu, vm::ptr fds, s32 n signaled++; } } -#endif if (ms == 0 || signaled) { @@ -1791,15 +1736,11 @@ error_code sys_net_bnet_select(ppu_thread& ppu, s32 nfds, vm::ptr selected{}; if (readfds && readfds->bit(i)) @@ -1820,24 +1761,11 @@ error_code sys_net_bnet_select(ppu_thread& ppu, s32 nfds, vm::ptr((lv2_socket::id_base & -1024) + i)) { -#ifdef _WIN32 - bool sig = false; - if (sock->ev_set & (FD_READ | FD_ACCEPT | FD_CLOSE) && selected & lv2_socket::poll::read) - sig = true, rread.set(i); - if (sock->ev_set & (FD_WRITE | FD_CONNECT) && selected & lv2_socket::poll::write) - sig = true, rwrite.set(i); - - if (sig) - { - signaled++; - } -#else _fds[i].fd = sock->socket; if (selected & lv2_socket::poll::read) _fds[i].events |= POLLIN; if (selected & lv2_socket::poll::write) _fds[i].events |= POLLOUT; -#endif } else { @@ -1845,9 +1773,11 @@ error_code sys_net_bnet_select(ppu_thread& ppu, s32 nfds, vm::ptr Date: Sat, 14 Dec 2019 12:48:34 +0100 Subject: [PATCH 07/21] Qt: Use QT_ENABLE_HIGHDPI_SCALING starting with Qt 5.14 (#7090) * Qt: Use QT_ENABLE_HIGHDPI_SCALING starting with Qt 5.14 * Qt: Add QT_SCALE_FACTOR_ROUNDING_POLICY starting with Qt 5.14 * Qt: remove style path for Qt versions below 5.11 (5.11.0 is min version) * Qt: Fix dpi-rounding cli arg * Qt: use rounding policy PassThrough as default dpi scaling policy * Qt: add missing Qt Version check (facepalm) --- rpcs3/main.cpp | 65 ++++++++++++++++++++++++++++++++++++- rpcs3/rpcs3.vcxproj | 2 +- rpcs3/rpcs3qt/stylesheets.h | 5 --- 3 files changed, 65 insertions(+), 7 deletions(-) diff --git a/rpcs3/main.cpp b/rpcs3/main.cpp index 32ec66b3bbe8..fbef64b1921d 100644 --- a/rpcs3/main.cpp +++ b/rpcs3/main.cpp @@ -101,6 +101,7 @@ static semaphore<> s_qt_mutex{}; const char* arg_headless = "headless"; const char* arg_no_gui = "no-gui"; const char* arg_high_dpi = "hidpi"; +const char* arg_rounding = "dpi-rounding"; const char* arg_styles = "styles"; const char* arg_style = "style"; const char* arg_stylesheet = "stylesheet"; @@ -127,14 +128,73 @@ QCoreApplication* createApplication(int& argc, char* argv[]) const std::string cmp_str = "0"; const 
auto i_hdpi_2 = (argc > (i_hdpi + 1)) ? (i_hdpi + 1) : 0; const auto high_dpi_setting = (i_hdpi_2 && !strcmp(cmp_str.c_str(), argv[i_hdpi_2])) ? "0" : "1"; - + +#if (QT_VERSION < QT_VERSION_CHECK(5,14,0)) // Set QT_AUTO_SCREEN_SCALE_FACTOR from environment. Defaults to cli argument, which defaults to 1. use_high_dpi = "1" == qEnvironmentVariable("QT_AUTO_SCREEN_SCALE_FACTOR", high_dpi_setting); +#else + // Set QT_ENABLE_HIGHDPI_SCALING from environment. Defaults to cli argument, which defaults to 1. + use_high_dpi = "1" == qEnvironmentVariable("QT_ENABLE_HIGHDPI_SCALING", high_dpi_setting); +#endif } // AA_EnableHighDpiScaling has to be set before creating a QApplication QApplication::setAttribute(use_high_dpi ? Qt::AA_EnableHighDpiScaling : Qt::AA_DisableHighDpiScaling); +#if (QT_VERSION >= QT_VERSION_CHECK(5,14,0)) + if (use_high_dpi) + { + // Set QT_SCALE_FACTOR_ROUNDING_POLICY from environment. Defaults to cli argument, which defaults to RoundPreferFloor. + auto rounding_val = Qt::HighDpiScaleFactorRoundingPolicy::PassThrough; + auto rounding_str = std::to_string(static_cast(rounding_val)); + const auto i_rounding = find_arg(arg_rounding, argc, argv); + if (i_rounding) + { + const auto i_rounding_2 = (argc > (i_rounding + 1)) ? (i_rounding + 1) : 0; + if (i_rounding_2) + { + const auto arg_val = argv[i_rounding_2]; + try + { + const auto rounding_val_cli = std::stoi(arg_val); + if (rounding_val_cli >= static_cast(Qt::HighDpiScaleFactorRoundingPolicy::Unset) && rounding_val_cli <= static_cast(Qt::HighDpiScaleFactorRoundingPolicy::PassThrough)) + { + rounding_val = static_cast(rounding_val_cli); + rounding_str = std::to_string(static_cast(rounding_val)); + } + else + { + throw std::exception(); + } + } + catch (const std::exception&) + { + std::cout << "The value " << arg_val << " for " << arg_rounding << " is not allowed. Please use a valid value for Qt::HighDpiScaleFactorRoundingPolicy.\n"; + } + } + } + try + { + rounding_str = qEnvironmentVariable("QT_SCALE_FACTOR_ROUNDING_POLICY", rounding_str.c_str()).toStdString(); + const auto rounding_val_final = std::stoi(rounding_str); + if (rounding_val_final >= static_cast(Qt::HighDpiScaleFactorRoundingPolicy::Unset) && rounding_val_final <= static_cast(Qt::HighDpiScaleFactorRoundingPolicy::PassThrough)) + { + rounding_val = static_cast(rounding_val_final); + rounding_str = std::to_string(static_cast(rounding_val)); + } + else + { + throw std::exception(); + } + } + catch (const std::exception&) + { + std::cout << "The value " << rounding_str << " for " << arg_rounding << " is not allowed. 
Please use a valid value for Qt::HighDpiScaleFactorRoundingPolicy.\n"; + } + QApplication::setHighDpiScaleFactorRoundingPolicy(rounding_val); + } +#endif + return new gui_application(argc, argv); } @@ -175,6 +235,9 @@ int main(int argc, char** argv) parser.addOption(QCommandLineOption(arg_headless, "Run RPCS3 in headless mode.")); parser.addOption(QCommandLineOption(arg_no_gui, "Run RPCS3 without its GUI.")); parser.addOption(QCommandLineOption(arg_high_dpi, "Enables Qt High Dpi Scaling.", "enabled", "1")); +#if (QT_VERSION >= QT_VERSION_CHECK(5,14,0)) + parser.addOption(QCommandLineOption(arg_rounding, "Sets the Qt::HighDpiScaleFactorRoundingPolicy for values like 150% zoom.", "rounding", "4")); +#endif parser.addOption(QCommandLineOption(arg_styles, "Lists the available styles.")); parser.addOption(QCommandLineOption(arg_style, "Loads a custom style.", "style", "")); parser.addOption(QCommandLineOption(arg_stylesheet, "Loads a custom stylesheet.", "path", "")); diff --git a/rpcs3/rpcs3.vcxproj b/rpcs3/rpcs3.vcxproj index d05dfa56bc93..d7ca1f97730a 100644 --- a/rpcs3/rpcs3.vcxproj +++ b/rpcs3/rpcs3.vcxproj @@ -2191,4 +2191,4 @@ - + \ No newline at end of file diff --git a/rpcs3/rpcs3qt/stylesheets.h b/rpcs3/rpcs3qt/stylesheets.h index 1df19feec5fb..b44ec8f630eb 100644 --- a/rpcs3/rpcs3qt/stylesheets.h +++ b/rpcs3/rpcs3qt/stylesheets.h @@ -44,13 +44,8 @@ namespace gui "QTableView#game_grid::item:hover:selected { background-color: #007fff; color: #fff; }" // table headers -#if (QT_VERSION < QT_VERSION_CHECK(5,11,0)) - "QHeaderView::section { padding: .5em; border: 0.063em solid #ffffff; }" - "QHeaderView::section:hover { background: #e3e3e3; padding: .5em; border: 0.063em solid #ffffff; }" -#else "QHeaderView::section { padding-left: .5em; padding-right: .5em; padding-top: .4em; padding-bottom: -.1em; border: 0.063em solid #ffffff; }" "QHeaderView::section:hover { background: #e3e3e3; padding-left: .5em; padding-right: .5em; padding-top: .4em; padding-bottom: -.1em; border: 0.063em solid #ffffff; }" -#endif // dock widget "QDockWidget{ background: transparent; color: black; }" From 725761f28221d53983d2f3c692d284c6015fbff9 Mon Sep 17 00:00:00 2001 From: Megamouse Date: Sat, 14 Dec 2019 12:14:51 +0100 Subject: [PATCH 08/21] Qt: move firmware settings to the advanced tab --- rpcs3/Json/tooltips.json | 14 +- rpcs3/rpcs3qt/settings_dialog.cpp | 294 ++++++++++++------------- rpcs3/rpcs3qt/settings_dialog.ui | 349 ++++++++++++++++-------------- 3 files changed, 346 insertions(+), 311 deletions(-) diff --git a/rpcs3/Json/tooltips.json b/rpcs3/Json/tooltips.json index 5819d2fbb624..62a0135fe26b 100644 --- a/rpcs3/Json/tooltips.json +++ b/rpcs3/Json/tooltips.json @@ -1,5 +1,12 @@ { "advanced": { + "libraries": { + "manual": "Allows the user to manually choose the LLE libraries to load.\nIf unsure, don't use this option. Nothing will work if you use this.", + "both": "Load libsysmodule.sprx and chosen list of libraries. Option for backward compatibility.\nIf unsure, don't use this option.", + "liblv2both": "Loads liblv2.sprx and chosen list of libraries.\nIf unsure, don't use this option.", + "liblv2list": "Loads liblv2.sprx and nothing but selected libraries.\nDon't use this option.", + "liblv2": "This closely emulates how games can load and unload system module files on a real PlayStation 3.\nSome games require this.\nThis is the preferred option." 
+ }, "debugConsoleMode": "Increases the amount of usable system memory to match a DECR console and more.\nCauses some software to behave differently than on retail hardware.", "readColor": "Initializes render target memory using vm memory.", "readDepth": "Initializes render target memory using vm memory.", @@ -36,13 +43,6 @@ "ASMJIT": "This is the fast option with very good compatibility.\nIf unsure, use this option.", "LLVM": "This is the fastest option with very good compatibility.\nRecompiles the game's SPU LLVM cache before running which adds extra start-up time.\nIf you experience issues, use the ASMJIT Recompiler." }, - "libraries": { - "manual": "Allows the user to manually choose the LLE libraries to load.\nIf unsure, don't use this option. Nothing will work if you use this.", - "both": "Load libsysmodule.sprx and chosen list of libraries. Option for backward compatibility.\nIf unsure, don't use this option.", - "liblv2both": "Loads liblv2.sprx and chosen list of libraries.\nIf unsure, don't use this option.", - "liblv2list": "Loads liblv2.sprx and nothing but selected libraries.\nDon't use this option.", - "liblv2": "This closely emulates how games can load and unload system module files on a real PlayStation 3.\nSome games require this.\nThis is the preferred option." - }, "checkboxes": { "accurateXFloat": "Fixes bugs in various games at the cost of performance.\nThis setting is only applied when SPU LLVM is active.", "spuCache": "Should normally stay enabled.\nDisable this if the cache becomes too large.\nDisabling it does not remove the existing cache.", diff --git a/rpcs3/rpcs3qt/settings_dialog.cpp b/rpcs3/rpcs3qt/settings_dialog.cpp index 269c5ff83421..057345413887 100644 --- a/rpcs3/rpcs3qt/settings_dialog.cpp +++ b/rpcs3/rpcs3qt/settings_dialog.cpp @@ -80,7 +80,6 @@ settings_dialog::settings_dialog(std::shared_ptr guiSettings, std: QJsonObject json_cpu_spu = json_cpu.value("SPU").toObject(); QJsonObject json_cpu_cbs = json_cpu.value("checkboxes").toObject(); QJsonObject json_cpu_cbo = json_cpu.value("comboboxes").toObject(); - QJsonObject json_cpu_lib = json_cpu.value("libraries").toObject(); QJsonObject json_gpu = json_obj.value("gpu").toObject(); QJsonObject json_gpu_cbo = json_gpu.value("comboboxes").toObject(); @@ -92,7 +91,8 @@ settings_dialog::settings_dialog(std::shared_ptr guiSettings, std: QJsonObject json_sys = json_obj.value("system").toObject(); QJsonObject json_net = json_obj.value("network").toObject(); - QJsonObject json_advanced = json_obj.value("advanced").toObject(); + QJsonObject json_advanced = json_obj.value("advanced").toObject(); + QJsonObject json_advanced_libs = json_advanced.value("libraries").toObject(); QJsonObject json_emu = json_obj.value("emulator").toObject(); QJsonObject json_emu_misc = json_emu.value("misc").toObject(); @@ -309,151 +309,6 @@ settings_dialog::settings_dialog(std::shared_ptr guiSettings, std: ui->spu_llvm->setEnabled(false); #endif - // lib options tool tips - SubscribeTooltip(ui->lib_manu, json_cpu_lib["manual"].toString()); - SubscribeTooltip(ui->lib_both, json_cpu_lib["both"].toString()); - SubscribeTooltip(ui->lib_lv2, json_cpu_lib["liblv2"].toString()); - SubscribeTooltip(ui->lib_lv2b, json_cpu_lib["liblv2both"].toString()); - SubscribeTooltip(ui->lib_lv2l, json_cpu_lib["liblv2list"].toString()); - - // creating this in ui file keeps scrambling the order... 
- QButtonGroup *libModeBG = new QButtonGroup(this); - libModeBG->addButton(ui->lib_manu, static_cast(lib_loading_type::manual)); - libModeBG->addButton(ui->lib_both, static_cast(lib_loading_type::hybrid)); - libModeBG->addButton(ui->lib_lv2, static_cast(lib_loading_type::liblv2only)); - libModeBG->addButton(ui->lib_lv2b, static_cast(lib_loading_type::liblv2both)); - libModeBG->addButton(ui->lib_lv2l, static_cast(lib_loading_type::liblv2list)); - - {// Handle lib loading options - QString selectedLib = qstr(xemu_settings->GetSetting(emu_settings::LibLoadOptions)); - QStringList libmode_list = xemu_settings->GetSettingOptions(emu_settings::LibLoadOptions); - - for (int i = 0; i < libmode_list.count(); i++) - { - libModeBG->button(i)->setText(libmode_list[i]); - - if (libmode_list[i] == selectedLib) - { - libModeBG->button(i)->setChecked(true); - } - - connect(libModeBG->button(i), &QAbstractButton::clicked, [=]() - { - xemu_settings->SetSetting(emu_settings::LibLoadOptions, sstr(libmode_list[i])); - }); - } - } - - // Sort string vector alphabetically - static const auto sort_string_vector = [](std::vector& vec) - { - std::sort(vec.begin(), vec.end(), [](const std::string &str1, const std::string &str2) { return str1 < str2; }); - }; - - std::vector loadedLibs = xemu_settings->GetLoadedLibraries(); - - sort_string_vector(loadedLibs); - - for (const auto& lib : loadedLibs) - { - QListWidgetItem* item = new QListWidgetItem(qstr(lib), ui->lleList); - item->setFlags(item->flags() | Qt::ItemIsUserCheckable); // set checkable flag - item->setCheckState(Qt::Checked); // AND initialize check state - ui->lleList->addItem(item); - } - - const std::string lle_dir = g_cfg.vfs.get_dev_flash() + "sys/external/"; - - std::unordered_set set(loadedLibs.begin(), loadedLibs.end()); - std::vector lle_module_list_unselected; - - for (const auto& prxf : fs::dir(lle_dir)) - { - // List found unselected modules - if (prxf.is_directory || (prxf.name.substr(std::max(size_t(3), prxf.name.length()) - 4)) != "sprx") - { - continue; - } - if (verify_npdrm_self_headers(fs::file(lle_dir + prxf.name)) && !set.count(prxf.name)) - { - lle_module_list_unselected.push_back(prxf.name); - } - } - - sort_string_vector(lle_module_list_unselected); - - for (const auto& lib : lle_module_list_unselected) - { - QListWidgetItem* item = new QListWidgetItem(qstr(lib), ui->lleList); - item->setFlags(item->flags() | Qt::ItemIsUserCheckable); // set checkable flag - item->setCheckState(Qt::Unchecked); // AND initialize check state - ui->lleList->addItem(item); - } - - ui->searchBox->setPlaceholderText(tr("Search libraries")); - - auto l_OnLibButtonClicked = [=](int ind) - { - if (ind != static_cast(lib_loading_type::liblv2only)) - { - ui->searchBox->setEnabled(true); - ui->lleList->setEnabled(true); - } - else - { - ui->searchBox->setEnabled(false); - ui->lleList->setEnabled(false); - } - }; - - auto l_OnSearchBoxTextChanged = [=](QString text) - { - QString searchTerm = text.toLower(); - std::vector items; - - // duplicate current items, we need clones to preserve checkstates - for (int i = 0; i < ui->lleList->count(); i++) - { - items.push_back(ui->lleList->item(i)->clone()); - } - - // sort items: checked items first then alphabetical order - std::sort(items.begin(), items.end(), [](QListWidgetItem *i1, QListWidgetItem *i2) - { - return (i1->checkState() != i2->checkState()) ? 
(i1->checkState() > i2->checkState()) : (i1->text() < i2->text()); - }); - - // refill library list - ui->lleList->clear(); - - for (uint i = 0; i < items.size(); i++) - { - ui->lleList->addItem(items[i]); - - // only show items filtered for search text - ui->lleList->setRowHidden(i, !items[i]->text().contains(searchTerm)); - } - }; - - // Events - connect(libModeBG, static_cast(&QButtonGroup::buttonClicked), l_OnLibButtonClicked); - connect(ui->searchBox, &QLineEdit::textChanged, l_OnSearchBoxTextChanged); - - // enable multiselection (there must be a better way) - connect(ui->lleList, &QListWidget::itemChanged, [&](QListWidgetItem* item) - { - for (auto cb : ui->lleList->selectedItems()) - { - cb->setCheckState(item->checkState()); - } - }); - - int buttid = libModeBG->checkedId(); - if (buttid != -1) - { - l_OnLibButtonClicked(buttid); - } - // _____ _____ _ _ _______ _ // / ____| __ \| | | | |__ __| | | // | | __| |__) | | | | | | __ _| |__ @@ -1108,6 +963,151 @@ settings_dialog::settings_dialog(std::shared_ptr guiSettings, std: SubscribeTooltip(ui->clockScale, json_advanced["clocksScale"].toString()); } + // lib options tool tips + SubscribeTooltip(ui->lib_manu, json_advanced_libs["manual"].toString()); + SubscribeTooltip(ui->lib_both, json_advanced_libs["both"].toString()); + SubscribeTooltip(ui->lib_lv2, json_advanced_libs["liblv2"].toString()); + SubscribeTooltip(ui->lib_lv2b, json_advanced_libs["liblv2both"].toString()); + SubscribeTooltip(ui->lib_lv2l, json_advanced_libs["liblv2list"].toString()); + + // creating this in ui file keeps scrambling the order... + QButtonGroup *libModeBG = new QButtonGroup(this); + libModeBG->addButton(ui->lib_manu, static_cast(lib_loading_type::manual)); + libModeBG->addButton(ui->lib_both, static_cast(lib_loading_type::hybrid)); + libModeBG->addButton(ui->lib_lv2, static_cast(lib_loading_type::liblv2only)); + libModeBG->addButton(ui->lib_lv2b, static_cast(lib_loading_type::liblv2both)); + libModeBG->addButton(ui->lib_lv2l, static_cast(lib_loading_type::liblv2list)); + + {// Handle lib loading options + QString selectedLib = qstr(xemu_settings->GetSetting(emu_settings::LibLoadOptions)); + QStringList libmode_list = xemu_settings->GetSettingOptions(emu_settings::LibLoadOptions); + + for (int i = 0; i < libmode_list.count(); i++) + { + libModeBG->button(i)->setText(libmode_list[i]); + + if (libmode_list[i] == selectedLib) + { + libModeBG->button(i)->setChecked(true); + } + + connect(libModeBG->button(i), &QAbstractButton::clicked, [=]() + { + xemu_settings->SetSetting(emu_settings::LibLoadOptions, sstr(libmode_list[i])); + }); + } + } + + // Sort string vector alphabetically + static const auto sort_string_vector = [](std::vector& vec) + { + std::sort(vec.begin(), vec.end(), [](const std::string &str1, const std::string &str2) { return str1 < str2; }); + }; + + std::vector loadedLibs = xemu_settings->GetLoadedLibraries(); + + sort_string_vector(loadedLibs); + + for (const auto& lib : loadedLibs) + { + QListWidgetItem* item = new QListWidgetItem(qstr(lib), ui->lleList); + item->setFlags(item->flags() | Qt::ItemIsUserCheckable); // set checkable flag + item->setCheckState(Qt::Checked); // AND initialize check state + ui->lleList->addItem(item); + } + + const std::string lle_dir = g_cfg.vfs.get_dev_flash() + "sys/external/"; + + std::unordered_set set(loadedLibs.begin(), loadedLibs.end()); + std::vector lle_module_list_unselected; + + for (const auto& prxf : fs::dir(lle_dir)) + { + // List found unselected modules + if (prxf.is_directory || 
(prxf.name.substr(std::max(size_t(3), prxf.name.length()) - 4)) != "sprx") + { + continue; + } + if (verify_npdrm_self_headers(fs::file(lle_dir + prxf.name)) && !set.count(prxf.name)) + { + lle_module_list_unselected.push_back(prxf.name); + } + } + + sort_string_vector(lle_module_list_unselected); + + for (const auto& lib : lle_module_list_unselected) + { + QListWidgetItem* item = new QListWidgetItem(qstr(lib), ui->lleList); + item->setFlags(item->flags() | Qt::ItemIsUserCheckable); // set checkable flag + item->setCheckState(Qt::Unchecked); // AND initialize check state + ui->lleList->addItem(item); + } + + ui->searchBox->setPlaceholderText(tr("Search libraries")); + + auto l_OnLibButtonClicked = [=](int ind) + { + if (ind != static_cast(lib_loading_type::liblv2only)) + { + ui->searchBox->setEnabled(true); + ui->lleList->setEnabled(true); + } + else + { + ui->searchBox->setEnabled(false); + ui->lleList->setEnabled(false); + } + }; + + auto l_OnSearchBoxTextChanged = [=](QString text) + { + QString searchTerm = text.toLower(); + std::vector items; + + // duplicate current items, we need clones to preserve checkstates + for (int i = 0; i < ui->lleList->count(); i++) + { + items.push_back(ui->lleList->item(i)->clone()); + } + + // sort items: checked items first then alphabetical order + std::sort(items.begin(), items.end(), [](QListWidgetItem *i1, QListWidgetItem *i2) + { + return (i1->checkState() != i2->checkState()) ? (i1->checkState() > i2->checkState()) : (i1->text() < i2->text()); + }); + + // refill library list + ui->lleList->clear(); + + for (uint i = 0; i < items.size(); i++) + { + ui->lleList->addItem(items[i]); + + // only show items filtered for search text + ui->lleList->setRowHidden(i, !items[i]->text().contains(searchTerm)); + } + }; + + // Events + connect(libModeBG, static_cast(&QButtonGroup::buttonClicked), l_OnLibButtonClicked); + connect(ui->searchBox, &QLineEdit::textChanged, l_OnSearchBoxTextChanged); + + // enable multiselection (there must be a better way) + connect(ui->lleList, &QListWidget::itemChanged, [&](QListWidgetItem* item) + { + for (auto cb : ui->lleList->selectedItems()) + { + cb->setCheckState(item->checkState()); + } + }); + + int buttid = libModeBG->checkedId(); + if (buttid != -1) + { + l_OnLibButtonClicked(buttid); + } + // ______ _ _ _______ _ // | ____| | | | | |__ __| | | // | |__ _ __ ___ _ _| | __ _| |_ ___ _ __ | | __ _| |__ diff --git a/rpcs3/rpcs3qt/settings_dialog.ui b/rpcs3/rpcs3qt/settings_dialog.ui index 81bf576f6dda..1b3cce9453bf 100644 --- a/rpcs3/rpcs3qt/settings_dialog.ui +++ b/rpcs3/rpcs3qt/settings_dialog.ui @@ -6,8 +6,8 @@ 0 0 - 752 - 584 + 753 + 597 @@ -42,11 +42,11 @@ CPU - + - + @@ -115,107 +115,25 @@ - - - - 0 - 0 - + + + Qt::Vertical - - Preferred SPU Threads + + QSizePolicy::MinimumExpanding - - - - - - + + + 0 + 0 + + + - - - - - - 0 - 0 - - - - Firmware Settings - - - - - - - 0 - 0 - - - - Manually load selected libraries - - - - - - - - 0 - 0 - - - - Load automatic and manual selection - - - - - - - - 0 - 0 - - - - Load liblv2.sprx only - - - - - - - - 0 - 0 - - - - Load liblv2.sprx and manual selection - - - - - - - - 0 - 0 - - - - Load liblv2.sprx and strict selection - - - - - - + @@ -261,7 +179,27 @@ - + + + Qt::Vertical + + + QSizePolicy::MinimumExpanding + + + + 0 + 0 + + + + + + + + + + 0 @@ -269,62 +207,35 @@ - SPU Block Size + TSX Instructions - + - + - - - - - + - + 0 0 - Firmware Libraries + SPU Block Size - - - - - - 0 - 0 - - - - QAbstractItemView::ExtendedSelection - - - QListView::ListMode - - - + - - - - 0 
- 0 - - - + - + 0 @@ -332,15 +243,31 @@ - TSX Instructions + Preferred SPU Threads - + - + + + + + Qt::Vertical + + + QSizePolicy::MinimumExpanding + + + + 0 + 0 + + + + @@ -1640,6 +1567,86 @@ + + + + + 0 + 0 + + + + Firmware Settings + + + + + + + 0 + 0 + + + + Manually load selected libraries + + + + + + + + 0 + 0 + + + + Load automatic and manual selection + + + + + + + + 0 + 0 + + + + Load liblv2.sprx only + + + + + + + + 0 + 0 + + + + Load liblv2.sprx and manual selection + + + + + + + + 0 + 0 + + + + Load liblv2.sprx and strict selection + + + + + + @@ -1655,6 +1662,51 @@ + + + + + + + 0 + 0 + + + + Firmware Libraries + + + + + + + 0 + 0 + + + + QAbstractItemView::ExtendedSelection + + + QListView::ListMode + + + + + + + + 0 + 0 + + + + + + + + + @@ -1700,23 +1752,6 @@ - - - - Qt::Vertical - - - - 0 - 0 - - - - - - - - @@ -1804,7 +1839,7 @@ - + Qt::Vertical From 43cf10fcbd7c3dcf9024506448fc835a84260e83 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Wed, 4 Dec 2019 15:07:20 +0300 Subject: [PATCH 09/21] rsx: Remove deprecated do_method path that has been superceded by c++ inheritance for many years --- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 50 ++++++----------------------- rpcs3/Emu/RSX/GL/GLGSRender.h | 3 +- rpcs3/Emu/RSX/Null/NullGSRender.cpp | 5 --- rpcs3/Emu/RSX/Null/NullGSRender.h | 1 - rpcs3/Emu/RSX/RSXThread.h | 2 +- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 18 ----------- rpcs3/Emu/RSX/VK/VKGSRender.h | 3 +- rpcs3/Emu/RSX/rsx_methods.cpp | 23 +++---------- 8 files changed, 16 insertions(+), 89 deletions(-) diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 516a96f4bd47..5e19ae3ef612 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -1100,7 +1100,7 @@ void GLGSRender::on_exit() void GLGSRender::clear_surface(u32 arg) { - if (skip_current_frame || !framebuffer_status_valid) return; + if (skip_current_frame) return; // If stencil write mask is disabled, remove clear_stencil bit if (!rsx::method_registers.stencil_mask()) arg &= ~0x2u; @@ -1108,6 +1108,14 @@ void GLGSRender::clear_surface(u32 arg) // Ignore invalid clear flags if ((arg & 0xf3) == 0) return; + u8 ctx = rsx::framebuffer_creation_context::context_draw; + if (arg & 0xF0) ctx |= rsx::framebuffer_creation_context::context_clear_color; + if (arg & 0x3) ctx |= rsx::framebuffer_creation_context::context_clear_depth; + + init_buffers((rsx::framebuffer_creation_context)ctx, true); + + if (!framebuffer_status_valid) return; + GLbitfield mask = 0; gl::command_context cmd{ gl_state }; @@ -1223,46 +1231,6 @@ void GLGSRender::clear_surface(u32 arg) glClear(mask); } -bool GLGSRender::do_method(u32 cmd, u32 arg) -{ - switch (cmd) - { - case NV4097_CLEAR_SURFACE: - { - if (arg & 0xF3) - { - //Only do all this if we have actual work to do - u8 ctx = rsx::framebuffer_creation_context::context_draw; - if (arg & 0xF0) ctx |= rsx::framebuffer_creation_context::context_clear_color; - if (arg & 0x3) ctx |= rsx::framebuffer_creation_context::context_clear_depth; - - init_buffers(rsx::framebuffer_creation_context{ctx}, true); - clear_surface(arg); - } - - return true; - } - case NV4097_CLEAR_ZCULL_SURFACE: - { - // NOP - // Clearing zcull memory does not modify depth/stencil buffers 'bound' to the zcull region - return true; - } - case NV4097_TEXTURE_READ_SEMAPHORE_RELEASE: - { - // Texture barrier, seemingly not very useful - return true; - } - case NV4097_BACK_END_WRITE_SEMAPHORE_RELEASE: - { - //flush_draw_buffers = true; - return true; - } - } - - return false; -} - bool 
GLGSRender::load_program() { if (m_graphics_state & rsx::pipeline_state::invalidate_pipeline_bits) diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index ef24dc66fb9d..44311d703b03 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -137,7 +137,6 @@ class GLGSRender : public GSRender, public ::rsx::reports::ZCULL_control gl::vertex_upload_info set_vertex_buffer(); rsx::vertex_input_layout m_vertex_layout = {}; - void clear_surface(u32 arg); void init_buffers(rsx::framebuffer_creation_context context, bool skip_reading = false); bool load_program(); @@ -162,12 +161,12 @@ class GLGSRender : public GSRender, public ::rsx::reports::ZCULL_control void discard_occlusion_query(rsx::reports::occlusion_query_info* query) override; protected: + void clear_surface(u32 arg) override; void begin() override; void end() override; void on_init_thread() override; void on_exit() override; - bool do_method(u32 cmd, u32 arg) override; void flip(const rsx::display_flip_info_t& info) override; void do_local_task(rsx::FIFO_state state) override; diff --git a/rpcs3/Emu/RSX/Null/NullGSRender.cpp b/rpcs3/Emu/RSX/Null/NullGSRender.cpp index e7b1e0096243..e3dcec47fd32 100644 --- a/rpcs3/Emu/RSX/Null/NullGSRender.cpp +++ b/rpcs3/Emu/RSX/Null/NullGSRender.cpp @@ -11,11 +11,6 @@ NullGSRender::NullGSRender() : GSRender() { } -bool NullGSRender::do_method(u32 cmd, u32 value) -{ - return false; -} - void NullGSRender::end() { rsx::method_registers.current_draw_clause.end(); diff --git a/rpcs3/Emu/RSX/Null/NullGSRender.h b/rpcs3/Emu/RSX/Null/NullGSRender.h index a22b4516d94b..a49e711afd16 100644 --- a/rpcs3/Emu/RSX/Null/NullGSRender.h +++ b/rpcs3/Emu/RSX/Null/NullGSRender.h @@ -8,6 +8,5 @@ class NullGSRender : public GSRender NullGSRender(); private: - bool do_method(u32 cmd, u32 value) final; void end() override; }; diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index c9161eb985b5..cd0590784326 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -687,13 +687,13 @@ namespace rsx void run_FIFO(); public: + virtual void clear_surface(u32 arg) {}; virtual void begin(); virtual void end(); virtual void execute_nop_draw(); virtual void on_init_rsx() = 0; virtual void on_init_thread() = 0; - virtual bool do_method(u32 /*cmd*/, u32 /*value*/) { return false; } virtual void on_frame_end(u32 buffer, bool forced = false); virtual void flip(const display_flip_info_t& info) = 0; virtual u64 timestamp(); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index d3ff600d904b..7a4f6620e2fc 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -2485,24 +2485,6 @@ void VKGSRender::do_local_task(rsx::FIFO_state state) } } -bool VKGSRender::do_method(u32 cmd, u32 arg) -{ - switch (cmd) - { - case NV4097_CLEAR_SURFACE: - clear_surface(arg); - return true; - case NV4097_TEXTURE_READ_SEMAPHORE_RELEASE: - // Texture barrier, seemingly not very useful - return true; - case NV4097_BACK_END_WRITE_SEMAPHORE_RELEASE: - //sync_at_semaphore_release(); - return true; - default: - return false; - } -} - bool VKGSRender::load_program() { if (m_graphics_state & rsx::pipeline_state::invalidate_pipeline_bits) diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index d3e418fff0cc..9152b8038543 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -426,7 +426,6 @@ class VKGSRender : public GSRender, public ::rsx::reports::ZCULL_control ~VKGSRender() 
override; private: - void clear_surface(u32 mask); void prepare_rtts(rsx::framebuffer_creation_context context); void open_command_buffer(); @@ -478,13 +477,13 @@ class VKGSRender : public GSRender, public ::rsx::reports::ZCULL_control void emergency_query_cleanup(vk::command_buffer* commands); protected: + void clear_surface(u32 mask) override; void begin() override; void end() override; void emit_geometry(u32 sub_index) override; void on_init_thread() override; void on_exit() override; - bool do_method(u32 cmd, u32 arg) override; void flip(const rsx::display_flip_info_t& info) override; void do_local_task(rsx::FIFO_state state) override; diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp index 4b302afbe009..b0893c08a25c 100644 --- a/rpcs3/Emu/RSX/rsx_methods.cpp +++ b/rpcs3/Emu/RSX/rsx_methods.cpp @@ -152,11 +152,7 @@ namespace rsx { void clear(thread* rsx, u32 _reg, u32 arg) { - // TODO: every backend must override method table to insert its own handlers - if (!rsx->do_method(NV4097_CLEAR_SURFACE, arg)) - { - // - } + rsx->clear_surface(arg); if (capture_current_frame) { @@ -166,8 +162,6 @@ namespace rsx void clear_zcull(thread* rsx, u32 _reg, u32 arg) { - rsx->do_method(NV4097_CLEAR_ZCULL_SURFACE, arg); - if (capture_current_frame) { rsx->capture_frame("clear zcull memory"); @@ -212,11 +206,6 @@ namespace rsx // lle-gcm likes to inject system reserved semaphores, presumably for system/vsh usage // Avoid calling render to avoid any havoc(flickering) they may cause from invalid flush/write const u32 offset = method_registers.semaphore_offset_4097() & -16; - if (offset > 63 * 4 && !rsx->do_method(NV4097_TEXTURE_READ_SEMAPHORE_RELEASE, arg)) - { - // - } - vm::_ref>(get_address(offset, method_registers.semaphore_context_dma_4097())).store( { arg, @@ -228,14 +217,10 @@ namespace rsx void back_end_write_semaphore_release(thread* rsx, u32 _reg, u32 arg) { // Full pipeline barrier - const u32 offset = method_registers.semaphore_offset_4097() & -16; - if (offset > 63 * 4 && !rsx->do_method(NV4097_BACK_END_WRITE_SEMAPHORE_RELEASE, arg)) - { - // - } - rsx->sync(); - u32 val = (arg & 0xff00ff00) | ((arg & 0xff) << 16) | ((arg >> 16) & 0xff); + + const u32 offset = method_registers.semaphore_offset_4097() & -16; + const u32 val = (arg & 0xff00ff00) | ((arg & 0xff) << 16) | ((arg >> 16) & 0xff); vm::_ref>(get_address(offset, method_registers.semaphore_context_dma_4097())).store( { val, From f127f3956577003850b218dc541fa73f7e17d7db Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sat, 30 Nov 2019 15:44:47 +0300 Subject: [PATCH 10/21] rsx: Restructure ZCULL report retirement - Prefer lazy retire model. Sync commands are sent out and the reports will be retired when they are available without forcing. - To make this work with conditional rendering, hardware support is required where the backend will automatically determine visibility by itself during rendering. 
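To illustrate the retirement model described above, here is a deliberately simplified sketch (pending_report, lazy_report_queue, write_sink and wait_for_result are illustrative stand-ins, not the actual rsx::reports::ZCULL_control interface): finished query results are drained opportunistically from a queue during normal operation, and a blocking readback is only forced when a read barrier needs the value at a specific sink address.

#include <cstdint>
#include <deque>

struct pending_report
{
	std::uint32_t sink = 0;    // guest address that receives the report value
	bool ready = false;        // set once the backend query result is available
	std::uint32_t result = 0;
};

class lazy_report_queue
{
	std::deque<pending_report> m_pending;

public:
	void push(std::uint32_t sink) { m_pending.push_back(pending_report{sink}); }

	// Polled regularly from the RSX thread: retire only what has already
	// completed, never stall waiting on the GPU here (the "lazy" part).
	void update()
	{
		while (!m_pending.empty() && m_pending.front().ready)
		{
			write_sink(m_pending.front());
			m_pending.pop_front();
		}
	}

	// Used only when the CPU genuinely needs the value at 'sink' (for example
	// a software conditional-render evaluation): force everything up to and
	// including that entry.
	void read_barrier(std::uint32_t sink)
	{
		while (!m_pending.empty())
		{
			auto& front = m_pending.front();
			if (!front.ready)
				wait_for_result(front); // blocking GPU readback

			const bool last = (front.sink == sink);
			write_sink(front);
			m_pending.pop_front();

			if (last)
				break;
		}
	}

private:
	static void write_sink(const pending_report&) {}                   // stand-in
	static void wait_for_result(pending_report& r) { r.ready = true; } // stand-in
};

With hardware conditional rendering available, even the read barrier in the conditional-render path can be skipped: the backend predicates the draws on the query result directly, and the queue is still drained lazily.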
--- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 6 +- rpcs3/Emu/RSX/RSXThread.cpp | 374 +++++++++++++++++++++------ rpcs3/Emu/RSX/RSXThread.h | 103 ++++++-- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 48 ++-- rpcs3/Emu/RSX/VK/VKGSRender.h | 4 +- rpcs3/Emu/RSX/VK/VKVertexProgram.cpp | 1 - rpcs3/Emu/RSX/rsx_methods.cpp | 10 +- 7 files changed, 401 insertions(+), 145 deletions(-) diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 5e19ae3ef612..51ea91c67660 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -176,8 +176,7 @@ void GLGSRender::begin() { rsx::thread::begin(); - if (skip_current_frame || - (conditional_render_enabled && conditional_render_test_failed)) + if (skip_current_frame || cond_render_ctrl.disable_rendering()) return; init_buffers(rsx::framebuffer_creation_context::context_draw); @@ -187,8 +186,7 @@ void GLGSRender::end() { m_profiler.start(); - if (skip_current_frame || !framebuffer_status_valid || - (conditional_render_enabled && conditional_render_test_failed)) + if (skip_current_frame || !framebuffer_status_valid || cond_render_ctrl.disable_rendering()) { execute_nop_draw(); rsx::thread::end(); diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index b8ebba05c13a..de4dc6656ffa 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -294,13 +294,33 @@ namespace rsx void thread::begin() { - if (conditional_render_enabled && conditional_render_test_address) + if (cond_render_ctrl.hw_cond_active) { - // Evaluate conditional rendering test - zcull_ctrl->read_barrier(this, conditional_render_test_address, 4, reports::sync_no_notify); - vm::ptr result = vm::cast(conditional_render_test_address); - conditional_render_test_failed = (result->value == 0); - conditional_render_test_address = 0; + if (!cond_render_ctrl.eval_pending()) + { + // End conditional rendering if still active + end_conditional_rendering(); + } + + // If hw cond render is enabled and evalutation is still pending, do nothing + } + else if (cond_render_ctrl.eval_pending()) + { + // Evaluate conditional rendering test or enable hw cond render until results are available + if (backend_config.supports_hw_conditional_render) + { + // In this mode, it is possible to skip the cond render while the backend is still processing data. 
+ // The backend guarantees that any draw calls emitted during this time will NOT generate any ROP writes + verify(HERE), !cond_render_ctrl.hw_cond_active; + + // Pending evaluation, use hardware test + begin_conditional_rendering(); + } + else + { + zcull_ctrl->read_barrier(this, cond_render_ctrl.eval_address, 4, reports::sync_no_notify); + cond_render_ctrl.eval_result(this); + } } if (m_graphics_state & rsx::pipeline_state::fragment_program_dirty) @@ -2134,6 +2154,45 @@ namespace rsx return zcull_ctrl->copy_reports_to(memory_range_start, memory_range, destination); } + void thread::enable_conditional_rendering(vm::addr_t ref) + { + cond_render_ctrl.enable_conditional_render(this, ref); + + auto result = zcull_ctrl->find_query(ref); + if (result.found) + { + if (result.query) + { + cond_render_ctrl.set_sync_tag(result.query->sync_tag); + sync_hint(FIFO_hint::hint_conditional_render_eval, result.query); + } + else + { + bool failed = (result.raw_zpass_result == 0); + cond_render_ctrl.set_eval_result(this, failed); + } + } + else + { + cond_render_ctrl.eval_result(this); + } + } + + void thread::disable_conditional_rendering() + { + cond_render_ctrl.disable_conditional_render(this); + } + + void thread::begin_conditional_rendering() + { + cond_render_ctrl.hw_cond_active = true; + } + + void thread::end_conditional_rendering() + { + cond_render_ctrl.hw_cond_active = false; + } + void thread::sync() { zcull_ctrl->sync(this); @@ -2149,6 +2208,11 @@ namespace rsx //verify (HERE), async_tasks_pending.load() == 0; } + void thread::sync_hint(FIFO_hint /*hint*/, void* /*args*/) + { + zcull_ctrl->on_sync_hint(); + } + void thread::flush_fifo() { // Make sure GET value is exposed before sync points @@ -2369,7 +2433,7 @@ namespace rsx } // Reset zcull ctrl - zcull_ctrl->set_active(this, false); + zcull_ctrl->set_active(this, false, true); zcull_ctrl->clear(this); if (zcull_ctrl->has_pending()) @@ -2525,18 +2589,29 @@ namespace rsx namespace reports { - void ZCULL_control::set_enabled(class ::rsx::thread* ptimer, bool state) + ZCULL_control::ZCULL_control() + { + for (auto& query : m_occlusion_query_data) + { + m_free_occlusion_pool.push(&query); + } + } + + ZCULL_control::~ZCULL_control() + {} + + void ZCULL_control::set_enabled(class ::rsx::thread* ptimer, bool state, bool flush_queue) { if (state != enabled) { enabled = state; if (active && !enabled) - set_active(ptimer, false); + set_active(ptimer, false, flush_queue); } } - void ZCULL_control::set_active(class ::rsx::thread* ptimer, bool state) + void ZCULL_control::set_active(class ::rsx::thread* ptimer, bool state, bool flush_queue) { if (state != active) { @@ -2556,6 +2631,8 @@ namespace rsx end_occlusion_query(m_current_task); m_current_task->active = false; m_current_task->pending = true; + m_current_task->sync_tag = ++m_timer; + m_current_task->timestamp = m_tsc; m_pending_writes.push_back({}); m_pending_writes.back().query = m_current_task; @@ -2564,10 +2641,12 @@ namespace rsx else { discard_occlusion_query(m_current_task); + free_query(m_current_task); m_current_task->active = false; } m_current_task = nullptr; + update(ptimer, 0u, flush_queue); } } } @@ -2582,6 +2661,8 @@ namespace rsx m_current_task->active = false; m_current_task->pending = true; + m_current_task->timestamp = m_tsc; + m_current_task->sync_tag = ++m_timer; m_pending_writes.back().query = m_current_task; allocate_new_query(ptimer); @@ -2589,8 +2670,16 @@ namespace rsx } else { - //Spam; send null query down the pipeline to copy the last result - //Might be used to 
capture a timestamp (verify) + // Spam; send null query down the pipeline to copy the last result + // Might be used to capture a timestamp (verify) + + if (m_pending_writes.empty()) + { + // No need to queue this if there is no pending request in the pipeline anyway + write(sink, ptimer->timestamp(), type, m_statistics_map[m_statistics_tag_id]); + return; + } + m_pending_writes.push_back({}); } @@ -2600,13 +2689,15 @@ namespace rsx if (!It->sink) { It->counter_tag = m_statistics_tag_id; - It->due_tsc = get_system_time() + m_cycles_delay; It->sink = sink; It->type = type; if (forwarder != &(*It)) { - //Not the last one in the chain, forward the writing operation to the last writer + // Not the last one in the chain, forward the writing operation to the last writer + // Usually comes from truncated queries caused by disabling the testing + verify(HERE), It->query; + It->forwarder = forwarder; It->query->owned = true; } @@ -2625,53 +2716,46 @@ namespace rsx int retries = 0; while (true) { - for (u32 n = 0; n < occlusion_query_count; ++n) + if (!m_free_occlusion_pool.empty()) { - if (m_occlusion_query_data[n].pending || m_occlusion_query_data[n].active) - continue; + m_current_task = m_free_occlusion_pool.top(); + m_free_occlusion_pool.pop(); - m_current_task = &m_occlusion_query_data[n]; m_current_task->num_draws = 0; m_current_task->result = 0; - m_current_task->sync_timestamp = 0; m_current_task->active = true; m_current_task->owned = false; - m_current_task->hint = false; + m_current_task->sync_tag = 0; + m_current_task->timestamp = 0; return; } if (retries > 0) { - LOG_ERROR(RSX, "ZCULL report queue is overflowing!!"); - m_statistics_map[m_statistics_tag_id] = 1; - - verify(HERE), m_pending_writes.front().sink == 0; - m_pending_writes.clear(); + fmt::throw_exception("Allocation failed!"); + } - for (auto &query : m_occlusion_query_data) - { - discard_occlusion_query(&query); - query.pending = false; - } + // All slots are occupied, try to pop the earliest entry - m_current_task = &m_occlusion_query_data[0]; - m_current_task->num_draws = 0; - m_current_task->result = 0; - m_current_task->sync_timestamp = 0; - m_current_task->active = true; - m_current_task->owned = false; - m_current_task->hint = false; - return; + if (!m_pending_writes.front().query) + { + // If this happens, the assert above will fire. 
There should never be a queue header with no work to be done + LOG_ERROR(RSX, "Close to our death."); } - //All slots are occupied, try to pop the earliest entry - m_tsc += max_zcull_delay_us; - update(ptimer); + m_next_tsc = 0; + update(ptimer, m_pending_writes.front().sink); retries++; } } + void ZCULL_control::free_query(occlusion_query_info* query) + { + query->pending = false; + m_free_occlusion_pool.push(query); + } + void ZCULL_control::clear(class ::rsx::thread* ptimer) { if (!m_pending_writes.empty()) @@ -2683,7 +2767,7 @@ namespace rsx if (!It->sink) { discard_occlusion_query(It->query); - It->query->pending = false; + free_query(It->query); valid_size--; ptimer->async_tasks_pending--; continue; @@ -2703,8 +2787,11 @@ namespace rsx { if (m_current_task) m_current_task->num_draws++; + } - m_cycles_delay = max_zcull_delay_us; + void ZCULL_control::on_sync_hint() + { + m_sync_tag = ++m_timer; } void ZCULL_control::write(vm::addr_t sink, u64 timestamp, u32 type, u32 value) @@ -2745,6 +2832,21 @@ namespace rsx { if (!m_pending_writes.empty()) { + // Quick reverse scan to push commands ahead of time + for (auto It = m_pending_writes.rbegin(); It != m_pending_writes.rend(); ++It) + { + if (It->query && It->query->num_draws) + { + if (It->query->sync_tag > m_sync_tag) + { + // LOG_TRACE(RSX, "[Performance warning] Query hint emit during sync command."); + ptimer->sync_hint(FIFO_hint::hint_zcull_sync, It->query); + } + + break; + } + } + u32 processed = 0; const bool has_unclaimed = (m_pending_writes.back().sink == 0); @@ -2778,13 +2880,19 @@ namespace rsx discard_occlusion_query(query); } - query->pending = false; + free_query(query); } if (!writer.forwarder) { // No other queries in the chain, write result write(&writer, ptimer->timestamp(), result); + + if (query && ptimer->cond_render_ctrl.sync_tag == query->sync_tag) + { + const bool eval_failed = (result == 0); + ptimer->cond_render_ctrl.set_eval_result(ptimer, eval_failed); + } } processed++; @@ -2824,19 +2932,9 @@ namespace rsx //Decrement jobs counter ptimer->async_tasks_pending -= processed; } - - if (ptimer->conditional_render_enabled && ptimer->conditional_render_test_address) - { - ptimer->conditional_render_test_failed = vm::read32(ptimer->conditional_render_test_address) == 0; - ptimer->conditional_render_test_address = 0; - } - - //Critical, since its likely a WAIT_FOR_IDLE type has been processed, all results are considered available - m_cycles_delay = min_zcull_delay_us; - m_tsc = std::max(m_tsc, get_system_time()); } - void ZCULL_control::update(::rsx::thread* ptimer, u32 sync_address) + void ZCULL_control::update(::rsx::thread* ptimer, u32 sync_address, bool hint) { if (m_pending_writes.empty()) { @@ -2850,27 +2948,52 @@ namespace rsx return; } - // Update timestamp and proceed with processing only if there is work to be done - m_tsc = std::max(m_tsc, get_system_time()); - if (!sync_address) { - if (m_tsc < front.due_tsc) + if (hint || ptimer->async_tasks_pending >= max_safe_queue_depth) { - if (front.query && !front.query->hint && (front.due_tsc - m_tsc) <= m_backend_warn_threshold) + verify(HERE), !active || !hint; + + // Prepare the whole queue for reading. 
This happens when zcull activity is disabled or queue is too long + for (auto It = m_pending_writes.rbegin(); It != m_pending_writes.rend(); ++It) { - if (front.type == CELL_GCM_ZPASS_PIXEL_CNT || front.type == CELL_GCM_ZCULL_STATS3) + if (It->query) { - // Imminent read - ptimer->sync_hint(FIFO_hint::hint_zcull_sync, reinterpret_cast(front.query)); - } + if (It->query->num_draws && It->query->sync_tag > m_sync_tag) + { + ptimer->sync_hint(FIFO_hint::hint_zcull_sync, It->query); + verify(HERE), It->query->sync_tag < m_sync_tag; + } - front.query->hint = true; + break; + } } + } - // Avoid spamming backend with report status updates + if (m_tsc = get_system_time(); m_tsc < m_next_tsc) + { return; } + else + { + // Schedule ahead + m_next_tsc = m_tsc + min_zcull_tick_us; + +#if 0 + // Schedule a queue flush if needed + if (front.query && front.query->num_draws && front.query->sync_tag > m_sync_tag) + { + const auto elapsed = m_tsc - front.query->timestamp; + if (elapsed > max_zcull_delay_us) + { + ptimer->sync_hint(FIFO_hint::hint_zcull_sync, reinterpret_cast(front.query)); + verify(HERE), front.query->sync_tag < m_sync_tag; + } + + return; + } +#endif + } } u32 stat_tag_to_remove = m_statistics_tag_id; @@ -2904,7 +3027,7 @@ namespace rsx verify(HERE), query->pending; const bool implemented = (writer.type == CELL_GCM_ZPASS_PIXEL_CNT || writer.type == CELL_GCM_ZCULL_STATS3); - if (force_read || writer.due_tsc < m_tsc) + if (force_read) { if (implemented && !result && query->num_draws) { @@ -2938,13 +3061,6 @@ namespace rsx } else { - if (!query->hint && (writer.due_tsc - m_tsc) <= m_backend_warn_threshold) - { - // Imminent read - ptimer->sync_hint(FIFO_hint::hint_zcull_sync, reinterpret_cast(query)); - query->hint = true; - } - //Too early; abort break; } @@ -2956,7 +3072,7 @@ namespace rsx } } - query->pending = false; + free_query(query); } stat_tag_to_remove = writer.counter_tag; @@ -2966,6 +3082,12 @@ namespace rsx { // No other queries in the chain, write result write(&writer, ptimer->timestamp(), result); + + if (query && ptimer->cond_render_ctrl.sync_tag == query->sync_tag) + { + const bool eval_failed = (result == 0); + ptimer->cond_render_ctrl.set_eval_result(ptimer, eval_failed); + } } processed++; @@ -3039,7 +3161,11 @@ namespace rsx { if (!(flags & sync_no_notify)) { - ptimer->sync_hint(FIFO_hint::hint_zcull_sync, reinterpret_cast(query)); + if (UNLIKELY(query->sync_tag > m_sync_tag)) + { + ptimer->sync_hint(FIFO_hint::hint_zcull_sync, query); + verify(HERE), m_sync_tag > query->sync_tag; + } } update(ptimer, sync_address); @@ -3049,15 +3175,36 @@ namespace rsx return result_zcull_intr; } - occlusion_query_info* ZCULL_control::find_query(vm::addr_t sink_address) + query_search_result ZCULL_control::find_query(vm::addr_t sink_address) { - for (auto &writer : m_pending_writes) + u32 stat_id = 0; + for (auto It = m_pending_writes.crbegin(); It != m_pending_writes.crend(); ++It) { - if (writer.sink == sink_address) - return writer.query; + if (UNLIKELY(stat_id)) + { + if (It->counter_tag != stat_id) + { + // Zcull stats were cleared between this query and the required one + return { true, 0, nullptr }; + } + + if (It->query) + { + return { true, 0, It->query }; + } + } + else if (It->sink == sink_address) + { + if (It->query) + { + return { true, 0, It->query }; + } + + stat_id = It->counter_tag; + } } - return nullptr; + return {}; } u32 ZCULL_control::copy_reports_to(u32 start, u32 range, u32 dest) @@ -3078,5 +3225,70 @@ namespace rsx return bytes_to_write; } + + + // Conditional 
rendering helpers + bool conditional_render_eval::disable_rendering() const + { + return (enabled && eval_failed); + } + + bool conditional_render_eval::eval_pending() const + { + return (enabled && eval_address); + } + + void conditional_render_eval::enable_conditional_render(::rsx::thread* pthr, u32 address) + { + if (hw_cond_active) + { + verify(HERE), enabled; + pthr->end_conditional_rendering(); + } + + enabled = true; + eval_failed = false; + eval_address = address; + sync_tag = 0; + } + + void conditional_render_eval::disable_conditional_render(::rsx::thread* pthr) + { + if (hw_cond_active) + { + verify(HERE), enabled; + pthr->end_conditional_rendering(); + } + + enabled = false; + eval_failed = false; + eval_address = 0; + sync_tag = 0; + } + + void conditional_render_eval::set_sync_tag(u64 value) + { + sync_tag = value; + } + + void conditional_render_eval::set_eval_result(::rsx::thread* pthr, bool failed) + { + if (hw_cond_active) + { + verify(HERE), enabled; + pthr->end_conditional_rendering(); + } + + eval_failed = failed; + eval_address = 0; + sync_tag = 0; + } + + void conditional_render_eval::eval_result(::rsx::thread* pthr) + { + vm::ptr result = vm::cast(eval_address); + const bool failed = (result->value == 0); + set_eval_result(pthr, failed); + } } } diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index cd0590784326..c55dc7ce0934 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -2,6 +2,8 @@ #include #include +#include + #include "GCM.h" #include "rsx_cache.h" #include "RSXFIFO.h" @@ -338,12 +340,11 @@ namespace rsx u32 driver_handle; u32 result; u32 num_draws; - bool hint; + u64 sync_tag; + u64 timestamp; bool pending; bool active; bool owned; - - u64 sync_timestamp; }; struct queued_report_write @@ -355,8 +356,13 @@ namespace rsx vm::addr_t sink; // Memory location of the report std::vector sink_alias; // Aliased memory addresses + }; - u64 due_tsc; + struct query_search_result + { + bool found; + u32 raw_zpass_result; + occlusion_query_info* query; }; enum sync_control @@ -369,31 +375,39 @@ namespace rsx struct ZCULL_control { // Delay before a report update operation is forced to retire - const u32 max_zcull_delay_us = 500; + const u32 max_zcull_delay_us = 4000; const u32 min_zcull_delay_us = 50; + const u32 min_zcull_tick_us = 500; // Number of occlusion query slots available. Real hardware actually has far fewer units before choking - const u32 occlusion_query_count = 128; + const u32 occlusion_query_count = 1024; + const u32 max_safe_queue_depth = 892; bool active = false; bool enabled = false; - std::array m_occlusion_query_data = {}; + std::array m_occlusion_query_data = {}; + std::stack m_free_occlusion_pool; occlusion_query_info* m_current_task = nullptr; u32 m_statistics_tag_id = 0; + + // Scheduling clock. Granunlarity is min_zcull_tick value. u64 m_tsc = 0; - u32 m_cycles_delay = max_zcull_delay_us; - u32 m_backend_warn_threshold = max_zcull_delay_us / 2; + u64 m_next_tsc = 0; + + // Incremental tag used for tracking sync events. Hardware clock resolution is too low for the job. 
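+		// m_timer is bumped each time a query retires to the pending list and each time a sync hint is issued;
+		// m_sync_tag records the last hinted value, so a new backend hint is only emitted while query->sync_tag > m_sync_tag
+		// (see on_sync_hint() and the reverse scans in sync()/update()).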
+ u64 m_sync_tag = 0; + u64 m_timer = 0; std::vector m_pending_writes; std::unordered_map m_statistics_map; - ZCULL_control() = default; - ~ZCULL_control() = default; + ZCULL_control(); + ~ZCULL_control(); - void set_enabled(class ::rsx::thread* ptimer, bool state); - void set_active(class ::rsx::thread* ptimer, bool state); + void set_enabled(class ::rsx::thread* ptimer, bool state, bool flush_queue = false); + void set_active(class ::rsx::thread* ptimer, bool state, bool flush_queue = false); void write(vm::addr_t sink, u64 timestamp, u32 type, u32 value); void write(queued_report_write* writer, u64 timestamp, u32 value); @@ -404,6 +418,9 @@ namespace rsx // Sets up a new query slot and sets it to the current task void allocate_new_query(class ::rsx::thread* ptimer); + // Free a query slot in use + void free_query(occlusion_query_info* query); + // Clears current stat block and increments stat_tag_id void clear(class ::rsx::thread* ptimer); @@ -414,16 +431,19 @@ namespace rsx flags32_t read_barrier(class ::rsx::thread* ptimer, u32 memory_address, u32 memory_range, flags32_t flags); // Call once every 'tick' to update, optional address provided to partially sync until address is processed - void update(class ::rsx::thread* ptimer, u32 sync_address = 0); + void update(class ::rsx::thread* ptimer, u32 sync_address = 0, bool hint = false); // Draw call notification void on_draw(); + // Sync hint notification + void on_sync_hint(); + // Check for pending writes bool has_pending() const { return !m_pending_writes.empty(); } // Search for query synchronized at address - occlusion_query_info* find_query(vm::addr_t sink_address); + query_search_result find_query(vm::addr_t sink_address); // Copies queries in range rebased from source range to destination range u32 copy_reports_to(u32 start, u32 range, u32 dest); @@ -435,6 +455,38 @@ namespace rsx virtual void get_occlusion_query_result(occlusion_query_info* query) { query->result = UINT32_MAX; } virtual void discard_occlusion_query(occlusion_query_info* /*query*/) {} }; + + // Helper class for conditional rendering + struct conditional_render_eval + { + bool enabled = false; + bool eval_failed = false; + bool hw_cond_active = false; + bool reserved = false; + u32 eval_address = 0; + u64 sync_tag = 0; + + // Returns true if rendering is disabled as per conditional render test + bool disable_rendering() const; + + // Returns true if a conditional render is active but not yet evaluated + bool eval_pending() const; + + // Enable conditional rendering + void enable_conditional_render(thread* pthr, u32 address); + + // Disable conditional rendering + void disable_conditional_render(thread* pthr); + + // Sets up the zcull sync tag + void set_sync_tag(u64 value); + + // Sets evaluation result. 
Result is true if conditional evaluation failed + void set_eval_result(thread* pthr, bool failed); + + // Evaluates the condition by accessing memory directly + void eval_result(thread* pthr); + }; } struct frame_statistics_t @@ -489,10 +541,11 @@ namespace rsx struct backend_configuration { - bool supports_multidraw; // Draw call batching - bool supports_hw_a2c; // Alpha to coverage - bool supports_hw_renormalization; // Should be true on NV hardware which matches PS3 texture renormalization behaviour - bool supports_hw_a2one; // Alpha to one + bool supports_multidraw; // Draw call batching + bool supports_hw_a2c; // Alpha to coverage + bool supports_hw_renormalization; // Should be true on NV hardware which matches PS3 texture renormalization behaviour + bool supports_hw_a2one; // Alpha to one + bool supports_hw_conditional_render; // Conditional render }; struct sampled_image_descriptor_base; @@ -655,13 +708,12 @@ namespace rsx atomic_t async_tasks_pending{ 0 }; - u32 conditional_render_test_address = 0; - bool conditional_render_test_failed = false; - bool conditional_render_enabled = false; bool zcull_stats_enabled = false; bool zcull_rendering_enabled = false; bool zcull_pixel_cnt_enabled = false; + reports::conditional_render_eval cond_render_ctrl; + void operator()(); virtual u64 get_cycles() = 0; virtual ~thread(); @@ -708,10 +760,15 @@ namespace rsx void get_zcull_stats(u32 type, vm::addr_t sink); u32 copy_zcull_stats(u32 memory_range_start, u32 memory_range, u32 destination); + void enable_conditional_rendering(vm::addr_t ref); + void disable_conditional_rendering(); + virtual void begin_conditional_rendering(); + virtual void end_conditional_rendering(); + // sync void sync(); flags32_t read_barrier(u32 memory_address, u32 memory_range, bool unconditional); - virtual void sync_hint(FIFO_hint /*hint*/, u64 /*arg*/) {} + virtual void sync_hint(FIFO_hint hint, void* args); gsl::span get_raw_index_array(const draw_clause& draw_indexed_clause) const; diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 7a4f6620e2fc..61f715344bdf 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -543,6 +543,9 @@ VKGSRender::VKGSRender() : GSRender() // NOTE: On NVIDIA cards going back decades (including the PS3) there is a slight normalization inaccuracy in compressed formats. // Confirmed in BLES01916 (The Evil Within) which uses RGB565 for some virtual texturing data. 
backend_config.supports_hw_renormalization = (vk::get_driver_vendor() == vk::driver_vendor::NVIDIA); + + // Stub + backend_config.supports_hw_conditional_render = true; } VKGSRender::~VKGSRender() @@ -935,8 +938,7 @@ void VKGSRender::begin() { rsx::thread::begin(); - if (skip_current_frame || swapchain_unavailable || - (conditional_render_enabled && conditional_render_test_failed)) + if (skip_current_frame || swapchain_unavailable || cond_render_ctrl.disable_rendering()) return; init_buffers(rsx::framebuffer_creation_context::context_draw); @@ -1202,8 +1204,7 @@ void VKGSRender::emit_geometry(u32 sub_index) void VKGSRender::end() { - if (skip_current_frame || !framebuffer_status_valid || swapchain_unavailable || - (conditional_render_enabled && conditional_render_test_failed)) + if (skip_current_frame || !framebuffer_status_valid || swapchain_unavailable || cond_render_ctrl.disable_rendering()) { execute_nop_draw(); rsx::thread::end(); @@ -1737,8 +1738,9 @@ void VKGSRender::end() u32 occlusion_id = m_occlusion_query_pool.find_free_slot(); if (occlusion_id == UINT32_MAX) { - m_tsc += 100; - update(this); + // Force flush + LOG_ERROR(RSX, "[Performance Warning] Out of free occlusion slots. Forcing hard sync."); + ZCULL_control::sync(this); occlusion_id = m_occlusion_query_pool.find_free_slot(); if (occlusion_id == UINT32_MAX) @@ -2181,8 +2183,11 @@ void VKGSRender::flush_command_queue(bool hard_sync) open_command_buffer(); } -void VKGSRender::sync_hint(rsx::FIFO_hint hint, u64 arg) +void VKGSRender::sync_hint(rsx::FIFO_hint hint, void* args) { + verify(HERE), args; + rsx::thread::sync_hint(hint, args); + // Occlusion test result evaluation is coming up, avoid a hard sync switch (hint) { @@ -2197,15 +2202,14 @@ void VKGSRender::sync_hint(rsx::FIFO_hint hint, u64 arg) return; // Check if the required report is synced to this CB - if (auto occlusion_info = zcull_ctrl->find_query(vm::cast(arg))) + auto occlusion_info = static_cast(args); + auto& data = m_occlusion_map[occlusion_info->driver_handle]; + + if (data.command_buffer_to_wait == m_current_command_buffer && !data.indices.empty()) { - auto& data = m_occlusion_map[occlusion_info->driver_handle]; - if (data.command_buffer_to_wait == m_current_command_buffer && !data.indices.empty()) - { - // Confirmed hard sync coming up, post a sync request - m_flush_requests.post(false); - m_flush_requests.remove_one(); - } + // Confirmed hard sync coming up, post a sync request + m_flush_requests.post(false); + m_flush_requests.remove_one(); } break; @@ -2215,7 +2219,7 @@ void VKGSRender::sync_hint(rsx::FIFO_hint hint, u64 arg) if (!(m_current_command_buffer->flags & vk::command_buffer::cb_has_occlusion_task)) return; - auto occlusion_info = reinterpret_cast(arg); + auto occlusion_info = static_cast(args); auto& data = m_occlusion_map[occlusion_info->driver_handle]; if (data.command_buffer_to_wait == m_current_command_buffer && !data.indices.empty()) @@ -3666,17 +3670,9 @@ void VKGSRender::get_occlusion_query_result(rsx::reports::occlusion_query_info* { m_flush_requests.clear_pending_flag(); } - } - - // Fast wait. 
Avoids heavyweight routines - while (!data.command_buffer_to_wait->poke()) - { - _mm_pause(); - if (Emu.IsStopped()) - { - return; - } + LOG_ERROR(RSX, "[Performance warning] Unexpected ZCULL read caused a hard sync"); + busy_wait(); } // Gather data diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index 9152b8038543..43ec8ce658c4 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -44,7 +44,7 @@ namespace vk #define VK_FRAGMENT_CONSTANTS_BUFFER_SIZE_M 16 #define VK_INDEX_RING_BUFFER_SIZE_M 16 -#define VK_MAX_ASYNC_CB_COUNT 64 +#define VK_MAX_ASYNC_CB_COUNT 256 #define VK_MAX_ASYNC_FRAMES 2 using rsx::flags32_t; @@ -465,7 +465,7 @@ class VKGSRender : public GSRender, public ::rsx::reports::ZCULL_control void set_scissor(bool clip_viewport); void bind_viewport(); - void sync_hint(rsx::FIFO_hint hint, u64 arg) override; + void sync_hint(rsx::FIFO_hint hint, void* args) override; void begin_occlusion_query(rsx::reports::occlusion_query_info* query) override; void end_occlusion_query(rsx::reports::occlusion_query_info* query) override; diff --git a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp index 332ba362f475..8fdfa4f50c81 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp @@ -6,7 +6,6 @@ #include "VKHelpers.h" #include "../Common/GLSLCommon.h" -#pragma optimize("", off) std::string VKVertexDecompilerThread::getFloatTypeName(size_t elementCount) { diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp index b0893c08a25c..757f7c49962d 100644 --- a/rpcs3/Emu/RSX/rsx_methods.cpp +++ b/rpcs3/Emu/RSX/rsx_methods.cpp @@ -580,14 +580,11 @@ namespace rsx switch (mode) { case 1: - rsx->conditional_render_enabled = false; - rsx->conditional_render_test_failed = false; + rsx->disable_conditional_rendering(); return; case 2: - rsx->conditional_render_enabled = true; break; default: - rsx->conditional_render_enabled = false; LOG_ERROR(RSX, "Unknown render mode %d", mode); return; } @@ -597,15 +594,12 @@ namespace rsx if (!address_ptr) { - rsx->conditional_render_test_failed = false; LOG_ERROR(RSX, "Bad argument passed to NV4097_SET_RENDER_ENABLE, arg=0x%X", arg); return; } // Defer conditional render evaluation - rsx->sync_hint(FIFO_hint::hint_conditional_render_eval, address_ptr); - rsx->conditional_render_test_address = address_ptr; - rsx->conditional_render_test_failed = false; + rsx->enable_conditional_rendering(address_ptr); } void set_zcull_render_enable(thread* rsx, u32, u32 arg) From b63007cbd0116b897a67b674c6d83fd21d05289c Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sat, 7 Dec 2019 16:28:35 +0300 Subject: [PATCH 11/21] vk: Implement multithreaded command submission - A few nagging issues remain, specifically that partial command stream largely caused by poor synchronization structures for partial CS flush and also the fact that occlusion map entries wait on a command buffer and not an EID! 
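In practice this means vkQueueSubmit is no longer issued inline on the render thread when
multithreaded RSX is enabled and the submit is not flagged for immediate flush: the submit
parameters are copied into a packet, handed to the offload (DMA) worker via backend_ctrl(),
and the worker performs the real submission before marking the fence wrapper as flushed.
Waiters spin on that flag before touching the VkFence. A rough sketch of the handshake
(illustrative only; fence_t/packet_t/offload_loop are simplified stand-ins for vk::fence,
vk::submit_packet and the dma_manager worker, not the actual classes):

    #include <atomic>
    #include <condition_variable>
    #include <mutex>
    #include <queue>
    #include <thread>

    struct fence_t  { std::atomic<bool> flushed{ false }; };   // stands in for vk::fence
    struct packet_t { fence_t* pfence = nullptr; };            // queue handle + VkSubmitInfo copy omitted

    std::mutex              m_lock;
    std::condition_variable m_cv;
    std::queue<packet_t*>   m_work;

    // Render thread: defer the submission instead of calling vkQueueSubmit directly
    void queue_submit_async(packet_t* p)
    {
        { std::lock_guard<std::mutex> g(m_lock); m_work.push(p); }
        m_cv.notify_one();
    }

    // Offload thread: performs the real submission, then signals that the flush completed
    void offload_loop()
    {
        for (;;)
        {
            std::unique_lock<std::mutex> g(m_lock);
            m_cv.wait(g, [] { return !m_work.empty(); });
            packet_t* p = m_work.front(); m_work.pop();
            g.unlock();

            // The real vkQueueSubmit(p->queue, 1, &p->submit_info, p->pfence->handle) happens here
            p->pfence->flushed = true;
            delete p;
        }
    }

    // Waiters (e.g. command_buffer_chunk::flush) poll the flag before reading the VkFence status
    void wait_flush(fence_t& f)
    {
        while (!f.flushed) std::this_thread::yield();
    }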
--- rpcs3/Emu/CMakeLists.txt | 1 + rpcs3/Emu/RSX/RSXOffload.cpp | 12 +++ rpcs3/Emu/RSX/RSXOffload.h | 10 +- rpcs3/Emu/RSX/RSXThread.cpp | 2 +- rpcs3/Emu/RSX/RSXThread.h | 3 + rpcs3/Emu/RSX/VK/VKCommandStream.cpp | 36 +++++++ rpcs3/Emu/RSX/VK/VKCommandStream.h | 42 ++++++++ rpcs3/Emu/RSX/VK/VKGSRender.cpp | 66 +++++++----- rpcs3/Emu/RSX/VK/VKGSRender.h | 29 +++--- rpcs3/Emu/RSX/VK/VKHelpers.cpp | 35 +++---- rpcs3/Emu/RSX/VK/VKHelpers.h | 82 +++++++++++---- rpcs3/Emu/RSX/VK/VKTextureCache.h | 11 +- rpcs3/VKGSRender.vcxproj | 2 + rpcs3/VKGSRender.vcxproj.filters | 144 +++++++-------------------- 14 files changed, 280 insertions(+), 195 deletions(-) create mode 100644 rpcs3/Emu/RSX/VK/VKCommandStream.cpp create mode 100644 rpcs3/Emu/RSX/VK/VKCommandStream.h diff --git a/rpcs3/Emu/CMakeLists.txt b/rpcs3/Emu/CMakeLists.txt index db4aa1af754e..b6fa4763c089 100644 --- a/rpcs3/Emu/CMakeLists.txt +++ b/rpcs3/Emu/CMakeLists.txt @@ -386,6 +386,7 @@ target_sources(rpcs3_emu PRIVATE if(TARGET 3rdparty_vulkan) target_sources(rpcs3_emu PRIVATE + RSX/VK/VKCommandStream.cpp RSX/VK/VKCommonDecompiler.cpp RSX/VK/VKDMA.cpp RSX/VK/VKFormats.cpp diff --git a/rpcs3/Emu/RSX/RSXOffload.cpp b/rpcs3/Emu/RSX/RSXOffload.cpp index 92d89545499b..2f2d1ff41ea8 100644 --- a/rpcs3/Emu/RSX/RSXOffload.cpp +++ b/rpcs3/Emu/RSX/RSXOffload.cpp @@ -57,6 +57,9 @@ namespace rsx static_cast(m_current_job->aux_param0), m_current_job->length); break; + case callback: + rsx::get_current_renderer()->renderctl(m_current_job->aux_param0, m_current_job->src); + break; default: ASSUME(0); fmt::throw_exception("Unreachable" HERE); @@ -119,6 +122,15 @@ namespace rsx } } + // Backend callback + void dma_manager::backend_ctrl(u32 request_code, void* args) + { + verify(HERE), g_cfg.video.multithreaded_rsx; + + ++m_enqueued_count; + m_work_queue.push(request_code, args); + } + // Synchronization bool dma_manager::is_current_thread() const { diff --git a/rpcs3/Emu/RSX/RSXOffload.h b/rpcs3/Emu/RSX/RSXOffload.h index 92ba45d8f155..f436971f076e 100644 --- a/rpcs3/Emu/RSX/RSXOffload.h +++ b/rpcs3/Emu/RSX/RSXOffload.h @@ -17,7 +17,8 @@ namespace rsx { raw_copy = 0, vector_copy = 1, - index_emulate = 2 + index_emulate = 2, + callback = 3 }; struct transport_packet @@ -41,6 +42,10 @@ namespace rsx transport_packet(void *_dst, rsx::primitive_type prim, u32 len) : dst(_dst), aux_param0(static_cast(prim)), length(len), type(op::index_emulate) {} + + transport_packet(u32 command, void* args) + : aux_param0(command), src(args), type(op::callback) + {} }; lf_queue m_work_queue; @@ -67,6 +72,9 @@ namespace rsx // Vertex utilities void emulate_as_indexed(void *dst, rsx::primitive_type primitive, u32 count); + // Renderer callback + void backend_ctrl(u32 request_code, void* args); + // Synchronization bool is_current_thread() const; void sync(); diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index de4dc6656ffa..327b5328c916 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -446,10 +446,10 @@ namespace rsx rsx::overlays::reset_performance_overlay(); + g_dma_manager.init(); on_init_thread(); method_registers.init(); - g_dma_manager.init(); m_profiler.enabled = !!g_cfg.video.overlay; if (!zcull_ctrl) diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index c55dc7ce0934..7306dfca5e22 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -753,6 +753,9 @@ namespace rsx virtual void on_invalidate_memory_range(const address_range & /*range*/, rsx::invalidation_cause) {} virtual void 
notify_tile_unbound(u32 /*tile*/) {} + // control + virtual void renderctl(u32 request_code, void* args) {} + // zcull void notify_zcull_info_changed(); void clear_zcull_stats(u32 type); diff --git a/rpcs3/Emu/RSX/VK/VKCommandStream.cpp b/rpcs3/Emu/RSX/VK/VKCommandStream.cpp new file mode 100644 index 000000000000..a333126bc94d --- /dev/null +++ b/rpcs3/Emu/RSX/VK/VKCommandStream.cpp @@ -0,0 +1,36 @@ +#include "stdafx.h" +#include "VKCommandStream.h" + +namespace vk +{ + // global submit guard to prevent race condition on queue submit + shared_mutex g_submit_mutex; + + void acquire_global_submit_lock() + { + g_submit_mutex.lock(); + } + + void release_global_submit_lock() + { + g_submit_mutex.unlock(); + } + + void queue_submit(VkQueue queue, const VkSubmitInfo* info, fence* pfence, VkBool32 flush) + { + if (!flush && g_cfg.video.multithreaded_rsx) + { + auto packet = new submit_packet(queue, pfence, info); + rsx::g_dma_manager.backend_ctrl(rctrl_queue_submit, packet); + } + else + { + acquire_global_submit_lock(); + vkQueueSubmit(queue, 1, info, pfence->handle); + release_global_submit_lock(); + + // Signal fence + pfence->flushed = true; + } + } +} diff --git a/rpcs3/Emu/RSX/VK/VKCommandStream.h b/rpcs3/Emu/RSX/VK/VKCommandStream.h new file mode 100644 index 000000000000..67b465389109 --- /dev/null +++ b/rpcs3/Emu/RSX/VK/VKCommandStream.h @@ -0,0 +1,42 @@ +#pragma once + +#include "VKHelpers.h" + +namespace vk +{ + struct submit_packet + { + // Core components + VkQueue queue; + fence* pfence; + VkSubmitInfo submit_info; + + // Pointer redirection storage + VkSemaphore wait_semaphore; + VkSemaphore signal_semaphore; + VkFlags wait_flags; + + submit_packet(VkQueue _q, fence* _f, const VkSubmitInfo* info) : + queue(_q), pfence(_f), submit_info(*info), + wait_semaphore(0), signal_semaphore(0), wait_flags(0) + { + if (info->waitSemaphoreCount) + { + wait_semaphore = *info->pWaitSemaphores; + submit_info.pWaitSemaphores = &wait_semaphore; + } + + if (info->signalSemaphoreCount) + { + signal_semaphore = *info->pSignalSemaphores; + submit_info.pSignalSemaphores = &signal_semaphore; + } + + if (info->pWaitDstStageMask) + { + wait_flags = *info->pWaitDstStageMask; + submit_info.pWaitDstStageMask = &wait_flags; + } + } + }; +} diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 61f715344bdf..77e65e1bad68 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -9,6 +9,7 @@ #include "VKCommonDecompiler.h" #include "VKRenderPass.h" #include "VKResourceManager.h" +#include "VKCommandStream.h" namespace { @@ -2157,23 +2158,24 @@ void VKGSRender::flush_command_queue(bool hard_sync) } else { - // Mark this queue as pending + // Mark this queue as pending and proceed m_current_command_buffer->pending = true; + } - // Grab next cb in line and make it usable - m_current_cb_index = (m_current_cb_index + 1) % VK_MAX_ASYNC_CB_COUNT; - m_current_command_buffer = &m_primary_cb_list[m_current_cb_index]; + // Grab next cb in line and make it usable + // NOTE: Even in the case of a hard sync, this is required to free any waiters on the CB (ZCULL) + m_current_cb_index = (m_current_cb_index + 1) % VK_MAX_ASYNC_CB_COUNT; + m_current_command_buffer = &m_primary_cb_list[m_current_cb_index]; - if (!m_current_command_buffer->poke()) - { - LOG_ERROR(RSX, "CB chain has run out of free entries!"); - } + if (!m_current_command_buffer->poke()) + { + LOG_ERROR(RSX, "CB chain has run out of free entries!"); + } - m_current_command_buffer->reset(); + 
m_current_command_buffer->reset(); - // Just in case a queued frame holds a ref to this cb, drain the present queue - check_present_status(); - } + // Just in case a queued frame holds a ref to this cb, drain the present queue + check_present_status(); if (m_occlusion_query_active) { @@ -2278,6 +2280,9 @@ void VKGSRender::present(frame_context_t *ctx) { verify(HERE), ctx->present_image != UINT32_MAX; + // Partial CS flush + ctx->swap_command_buffer->flush(); + if (!swapchain_unavailable) { switch (VkResult error = m_swapchain->present(ctx->present_wait_semaphore, ctx->present_image)) @@ -2824,11 +2829,9 @@ void VKGSRender::init_buffers(rsx::framebuffer_creation_context context, bool) prepare_rtts(context); } -void VKGSRender::close_and_submit_command_buffer(VkFence fence, VkSemaphore wait_semaphore, VkSemaphore signal_semaphore, VkPipelineStageFlags pipeline_stage_flags) +void VKGSRender::close_and_submit_command_buffer(vk::fence* pFence, VkSemaphore wait_semaphore, VkSemaphore signal_semaphore, VkPipelineStageFlags pipeline_stage_flags) { - // Wait before sync block below - rsx::g_dma_manager.sync(); - + // NOTE: There is no need to wait for dma sync. When MTRSX is enabled, the commands are submitted in order anyway due to CSMT if (vk::test_status_interrupt(vk::heap_dirty)) { if (m_attrib_ring_info.dirty() || @@ -2881,7 +2884,7 @@ void VKGSRender::close_and_submit_command_buffer(VkFence fence, VkSemaphore wait m_current_command_buffer->tag(); m_current_command_buffer->submit(m_swapchain->get_graphics_queue(), - wait_semaphore, signal_semaphore, fence, pipeline_stage_flags); + wait_semaphore, signal_semaphore, pFence, pipeline_stage_flags); } void VKGSRender::open_command_buffer() @@ -3155,16 +3158,11 @@ void VKGSRender::reinitialize_swapchain() } //Will have to block until rendering is completed - VkFence resize_fence = VK_NULL_HANDLE; - VkFenceCreateInfo infos = {}; - infos.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; - - vkCreateFence((*m_device), &infos, nullptr, &resize_fence); + vk::fence resize_fence(*m_device); //Flush the command buffer - close_and_submit_command_buffer(resize_fence); - vk::wait_for_fence(resize_fence); - vkDestroyFence((*m_device), resize_fence, nullptr); + close_and_submit_command_buffer(&resize_fence); + vk::wait_for_fence(&resize_fence); m_current_command_buffer->reset(); open_command_buffer(); @@ -3581,6 +3579,22 @@ void VKGSRender::flip(const rsx::display_flip_info_t& info) rsx::thread::flip(info); } +void VKGSRender::renderctl(u32 request_code, void* args) +{ + switch (request_code) + { + case vk::rctrl_queue_submit: + { + auto packet = reinterpret_cast(args); + vk::queue_submit(packet->queue, &packet->submit_info, packet->pfence, VK_TRUE); + free(packet); + break; + } + default: + fmt::throw_exception("Unhandled request code 0x%x" HERE, request_code); + } +} + bool VKGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate) { if (swapchain_unavailable) @@ -3675,6 +3689,8 @@ void VKGSRender::get_occlusion_query_result(rsx::reports::occlusion_query_info* busy_wait(); } + data.command_buffer_to_wait->wait(); + // Gather data for (const auto occlusion_id : data.indices) { diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index 43ec8ce658c4..63b2742b3ed5 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -67,7 +67,7 @@ enum struct command_buffer_chunk: public vk::command_buffer { - VkFence submit_fence = VK_NULL_HANDLE; + vk::fence* submit_fence = nullptr; 
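+	// Heap-allocated wrapper instead of a raw VkFence: poke()/wait()/flush() below check the host-side
+	// 'flushed' flag first, because with multithreaded submission the vkQueueSubmit that will signal this
+	// fence may still be sitting in the offload thread's queue.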
VkDevice m_device = VK_NULL_HANDLE; std::atomic_bool pending = { false }; @@ -79,18 +79,13 @@ struct command_buffer_chunk: public vk::command_buffer void init_fence(VkDevice dev) { m_device = dev; - - VkFenceCreateInfo info = {}; - info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; - CHECK_RESULT(vkCreateFence(m_device, &info, nullptr, &submit_fence)); + submit_fence = new vk::fence(dev); } void destroy() { vk::command_buffer::destroy(); - - if (submit_fence != VK_NULL_HANDLE) - vkDestroyFence(m_device, submit_fence, nullptr); + delete submit_fence; } void tag() @@ -116,13 +111,16 @@ struct command_buffer_chunk: public vk::command_buffer if (!pending) return true; - if (vkGetFenceStatus(m_device, submit_fence) == VK_SUCCESS) + if (!submit_fence->flushed) + return false; + + if (vkGetFenceStatus(m_device, submit_fence->handle) == VK_SUCCESS) { lock.upgrade(); if (pending) { - vk::reset_fence(&submit_fence); + vk::reset_fence(submit_fence); vk::on_event_completed(eid_tag); pending = false; @@ -146,7 +144,7 @@ struct command_buffer_chunk: public vk::command_buffer if (pending) { - vk::reset_fence(&submit_fence); + vk::reset_fence(submit_fence); vk::on_event_completed(eid_tag); pending = false; @@ -155,6 +153,11 @@ struct command_buffer_chunk: public vk::command_buffer return ret; } + + void flush() const + { + submit_fence->wait_flush(); + } }; struct occlusion_data @@ -430,7 +433,7 @@ class VKGSRender : public GSRender, public ::rsx::reports::ZCULL_control void open_command_buffer(); void close_and_submit_command_buffer( - VkFence fence = VK_NULL_HANDLE, + vk::fence* fence = nullptr, VkSemaphore wait_semaphore = VK_NULL_HANDLE, VkSemaphore signal_semaphore = VK_NULL_HANDLE, VkPipelineStageFlags pipeline_stage_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT); @@ -486,6 +489,8 @@ class VKGSRender : public GSRender, public ::rsx::reports::ZCULL_control void on_exit() override; void flip(const rsx::display_flip_info_t& info) override; + void renderctl(u32 request_code, void* args) override; + void do_local_task(rsx::FIFO_state state) override; bool scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate) override; void notify_tile_unbound(u32 tile) override; diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.cpp b/rpcs3/Emu/RSX/VK/VKHelpers.cpp index 1708c895db52..9ff3df1e4f0f 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.cpp +++ b/rpcs3/Emu/RSX/VK/VKHelpers.cpp @@ -7,7 +7,10 @@ #include "VKResolveHelper.h" #include "VKResourceManager.h" #include "VKDMA.h" +#include "VKCommandStream.h" + #include "Utilities/mutex.h" +#include "Utilities/lockless.h" namespace vk { @@ -91,9 +94,6 @@ namespace vk u64 g_num_processed_frames = 0; u64 g_num_total_frames = 0; - // global submit guard to prevent race condition on queue submit - shared_mutex g_submit_mutex; - VKAPI_ATTR void* VKAPI_CALL mem_realloc(void* pUserData, void* pOriginal, size_t size, size_t alignment, VkSystemAllocationScope allocationScope) { #ifdef _MSC_VER @@ -349,16 +349,6 @@ namespace vk return &g_upload_heap; } - void acquire_global_submit_lock() - { - g_submit_mutex.lock(); - } - - void release_global_submit_lock() - { - g_submit_mutex.unlock(); - } - void reset_compute_tasks() { for (const auto &p : g_compute_tasks) @@ -836,31 +826,30 @@ namespace vk return (g_num_processed_frames > 0)? 
g_num_processed_frames - 1: 0; } - void reset_fence(VkFence *pFence) + void reset_fence(fence *pFence) { if (g_drv_disable_fence_reset) { - vkDestroyFence(*g_current_renderer, *pFence, nullptr); - - VkFenceCreateInfo info = {}; - info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; - CHECK_RESULT(vkCreateFence(*g_current_renderer, &info, nullptr, pFence)); + delete pFence; + pFence = new fence(*g_current_renderer); } else { - CHECK_RESULT(vkResetFences(*g_current_renderer, 1, pFence)); + pFence->reset(); } } - VkResult wait_for_fence(VkFence fence, u64 timeout) + VkResult wait_for_fence(fence* pFence, u64 timeout) { + pFence->wait_flush(); + if (timeout) { - return vkWaitForFences(*g_current_renderer, 1, &fence, VK_FALSE, timeout * 1000ull); + return vkWaitForFences(*g_current_renderer, 1, &pFence->handle, VK_FALSE, timeout * 1000ull); } else { - while (auto status = vkGetFenceStatus(*g_current_renderer, fence)) + while (auto status = vkGetFenceStatus(*g_current_renderer, pFence->handle)) { switch (status) { diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index dc0def2eadcb..745209df08e1 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -107,6 +107,11 @@ namespace vk VK_REMAP_VIEW_MULTISAMPLED = 0xDEADBEEF // Special encoding for multisampled images; returns a multisampled image view }; + enum // callback commands + { + rctrl_queue_submit = 0x80000000 + }; + class context; class render_device; class swap_chain_image; @@ -119,6 +124,7 @@ namespace vk class mem_allocator_base; struct memory_type_mapping; struct gpu_formats_support; + struct fence; const vk::context *get_current_thread_ctx(); void set_current_thread_ctx(const vk::context &ctx); @@ -152,9 +158,10 @@ namespace vk memory_type_mapping get_memory_mapping(const physical_device& dev); gpu_formats_support get_optimal_tiling_supported_formats(const physical_device& dev); - //Sync helpers around vkQueueSubmit + // Sync helpers around vkQueueSubmit void acquire_global_submit_lock(); void release_global_submit_lock(); + void queue_submit(VkQueue queue, const VkSubmitInfo* info, fence* pfence, VkBool32 flush = VK_FALSE); template T* get_compute_task(); @@ -222,8 +229,8 @@ namespace vk const u64 get_last_completed_frame_id(); // Fence reset with driver workarounds in place - void reset_fence(VkFence *pFence); - VkResult wait_for_fence(VkFence pFence, u64 timeout = 0ull); + void reset_fence(fence* pFence); + VkResult wait_for_fence(fence* pFence, u64 timeout = 0ull); VkResult wait_for_event(VkEvent pEvent, u64 timeout = 0ull); // Handle unexpected submit with dangling occlusion query @@ -1022,12 +1029,55 @@ namespace vk } }; + struct fence + { + volatile bool flushed = false; + VkFence handle = VK_NULL_HANDLE; + VkDevice owner = VK_NULL_HANDLE; + + fence(VkDevice dev) + { + owner = dev; + VkFenceCreateInfo info = {}; + info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; + CHECK_RESULT(vkCreateFence(dev, &info, nullptr, &handle)); + } + + ~fence() + { + if (handle) + { + vkDestroyFence(owner, handle, nullptr); + handle = VK_NULL_HANDLE; + } + } + + void reset() + { + vkResetFences(owner, 1, &handle); + flushed = false; + } + + void wait_flush() + { + while (!flushed) + { + _mm_pause(); + } + } + + operator bool() const + { + return (handle != VK_NULL_HANDLE); + } + }; + class command_buffer { private: bool is_open = false; bool is_pending = false; - VkFence m_submit_fence = VK_NULL_HANDLE; + fence* m_submit_fence = nullptr; protected: vk::command_pool *pool = nullptr; @@ -1066,9 +1116,7 @@ 
namespace vk if (auto_reset) { - VkFenceCreateInfo info = {}; - info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; - CHECK_RESULT(vkCreateFence(cmd_pool.get_owner(), &info, nullptr, &m_submit_fence)); + m_submit_fence = new fence(cmd_pool.get_owner()); } pool = &cmd_pool; @@ -1080,7 +1128,9 @@ namespace vk if (m_submit_fence) { - vkDestroyFence(pool->get_owner(), m_submit_fence, nullptr); + //vkDestroyFence(pool->get_owner(), m_submit_fence, nullptr); + delete m_submit_fence; + m_submit_fence = nullptr; } } @@ -1116,7 +1166,8 @@ namespace vk wait_for_fence(m_submit_fence); is_pending = false; - CHECK_RESULT(vkResetFences(pool->get_owner(), 1, &m_submit_fence)); + //CHECK_RESULT(vkResetFences(pool->get_owner(), 1, &m_submit_fence)); + reset_fence(m_submit_fence); CHECK_RESULT(vkResetCommandBuffer(commands, 0)); } @@ -1146,7 +1197,7 @@ namespace vk is_open = false; } - void submit(VkQueue queue, VkSemaphore wait_semaphore, VkSemaphore signal_semaphore, VkFence fence, VkPipelineStageFlags pipeline_stage_flags) + void submit(VkQueue queue, VkSemaphore wait_semaphore, VkSemaphore signal_semaphore, fence* pfence, VkPipelineStageFlags pipeline_stage_flags) { if (is_open) { @@ -1157,10 +1208,10 @@ namespace vk // Check for hanging queries to avoid driver hang verify("close and submit of commandbuffer with a hanging query!" HERE), (flags & cb_has_open_query) == 0; - if (!fence) + if (!pfence) { - fence = m_submit_fence; - is_pending = (fence != VK_NULL_HANDLE); + pfence = m_submit_fence; + is_pending = bool(pfence); } VkSubmitInfo infos = {}; @@ -1181,10 +1232,7 @@ namespace vk infos.pSignalSemaphores = &signal_semaphore; } - acquire_global_submit_lock(); - CHECK_RESULT(vkQueueSubmit(queue, 1, &infos, fence)); - release_global_submit_lock(); - + queue_submit(queue, &infos, pfence); clear_flags(); } }; diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index 54d432ad12be..89dcf7f32114 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -1336,15 +1336,10 @@ namespace vk if (cmd.access_hint != vk::command_buffer::access_type_hint::all) { // Primary access command queue, must restart it after - VkFence submit_fence; - VkFenceCreateInfo info{}; - info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; - vkCreateFence(*m_device, &info, nullptr, &submit_fence); + vk::fence submit_fence(*m_device); + cmd.submit(m_submit_queue, VK_NULL_HANDLE, VK_NULL_HANDLE, &submit_fence, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT); - cmd.submit(m_submit_queue, VK_NULL_HANDLE, VK_NULL_HANDLE, submit_fence, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT); - - vk::wait_for_fence(submit_fence, GENERAL_WAIT_TIMEOUT); - vkDestroyFence(*m_device, submit_fence, nullptr); + vk::wait_for_fence(&submit_fence, GENERAL_WAIT_TIMEOUT); CHECK_RESULT(vkResetCommandBuffer(cmd, 0)); cmd.begin(); diff --git a/rpcs3/VKGSRender.vcxproj b/rpcs3/VKGSRender.vcxproj index 08ee0447e125..54a906d84c50 100644 --- a/rpcs3/VKGSRender.vcxproj +++ b/rpcs3/VKGSRender.vcxproj @@ -23,6 +23,7 @@ + @@ -43,6 +44,7 @@ + diff --git a/rpcs3/VKGSRender.vcxproj.filters b/rpcs3/VKGSRender.vcxproj.filters index 2409d4bd2620..a372cec0fac8 100644 --- a/rpcs3/VKGSRender.vcxproj.filters +++ b/rpcs3/VKGSRender.vcxproj.filters @@ -1,115 +1,43 @@  - - {4FC737F1-C7A5-4376-A066-2A32D752A2FF} - cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx - + + + + + + + + + + + + + + + + + - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source 
Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - + + + + + + + + + + + + + + + + + + + \ No newline at end of file From 649b26ade0e1594d4710324c23609f469bf24607 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Tue, 10 Dec 2019 07:56:44 +0300 Subject: [PATCH 12/21] vk: Implement hw conditional rendering --- rpcs3/Emu/RSX/RSXThread.cpp | 84 ++++++++++++------ rpcs3/Emu/RSX/RSXThread.h | 19 ++-- rpcs3/Emu/RSX/VK/VKCompute.h | 54 ++++++++++++ rpcs3/Emu/RSX/VK/VKGSRender.cpp | 149 +++++++++++++++++++++++++++++++- rpcs3/Emu/RSX/VK/VKGSRender.h | 7 ++ rpcs3/Emu/RSX/VK/VKHelpers.h | 50 ++++++++++- 6 files changed, 328 insertions(+), 35 deletions(-) diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 327b5328c916..f1f7e1d874f1 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -314,7 +314,7 @@ namespace rsx verify(HERE), !cond_render_ctrl.hw_cond_active; // Pending evaluation, use hardware test - begin_conditional_rendering(); + begin_conditional_rendering(cond_render_ctrl.eval_sources); } else { @@ -2158,13 +2158,13 @@ namespace rsx { cond_render_ctrl.enable_conditional_render(this, ref); - auto result = zcull_ctrl->find_query(ref); + auto result = zcull_ctrl->find_query(ref, true); if (result.found) { - if (result.query) + if (!result.queries.empty()) { - cond_render_ctrl.set_sync_tag(result.query->sync_tag); - sync_hint(FIFO_hint::hint_conditional_render_eval, result.query); + cond_render_ctrl.set_eval_sources(result.queries); + sync_hint(FIFO_hint::hint_conditional_render_eval, cond_render_ctrl.eval_sources.front()); } else { @@ -2183,9 +2183,10 @@ namespace rsx cond_render_ctrl.disable_conditional_render(this); } - void thread::begin_conditional_rendering() + void thread::begin_conditional_rendering(const std::vector& /*sources*/) { cond_render_ctrl.hw_cond_active = true; + cond_render_ctrl.eval_sources.clear(); } void thread::end_conditional_rendering() @@ -2709,6 +2710,12 @@ namespace rsx } ptimer->async_tasks_pending++; + + if (m_statistics_map[m_statistics_tag_id] != 0) + { + // Flush guaranteed results; only one positive is needed + update(ptimer); + } } void ZCULL_control::allocate_new_query(::rsx::thread* ptimer) @@ -2888,7 +2895,7 @@ namespace rsx // No other queries in the chain, write result write(&writer, ptimer->timestamp(), result); - if (query && ptimer->cond_render_ctrl.sync_tag == query->sync_tag) + if (query && query->sync_tag == ptimer->cond_render_ctrl.eval_sync_tag) { const bool eval_failed = (result == 0); ptimer->cond_render_ctrl.set_eval_result(ptimer, eval_failed); @@ -3083,7 +3090,7 @@ namespace rsx // No other queries in the chain, write result write(&writer, ptimer->timestamp(), result); - if (query && ptimer->cond_render_ctrl.sync_tag == query->sync_tag) + if (query && query->sync_tag == ptimer->cond_render_ctrl.eval_sync_tag) { const bool eval_failed = (result == 0); ptimer->cond_render_ctrl.set_eval_result(ptimer, eval_failed); @@ -3175,36 +3182,56 @@ namespace rsx return result_zcull_intr; } - query_search_result ZCULL_control::find_query(vm::addr_t sink_address) + query_search_result 
ZCULL_control::find_query(vm::addr_t sink_address, bool all) { + query_search_result result{}; u32 stat_id = 0; + for (auto It = m_pending_writes.crbegin(); It != m_pending_writes.crend(); ++It) { if (UNLIKELY(stat_id)) { if (It->counter_tag != stat_id) { - // Zcull stats were cleared between this query and the required one - return { true, 0, nullptr }; + if (result.found) + { + // Some result was found, return it instead + break; + } + + // Zcull stats were cleared between this query and the required stats, result can only be 0 + return { true, 0, {} }; } - if (It->query) + if (It->query && It->query->num_draws) { - return { true, 0, It->query }; + result.found = true; + result.queries.push_back(It->query); + + if (!all) + { + break; + } } } else if (It->sink == sink_address) { - if (It->query) + if (It->query && It->query->num_draws) { - return { true, 0, It->query }; + result.found = true; + result.queries.push_back(It->query); + + if (!all) + { + break; + } } stat_id = It->counter_tag; } } - return {}; + return result; } u32 ZCULL_control::copy_reports_to(u32 start, u32 range, u32 dest) @@ -3228,6 +3255,15 @@ namespace rsx // Conditional rendering helpers + void conditional_render_eval::reset() + { + eval_address = 0; + eval_sync_tag = 0; + eval_sources.clear(); + + eval_failed = false; + } + bool conditional_render_eval::disable_rendering() const { return (enabled && eval_failed); @@ -3246,10 +3282,10 @@ namespace rsx pthr->end_conditional_rendering(); } + reset(); + enabled = true; - eval_failed = false; eval_address = address; - sync_tag = 0; } void conditional_render_eval::disable_conditional_render(::rsx::thread* pthr) @@ -3260,15 +3296,14 @@ namespace rsx pthr->end_conditional_rendering(); } + reset(); enabled = false; - eval_failed = false; - eval_address = 0; - sync_tag = 0; } - void conditional_render_eval::set_sync_tag(u64 value) + void conditional_render_eval::set_eval_sources(std::vector& sources) { - sync_tag = value; + eval_sources = std::move(sources); + eval_sync_tag = eval_sources.front()->sync_tag; } void conditional_render_eval::set_eval_result(::rsx::thread* pthr, bool failed) @@ -3279,9 +3314,8 @@ namespace rsx pthr->end_conditional_rendering(); } + reset(); eval_failed = failed; - eval_address = 0; - sync_tag = 0; } void conditional_render_eval::eval_result(::rsx::thread* pthr) diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index 7306dfca5e22..9663111263df 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -362,7 +362,7 @@ namespace rsx { bool found; u32 raw_zpass_result; - occlusion_query_info* query; + std::vector queries; }; enum sync_control @@ -443,7 +443,7 @@ namespace rsx bool has_pending() const { return !m_pending_writes.empty(); } // Search for query synchronized at address - query_search_result find_query(vm::addr_t sink_address); + query_search_result find_query(vm::addr_t sink_address, bool all); // Copies queries in range rebased from source range to destination range u32 copy_reports_to(u32 start, u32 range, u32 dest); @@ -463,8 +463,13 @@ namespace rsx bool eval_failed = false; bool hw_cond_active = false; bool reserved = false; - u32 eval_address = 0; - u64 sync_tag = 0; + + std::vector eval_sources; + u32 eval_sync_tag = 0; + u32 eval_address = 0; + + // Resets common data + void reset(); // Returns true if rendering is disabled as per conditional render test bool disable_rendering() const; @@ -478,8 +483,8 @@ namespace rsx // Disable conditional rendering void disable_conditional_render(thread* pthr); 
- // Sets up the zcull sync tag - void set_sync_tag(u64 value); + // Sets data sources for predicate evaluation + void set_eval_sources(std::vector& sources); // Sets evaluation result. Result is true if conditional evaluation failed void set_eval_result(thread* pthr, bool failed); @@ -765,7 +770,7 @@ namespace rsx void enable_conditional_rendering(vm::addr_t ref); void disable_conditional_rendering(); - virtual void begin_conditional_rendering(); + virtual void begin_conditional_rendering(const std::vector& sources); virtual void end_conditional_rendering(); // sync diff --git a/rpcs3/Emu/RSX/VK/VKCompute.h b/rpcs3/Emu/RSX/VK/VKCompute.h index 25b2a6d611a0..5bdc0934a382 100644 --- a/rpcs3/Emu/RSX/VK/VKCompute.h +++ b/rpcs3/Emu/RSX/VK/VKCompute.h @@ -784,6 +784,60 @@ namespace vk } }; + struct cs_aggregator : compute_task + { + const buffer* src = nullptr; + const buffer* dst = nullptr; + u32 block_length = 0; + u32 word_count = 0; + + cs_aggregator() + { + ssbo_count = 2; + + create(); + + m_src = + "#version 450\n" + "layout(local_size_x = %ws, local_size_y = 1, local_size_z = 1) in;\n\n" + + "layout(set=0, binding=0, std430) readonly buffer ssbo0{ uint src[]; };\n" + "layout(set=0, binding=1, std430) writeonly buffer ssbo1{ uint result; };\n\n" + + "void main()\n" + "{\n" + " if (gl_GlobalInvocationID.x < src.length())\n" + " {\n" + " atomicAdd(result, src[gl_GlobalInvocationID.x]);\n" + " }\n" + "}\n"; + + const std::pair syntax_replace[] = + { + { "%ws", std::to_string(optimal_group_size) }, + }; + + m_src = fmt::replace_all(m_src, syntax_replace); + } + + void bind_resources() override + { + m_program->bind_buffer({ src->value, 0, block_length }, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_descriptor_set); + m_program->bind_buffer({ dst->value, 0, 4 }, 1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_descriptor_set); + } + + void run(VkCommandBuffer cmd, const vk::buffer* dst, const vk::buffer* src, u32 num_words) + { + this->dst = dst; + this->src = src; + word_count = num_words; + block_length = num_words * 4; + + const u32 linear_invocations = rsx::aligned_div(word_count, optimal_group_size); + compute_task::run(cmd, linear_invocations); + } + }; + // TODO: Replace with a proper manager extern std::unordered_map> g_compute_tasks; diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 77e65e1bad68..dbd2bcbe36f0 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -643,6 +643,7 @@ VKGSRender::~VKGSRender() //Queries m_occlusion_query_pool.destroy(); + m_cond_render_buffer.reset(); //Command buffer for (auto &cb : m_primary_cb_list) @@ -1151,6 +1152,18 @@ void VKGSRender::emit_geometry(u32 sub_index) vkCmdBindPipeline(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_program->pipeline); update_draw_state(); begin_render_pass(); + + if (cond_render_ctrl.hw_cond_active) + { + // It is inconvenient that conditional rendering breaks other things like compute dispatch + // TODO: If this is heavy, add refactor the resources into global and add checks around compute dispatch + VkConditionalRenderingBeginInfoEXT info{}; + info.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT; + info.buffer = m_cond_render_buffer->value; + + m_device->cmdBeginConditionalRenderingEXT(*m_current_command_buffer, &info); + m_current_command_buffer->flags |= vk::command_buffer::cb_has_conditional_render; + } } // Bind the new set of descriptors for use with this draw call @@ -1787,6 +1800,12 @@ void VKGSRender::end() } while 
(rsx::method_registers.current_draw_clause.next()); + if (m_current_command_buffer->flags & vk::command_buffer::cb_has_conditional_render) + { + m_device->cmdEndConditionalRenderingEXT(*m_current_command_buffer); + m_current_command_buffer->flags &= ~(vk::command_buffer::cb_has_conditional_render); + } + // Close any open passes unconditionally close_render_pass(); @@ -2702,7 +2721,7 @@ void VKGSRender::load_program_env() // Vertex state const auto mem = m_vertex_env_ring_info.alloc<256>(256); - auto buf = static_cast(m_vertex_env_ring_info.map(mem, 144)); + auto buf = static_cast(m_vertex_env_ring_info.map(mem, 148)); fill_scale_offset_data(buf, false); fill_user_clip_data(buf + 64); @@ -2866,6 +2885,14 @@ void VKGSRender::close_and_submit_command_buffer(vk::fence* pFence, VkSemaphore vk::clear_status_interrupt(vk::heap_dirty); } +#if 0 // Currently unreachable + if (m_current_command_buffer->flags & vk::command_buffer::cb_has_conditional_render) + { + verify(HERE), m_render_pass_open; + m_device->cmdEndConditionalRenderingEXT(*m_current_command_buffer); + } +#endif + // End any active renderpasses; the caller should handle reopening if (m_render_pass_open) { @@ -3689,7 +3716,7 @@ void VKGSRender::get_occlusion_query_result(rsx::reports::occlusion_query_info* busy_wait(); } - data.command_buffer_to_wait->wait(); + data.command_buffer_to_wait->flush(); // Gather data for (const auto occlusion_id : data.indices) @@ -3734,6 +3761,124 @@ void VKGSRender::emergency_query_cleanup(vk::command_buffer* commands) } } +void VKGSRender::begin_conditional_rendering(const std::vector& sources) +{ + verify(HERE), !sources.empty(); + + // Flag check whether to calculate all entries or only one + bool partial_eval; + + // Try and avoid regenerating the data if its a repeat/spam + // NOTE: The incoming list is reversed with the first entry being the newest + if (m_cond_render_sync_tag == sources.front()->sync_tag) + { + // Already synched, check subdraw which is possible if last sync happened while query was active + if (!m_active_query_info || m_active_query_info != sources.front()) + { + rsx::thread::begin_conditional_rendering(sources); + return; + } + + // Partial evaluation only + partial_eval = true; + } + else + { + m_cond_render_sync_tag = sources.front()->sync_tag; + partial_eval = false; + } + + // Time to aggregate + if (!m_cond_render_buffer) + { + auto& memory_props = m_device->get_memory_mapping(); + m_cond_render_buffer = std::make_unique( + *m_device, 4, + memory_props.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, + VK_BUFFER_USAGE_CONDITIONAL_RENDERING_BIT_EXT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0); + } + + if (sources.size() == 1) + { + const auto query = sources.front(); + const auto& query_info = m_occlusion_map[query->driver_handle]; + + if (query_info.indices.size() == 1) + { + const auto& index = query_info.indices.front(); + m_occlusion_query_pool.get_query_result_indirect(*m_current_command_buffer, index, m_cond_render_buffer->value, 0); + + vk::insert_buffer_memory_barrier(*m_current_command_buffer, m_cond_render_buffer->value, 0, 4, + VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT, + VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT); + + rsx::thread::begin_conditional_rendering(sources); + return; + } + } + + auto scratch = vk::get_scratch_buffer(); + u32 dst_offset = 0; + size_t first = 0; + size_t last; + + if (LIKELY(!partial_eval)) + { + last = sources.size(); + } + else + { + 
last = 1; + } + + for (size_t i = first; i < last; ++i) + { + auto& query_info = m_occlusion_map[sources[i]->driver_handle]; + for (const auto& index : query_info.indices) + { + m_occlusion_query_pool.get_query_result_indirect(*m_current_command_buffer, index, scratch->value, dst_offset); + dst_offset += 4; + } + } + + if (dst_offset) + { + // Fast path should have been caught above + verify(HERE), dst_offset > 4; + + if (!partial_eval) + { + // Clear result to zero + vkCmdFillBuffer(*m_current_command_buffer, m_cond_render_buffer->value, 0, 4, 0); + + vk::insert_buffer_memory_barrier(*m_current_command_buffer, m_cond_render_buffer->value, 0, 4, + VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_WRITE_BIT); + } + + vk::insert_buffer_memory_barrier(*m_current_command_buffer, scratch->value, 0, dst_offset, + VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); + + vk::get_compute_task()->run(*m_current_command_buffer, m_cond_render_buffer.get(), scratch, dst_offset / 4); + + vk::insert_buffer_memory_barrier(*m_current_command_buffer, m_cond_render_buffer->value, 0, 4, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT, + VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT); + } + else + { + LOG_ERROR(RSX, "Dubious query data pushed to cond render!, Please report to developers(q.pending=%d)", sources.front()->pending); + } + + rsx::thread::begin_conditional_rendering(sources); +} + +void VKGSRender::end_conditional_rendering() +{ + thread::end_conditional_rendering(); +} + bool VKGSRender::on_decompiler_task() { return m_prog_buffer->async_update(8, *m_device, pipeline_layout).first; diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index 63b2742b3ed5..8f78ad8f1ebe 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -321,6 +321,9 @@ class VKGSRender : public GSRender, public ::rsx::reports::ZCULL_control std::unique_ptr m_attachment_clear_pass; std::unique_ptr m_video_output_pass; + std::unique_ptr m_cond_render_buffer; + u64 m_cond_render_sync_tag = 0; + shared_mutex m_sampler_mutex; u64 surface_store_tag = 0; std::atomic_bool m_samplers_dirty = { true }; @@ -479,6 +482,10 @@ class VKGSRender : public GSRender, public ::rsx::reports::ZCULL_control // External callback in case we need to suddenly submit a commandlist unexpectedly, e.g in a violation handler void emergency_query_cleanup(vk::command_buffer* commands); + // Conditional rendering + void begin_conditional_rendering(const std::vector& sources) override; + void end_conditional_rendering() override; + protected: void clear_surface(u32 mask) override; void begin() override; diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index 745209df08e1..600919e98e25 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -545,6 +545,8 @@ namespace vk gpu_shader_types_support shader_types_support{}; VkPhysicalDeviceDriverPropertiesKHR driver_properties{}; bool stencil_export_support = false; + bool conditional_render_support = false; + bool host_query_reset_support = false; friend class render_device; private: @@ -594,6 +596,8 @@ namespace vk } stencil_export_support = device_extensions.is_supported(VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME); + conditional_render_support = 
device_extensions.is_supported(VK_EXT_CONDITIONAL_RENDERING_EXTENSION_NAME); + host_query_reset_support = device_extensions.is_supported(VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME); } public: @@ -764,6 +768,12 @@ namespace vk std::unique_ptr m_allocator; VkDevice dev = VK_NULL_HANDLE; + public: + // Exported device endpoints + PFN_vkCmdBeginConditionalRenderingEXT cmdBeginConditionalRenderingEXT = nullptr; + PFN_vkCmdEndConditionalRenderingEXT cmdEndConditionalRenderingEXT = nullptr; + PFN_vkResetQueryPoolEXT resetQueryPoolEXT = nullptr; + public: render_device() = default; ~render_device() = default; @@ -797,6 +807,16 @@ namespace vk requested_extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME); } + if (pgpu->conditional_render_support) + { + requested_extensions.push_back(VK_EXT_CONDITIONAL_RENDERING_EXTENSION_NAME); + } + + if (pgpu->host_query_reset_support) + { + requested_extensions.push_back(VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME); + } + enabled_features.robustBufferAccess = VK_TRUE; enabled_features.fullDrawIndexUint32 = VK_TRUE; enabled_features.independentBlend = VK_TRUE; @@ -880,6 +900,18 @@ namespace vk CHECK_RESULT(vkCreateDevice(*pgpu, &device, nullptr, &dev)); + // Import optional function endpoints + if (pgpu->conditional_render_support) + { + cmdBeginConditionalRenderingEXT = (PFN_vkCmdBeginConditionalRenderingEXT)vkGetDeviceProcAddr(dev, "vkCmdBeginConditionalRenderingEXT"); + cmdEndConditionalRenderingEXT = (PFN_vkCmdEndConditionalRenderingEXT)vkGetDeviceProcAddr(dev, "vkCmdEndConditionalRenderingEXT"); + } + + if (pgpu->host_query_reset_support) + { + resetQueryPoolEXT = (PFN_vkResetQueryPoolEXT)vkGetDeviceProcAddr(dev, "vkResetQueryPoolEXT"); + } + memory_map = vk::get_memory_mapping(pdev); m_formats_support = vk::get_optimal_tiling_supported_formats(pdev); @@ -979,6 +1011,16 @@ namespace vk return pgpu->features.alphaToOne != VK_FALSE; } + bool get_conditional_render_support() const + { + return pgpu->conditional_render_support; + } + + bool get_host_query_reset_support() const + { + return pgpu->host_query_reset_support; + } + mem_allocator_base* get_allocator() const { return m_allocator.get(); @@ -1097,7 +1139,8 @@ namespace vk cb_has_blit_transfer = 2, cb_has_dma_transfer = 4, cb_has_open_query = 8, - cb_load_occluson_task = 16 + cb_load_occluson_task = 16, + cb_has_conditional_render = 32 }; u32 flags = 0; @@ -3005,6 +3048,11 @@ namespace vk while (true); } + void get_query_result_indirect(vk::command_buffer &cmd, u32 index, VkBuffer dst, VkDeviceSize dst_offset) + { + vkCmdCopyQueryPoolResults(cmd, query_pool, index, 1, dst, dst_offset, 4, VK_QUERY_RESULT_WAIT_BIT); + } + void reset_query(vk::command_buffer &cmd, u32 index) { if (query_active_status[index]) From 53c496ad03e58d4e80ddf5ab1373d61dcc98e334 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Tue, 10 Dec 2019 09:10:13 +0300 Subject: [PATCH 13/21] vk: Emulate conditional rendering for AMD --- rpcs3/Emu/RSX/RSXThread.cpp | 4 +- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 76 +++++++++++++++++++++++----- rpcs3/Emu/RSX/VK/VKHelpers.h | 5 +- rpcs3/Emu/RSX/VK/VKVertexProgram.cpp | 29 ++++++++++- rpcs3/Emu/RSX/VK/VKVertexProgram.h | 9 +++- 5 files changed, 103 insertions(+), 20 deletions(-) diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index f1f7e1d874f1..19adf61223ca 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -319,7 +319,7 @@ namespace rsx else { zcull_ctrl->read_barrier(this, cond_render_ctrl.eval_address, 4, reports::sync_no_notify); - 
cond_render_ctrl.eval_result(this); + verify(HERE), !cond_render_ctrl.eval_pending(); } } @@ -2959,8 +2959,6 @@ namespace rsx { if (hint || ptimer->async_tasks_pending >= max_safe_queue_depth) { - verify(HERE), !active || !hint; - // Prepare the whole queue for reading. This happens when zcull activity is disabled or queue is too long for (auto It = m_pending_writes.rbegin(); It != m_pending_writes.rend(); ++It) { diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index dbd2bcbe36f0..7981fd34df11 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -286,6 +286,13 @@ namespace idx++; + bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + bindings[idx].descriptorCount = 1; + bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; + bindings[idx].binding = CONDITIONAL_RENDER_PREDICATE_SLOT; + + idx++; + for (int i = 0; i < rsx::limits::fragment_textures_count; i++) { bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; @@ -311,6 +318,12 @@ namespace push_constants[0].size = 16; push_constants[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; + if (!vk::get_current_renderer()->get_conditional_render_support()) + { + // Conditional render toggle + push_constants[0].size = 20; + } + VkDescriptorSetLayoutCreateInfo infos = {}; infos.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; infos.pBindings = bindings.data(); @@ -439,11 +452,15 @@ VKGSRender::VKGSRender() : GSRender() m_occlusion_query_data[n].driver_handle = n; //Generate frame contexts - VkDescriptorPoolSize uniform_buffer_pool = { VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER , 6 * DESCRIPTOR_MAX_DRAW_CALLS }; - VkDescriptorPoolSize uniform_texel_pool = { VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER , 3 * DESCRIPTOR_MAX_DRAW_CALLS }; - VkDescriptorPoolSize texture_pool = { VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER , 20 * DESCRIPTOR_MAX_DRAW_CALLS }; + std::vector sizes; + sizes.push_back({ VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER , 6 * DESCRIPTOR_MAX_DRAW_CALLS }); + sizes.push_back({ VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER , 3 * DESCRIPTOR_MAX_DRAW_CALLS }); + sizes.push_back({ VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER , 20 * DESCRIPTOR_MAX_DRAW_CALLS }); - std::vector sizes{ uniform_buffer_pool, uniform_texel_pool, texture_pool }; + if (!m_device->get_conditional_render_support()) + { + sizes.push_back({ VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1 * DESCRIPTOR_MAX_DRAW_CALLS }); + } VkSemaphoreCreateInfo semaphore_info = {}; semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; @@ -1153,7 +1170,7 @@ void VKGSRender::emit_geometry(u32 sub_index) update_draw_state(); begin_render_pass(); - if (cond_render_ctrl.hw_cond_active) + if (cond_render_ctrl.hw_cond_active && m_device->get_conditional_render_support()) { // It is inconvenient that conditional rendering breaks other things like compute dispatch // TODO: If this is heavy, add refactor the resources into global and add checks around compute dispatch @@ -2802,6 +2819,12 @@ void VKGSRender::load_program_env() m_program->bind_uniform(m_fragment_texture_params_buffer_info, FRAGMENT_TEXTURE_PARAMS_BIND_SLOT, m_current_frame->descriptor_set); } + if (!m_device->get_conditional_render_support()) + { + auto predicate = m_cond_render_buffer ? 
m_cond_render_buffer->value : vk::get_scratch_buffer()->value; + m_program->bind_buffer({ predicate, 0, 4 }, CONDITIONAL_RENDER_PREDICATE_SLOT, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_current_frame->descriptor_set); + } + //Clear flags const u32 handled_flags = (rsx::pipeline_state::fragment_state_dirty | rsx::pipeline_state::vertex_state_dirty | rsx::pipeline_state::transform_constants_dirty | rsx::pipeline_state::fragment_constants_dirty | rsx::pipeline_state::fragment_texture_state_dirty); m_graphics_state &= ~handled_flags; @@ -2826,13 +2849,21 @@ void VKGSRender::update_vertex_env(u32 id, const vk::vertex_upload_info& vertex_ base_offset = 0; } - u32 draw_info[4]; + u8 data_size = 16; + u32 draw_info[5]; + draw_info[0] = vertex_info.vertex_index_base; draw_info[1] = vertex_info.vertex_index_offset; draw_info[2] = id; draw_info[3] = (id * 16) + (base_offset / 8); - vkCmdPushConstants(*m_current_command_buffer, pipeline_layout, VK_SHADER_STAGE_VERTEX_BIT, 0, 16, draw_info); + if (!m_device->get_conditional_render_support()) + { + draw_info[4] = cond_render_ctrl.hw_cond_active ? 1 : 0; + data_size = 20; + } + + vkCmdPushConstants(*m_current_command_buffer, pipeline_layout, VK_SHADER_STAGE_VERTEX_BIT, 0, data_size, draw_info); const size_t data_offset = (id * 128) + m_vertex_layout_stream_info.offset; auto dst = m_vertex_layout_ring_info.map(data_offset, 128); @@ -3792,10 +3823,31 @@ void VKGSRender::begin_conditional_rendering(const std::vectorget_memory_mapping(); + auto usage_flags = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + + if (m_device->get_conditional_render_support()) + { + usage_flags |= VK_BUFFER_USAGE_CONDITIONAL_RENDERING_BIT_EXT; + } + m_cond_render_buffer = std::make_unique( *m_device, 4, memory_props.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, - VK_BUFFER_USAGE_CONDITIONAL_RENDERING_BIT_EXT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0); + usage_flags, 0); + } + + VkPipelineStageFlags dst_stage; + VkAccessFlags dst_access; + + if (m_device->get_conditional_render_support()) + { + dst_stage = VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT; + dst_access = VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT; + } + else + { + dst_stage = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT; + dst_access = VK_ACCESS_SHADER_READ_BIT; } if (sources.size() == 1) @@ -3809,8 +3861,8 @@ void VKGSRender::begin_conditional_rendering(const std::vectorvalue, 0); vk::insert_buffer_memory_barrier(*m_current_command_buffer, m_cond_render_buffer->value, 0, 4, - VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT, - VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT); + VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stage, + VK_ACCESS_TRANSFER_WRITE_BIT, dst_access); rsx::thread::begin_conditional_rendering(sources); return; @@ -3863,8 +3915,8 @@ void VKGSRender::begin_conditional_rendering(const std::vector()->run(*m_current_command_buffer, m_cond_render_buffer.get(), scratch, dst_offset / 4); vk::insert_buffer_memory_barrier(*m_current_command_buffer, m_cond_render_buffer->value, 0, 4, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT, - VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT); + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, dst_stage, + VK_ACCESS_SHADER_WRITE_BIT, dst_access); } else { diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index 600919e98e25..247086c2377d 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ 
b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -39,8 +39,9 @@ #define FRAGMENT_STATE_BIND_SLOT 3 #define FRAGMENT_TEXTURE_PARAMS_BIND_SLOT 4 #define VERTEX_BUFFERS_FIRST_BIND_SLOT 5 -#define TEXTURES_FIRST_BIND_SLOT 8 -#define VERTEX_TEXTURES_FIRST_BIND_SLOT 24 //8+16 +#define CONDITIONAL_RENDER_PREDICATE_SLOT 8 +#define TEXTURES_FIRST_BIND_SLOT 9 +#define VERTEX_TEXTURES_FIRST_BIND_SLOT (TEXTURES_FIRST_BIND_SLOT + 16) #define VK_NUM_DESCRIPTOR_BINDINGS (VERTEX_TEXTURES_FIRST_BIND_SLOT + 4) diff --git a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp index 8fdfa4f50c81..5f43400a9b43 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp @@ -43,12 +43,26 @@ void VKVertexDecompilerThread::insertHeader(std::stringstream &OS) OS << " float z_far;\n"; OS << "};\n\n"; + if (m_device_props.emulate_conditional_rendering) + { + OS << "layout(std430, set = 0, binding = 8) readonly buffer EXT_Conditional_Rendering\n"; + OS << "{\n"; + OS << " uint conditional_rendering_predicate;\n"; + OS << "};\n\n"; + } + OS << "layout(push_constant) uniform VertexLayoutBuffer\n"; OS << "{\n"; OS << " uint vertex_base_index;\n"; OS << " uint vertex_index_offset;\n"; OS << " uint draw_id;\n"; OS << " uint layout_ptr_offset;\n"; + + if (m_device_props.emulate_conditional_rendering) + { + OS << " uint conditional_rendering_enabled;\n"; + } + OS << "};\n\n"; vk::glsl::program_input in; @@ -238,9 +252,18 @@ void VKVertexDecompilerThread::insertMainEnd(std::stringstream & OS) OS << "}\n\n"; OS << "void main ()\n"; - OS << "{\n"; + OS << "{\n\n"; + + if (m_device_props.emulate_conditional_rendering) + { + OS << " if (conditional_rendering_enabled != 0 && conditional_rendering_predicate == 0)\n"; + OS << " {\n"; + OS << " gl_Position = vec4(0.);\n"; + OS << " return;\n"; + OS << "}\n\n"; + } - OS << "\n" << " vs_main();\n\n"; + OS << " vs_main();\n\n"; for (auto &i : reg_table) { @@ -286,6 +309,8 @@ void VKVertexDecompilerThread::insertMainEnd(std::stringstream & OS) void VKVertexDecompilerThread::Task() { + m_device_props.emulate_conditional_rendering = !vk::get_current_renderer()->get_conditional_render_support(); + m_shader = Decompile(); vk_prog->SetInputs(inputs); } diff --git a/rpcs3/Emu/RSX/VK/VKVertexProgram.h b/rpcs3/Emu/RSX/VK/VKVertexProgram.h index 33be96ed7641..32f6f66740fe 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexProgram.h +++ b/rpcs3/Emu/RSX/VK/VKVertexProgram.h @@ -1,4 +1,4 @@ -#pragma once +#pragma once #include "../Common/VertexProgramDecompiler.h" #include "Emu/RSX/RSXVertexProgram.h" #include "Utilities/Thread.h" @@ -10,6 +10,13 @@ struct VKVertexDecompilerThread : public VertexProgramDecompiler std::string &m_shader; std::vector inputs; class VKVertexProgram *vk_prog; + + struct + { + bool emulate_conditional_rendering; + } + m_device_props; + protected: std::string getFloatTypeName(size_t elementCount) override; std::string getIntTypeName(size_t elementCount) override; From 480af304ac0876b2c46f284be64d8203cc5f8118 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Tue, 10 Dec 2019 09:14:47 +0300 Subject: [PATCH 14/21] vk: Switch occlusion pool to FIFO rather than LIFO to avoid hard stall --- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 1 + rpcs3/Emu/RSX/VK/VKHelpers.h | 11 ++++++----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 7981fd34df11..fa8fe0c2df84 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -3747,6 +3747,7 @@ void 
VKGSRender::get_occlusion_query_result(rsx::reports::occlusion_query_info* busy_wait(); } + // Allocation stack is FIFO and very long so no need to actually wait for fence signal data.command_buffer_to_wait->flush(); // Gather data diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index 247086c2377d..a5cb4f3b41db 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -9,6 +9,7 @@ #include #include #include +#include #if !defined(_WIN32) && !defined(__APPLE__) #include @@ -2952,7 +2953,7 @@ namespace vk VkQueryPool query_pool = VK_NULL_HANDLE; vk::render_device* owner = nullptr; - std::stack available_slots; + std::deque available_slots; std::vector query_active_status; public: @@ -2990,7 +2991,7 @@ namespace vk for (u32 n = 0; n < count; ++n) { - available_slots.push(n); + available_slots.push_back(n); } } @@ -3061,7 +3062,7 @@ namespace vk vkCmdResetQueryPool(cmd, query_pool, index, 1); query_active_status[index] = false; - available_slots.push(index); + available_slots.push_back(index); } } @@ -3088,8 +3089,8 @@ namespace vk return ~0u; } - u32 result = available_slots.top(); - available_slots.pop(); + u32 result = available_slots.front(); + available_slots.pop_front(); verify(HERE), !query_active_status[result]; return result; From eea475a5edf505be3cd1b39efac038694107b8d7 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Wed, 11 Dec 2019 19:28:57 +0300 Subject: [PATCH 15/21] vk: Refactoring and optimizations to query handling - Caches query results when looking up report availability to avoid entering driver code twice. - Minor code restructuring --- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 51 ++++++--------- rpcs3/Emu/RSX/VK/VKHelpers.h | 109 +++++++++++++++++++++----------- 2 files changed, 92 insertions(+), 68 deletions(-) diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index fa8fe0c2df84..4093f151683b 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -2226,52 +2226,43 @@ void VKGSRender::sync_hint(rsx::FIFO_hint hint, void* args) verify(HERE), args; rsx::thread::sync_hint(hint, args); + // Occlusion queries not enabled, do nothing + if (!(m_current_command_buffer->flags & vk::command_buffer::cb_has_occlusion_task)) + return; + + // Check if the required report is synced to this CB + auto occlusion_info = static_cast(args); + auto& data = m_occlusion_map[occlusion_info->driver_handle]; + + if (data.command_buffer_to_wait != m_current_command_buffer || data.indices.empty()) + return; + // Occlusion test result evaluation is coming up, avoid a hard sync switch (hint) { case rsx::FIFO_hint::hint_conditional_render_eval: { - // Occlusion queries not enabled, do nothing - if (!(m_current_command_buffer->flags & vk::command_buffer::cb_has_occlusion_task)) - return; - // If a flush request is already enqueued, do nothing if (m_flush_requests.pending()) return; - // Check if the required report is synced to this CB - auto occlusion_info = static_cast(args); - auto& data = m_occlusion_map[occlusion_info->driver_handle]; - - if (data.command_buffer_to_wait == m_current_command_buffer && !data.indices.empty()) - { - // Confirmed hard sync coming up, post a sync request - m_flush_requests.post(false); - m_flush_requests.remove_one(); - } - + // Schedule a sync on the next loop iteration + m_flush_requests.post(false); + m_flush_requests.remove_one(); break; } case rsx::FIFO_hint::hint_zcull_sync: { - if (!(m_current_command_buffer->flags & vk::command_buffer::cb_has_occlusion_task)) - return; - - 
auto occlusion_info = static_cast(args); - auto& data = m_occlusion_map[occlusion_info->driver_handle]; + // Unavoidable hard sync coming up, flush immediately + // This heavyweight hint should be used with caution + std::lock_guard lock(m_flush_queue_mutex); + flush_command_queue(); - if (data.command_buffer_to_wait == m_current_command_buffer && !data.indices.empty()) + if (m_flush_requests.pending()) { - std::lock_guard lock(m_flush_queue_mutex); - flush_command_queue(); - - if (m_flush_requests.pending()) - { - // Clear without wait - m_flush_requests.clear_pending_flag(); - } + // Clear without wait + m_flush_requests.clear_pending_flag(); } - break; } } diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index a5cb4f3b41db..25e5e1447ed9 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -2950,11 +2950,62 @@ namespace vk class occlusion_query_pool { + struct query_slot_info + { + bool any_passed; + bool active; + bool ready; + }; + VkQueryPool query_pool = VK_NULL_HANDLE; vk::render_device* owner = nullptr; std::deque available_slots; - std::vector query_active_status; + std::vector query_slot_status; + + inline bool poke_query(query_slot_info& query, u32 index) + { + // Query is ready if: + // 1. Any sample has been determined to have passed the Z test + // 2. The backend has fully processed the query and found no hits + + u32 result[2] = { 0, 0 }; + switch (const auto error = vkGetQueryPoolResults(*owner, query_pool, index, 1, 8, result, 8, VK_QUERY_RESULT_PARTIAL_BIT | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)) + { + case VK_SUCCESS: + { + if (result[0]) + { + query.any_passed = true; + query.ready = true; + return true; + } + else if (result[1]) + { + query.any_passed = false; + query.ready = true; + return true; + } + + return false; + } + case VK_NOT_READY: + { + if (result[0]) + { + query.any_passed = true; + query.ready = true; + return true; + } + + return false; + } + default: + die_with_error(HERE, error); + return false; + } + } + public: void create(vk::render_device &dev, u32 num_entries) @@ -2968,7 +3019,7 @@ namespace vk owner = &dev; // From spec: "After query pool creation, each query must be reset before it is used." 
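
The cached lookup above is the crux of this refactor: the first conclusive read of a query slot is remembered in query_slot_info, so later calls to check_query_status() and get_query_result() never have to enter the driver twice for the same slot. A minimal standalone sketch of that availability handling, using only the core Vulkan API (the names below are illustrative, not the emulator's):

#include <vulkan/vulkan.h>
#include <cstdint>

// Result cache entry; mirrors the idea of query_slot_info above.
struct cached_occlusion_result
{
	bool any_passed = false;
	bool ready      = false;
};

// Polls one query and caches the outcome so later lookups skip the driver call.
// Returns true once 'out' holds a final answer.
bool poll_occlusion_query(VkDevice dev, VkQueryPool pool, uint32_t index, cached_occlusion_result& out)
{
	// words[0] = partial sample count, words[1] = availability flag
	uint32_t words[2] = { 0, 0 };
	const VkResult status = vkGetQueryPoolResults(dev, pool, index, 1, sizeof(words), words, sizeof(words),
		VK_QUERY_RESULT_PARTIAL_BIT | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT);

	if (words[0] != 0)
	{
		// At least one sample already passed the depth test; no need to wait for retirement
		out = { true, true };
		return true;
	}

	if (status == VK_SUCCESS && words[1] != 0)
	{
		// Query fully retired with zero passing samples
		out = { false, true };
		return true;
	}

	// Still in flight (VK_NOT_READY, or partial data without availability yet)
	return false;
}
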
- query_active_status.resize(num_entries, true); + query_slot_status.resize(num_entries, {}); } void destroy() @@ -2984,10 +3035,11 @@ namespace vk void initialize(vk::command_buffer &cmd) { - const u32 count = ::size32(query_active_status); + const u32 count = ::size32(query_slot_status); vkCmdResetQueryPool(cmd, query_pool, 0, count); - std::fill(query_active_status.begin(), query_active_status.end(), false); + query_slot_info value{}; + std::fill(query_slot_status.begin(), query_slot_status.end(), value); for (u32 n = 0; n < count; ++n) { @@ -2997,14 +3049,15 @@ namespace vk void begin_query(vk::command_buffer &cmd, u32 index) { - if (query_active_status[index]) + if (query_slot_status[index].active) { //Synchronization must be done externally vkCmdResetQueryPool(cmd, query_pool, index, 1); + query_slot_status[index] = {}; } vkCmdBeginQuery(cmd, query_pool, index, 0);//VK_QUERY_CONTROL_PRECISE_BIT); - query_active_status[index] = true; + query_slot_status[index].active = true; } void end_query(vk::command_buffer &cmd, u32 index) @@ -3014,40 +3067,20 @@ namespace vk bool check_query_status(u32 index) { - u32 result[2] = {0, 0}; - switch (const auto error = vkGetQueryPoolResults(*owner, query_pool, index, 1, 8, result, 8, VK_QUERY_RESULT_PARTIAL_BIT | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)) - { - case VK_SUCCESS: - return (result[0] || result[1]); - case VK_NOT_READY: - return false; - default: - die_with_error(HERE, error); - return false; - } + return poke_query(query_slot_status[index], index); } u32 get_query_result(u32 index) { - u32 result[2] = { 0, 0 }; + // Check for cached result + auto& query_info = query_slot_status[index]; - do + while (!query_info.ready) { - switch (const auto error = vkGetQueryPoolResults(*owner, query_pool, index, 1, 8, result, 8, VK_QUERY_RESULT_PARTIAL_BIT | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)) - { - case VK_SUCCESS: - if (result[0]) return 1u; - if (result[1]) return 0u; // Partial result can return SUCCESS when unavailable - continue; - case VK_NOT_READY: - if (result[0]) return 1u; // Partial result can return NOT_READY when unavailable - continue; - default: - die_with_error(HERE, error); - return false; - } + poke_query(query_info, index); } - while (true); + + return query_info.any_passed ? 
1 : 0; } void get_query_result_indirect(vk::command_buffer &cmd, u32 index, VkBuffer dst, VkDeviceSize dst_offset) @@ -3057,11 +3090,11 @@ namespace vk void reset_query(vk::command_buffer &cmd, u32 index) { - if (query_active_status[index]) + if (query_slot_status[index].active) { vkCmdResetQueryPool(cmd, query_pool, index, 1); - query_active_status[index] = false; + query_slot_status[index] = {}; available_slots.push_back(index); } } @@ -3075,9 +3108,9 @@ namespace vk void reset_all(vk::command_buffer &cmd) { - for (u32 n = 0; n < query_active_status.size(); n++) + for (u32 n = 0; n < query_slot_status.size(); n++) { - if (query_active_status[n]) + if (query_slot_status[n].active) reset_query(cmd, n); } } @@ -3092,7 +3125,7 @@ namespace vk u32 result = available_slots.front(); available_slots.pop_front(); - verify(HERE), !query_active_status[result]; + verify(HERE), !query_slot_status[result].active; return result; } }; From 202c00e4f26ce2df1c574949eb01d61521d22a87 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Wed, 11 Dec 2019 19:28:31 +0300 Subject: [PATCH 16/21] rsx: Add toggle for zcull sync behaviour - Adds a relaxed sync mode where ZCULL reports are lazily nudged into flushing and the main core does not actually wait for the event to finish before proceeding - Can drastically improve performance in cases where the game actually does not utilize the report data --- rpcs3/Emu/RSX/RSXThread.cpp | 23 +++++++++++++++-------- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 4 ++-- rpcs3/Emu/System.h | 1 + 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 19adf61223ca..e65bf9f495ad 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -2196,7 +2196,18 @@ namespace rsx void thread::sync() { - zcull_ctrl->sync(this); + if (zcull_ctrl->has_pending()) + { + if (g_cfg.video.relaxed_zcull_sync) + { + // Emit zcull sync hint and update; guarantees results to be written shortly after this event + zcull_ctrl->update(this, 0, true); + } + else + { + zcull_ctrl->sync(this); + } + } // Fragment constants may have been updated m_graphics_state |= rsx::pipeline_state::fragment_constants_dirty; @@ -2433,16 +2444,12 @@ namespace rsx Emu.Pause(); } - // Reset zcull ctrl + // Reset ZCULL ctrl + // NOTE: A semaphore release is part of RSX flip control and will handle ZCULL sync + // TODO: These routines belong in the state reset routines controlled by sys_rsx and cellGcmSetFlip zcull_ctrl->set_active(this, false, true); zcull_ctrl->clear(this); - if (zcull_ctrl->has_pending()) - { - LOG_TRACE(RSX, "Dangling reports found, discarding..."); - zcull_ctrl->sync(this); - } - // Save current state m_queued_flip.stats = m_frame_stats; m_queued_flip.push(buffer); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 4093f151683b..2aa686e33174 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -562,8 +562,8 @@ VKGSRender::VKGSRender() : GSRender() // Confirmed in BLES01916 (The Evil Within) which uses RGB565 for some virtual texturing data. 
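
The relaxed-sync toggle introduced above boils down to choosing between a lazy nudge and a hard wait whenever the FIFO asks for report synchronization. A compact sketch of that decision, assuming a controller with the has_pending()/update()/sync() interface shown in the patch (the wrapper itself is illustrative, not emulator code):

#include <cstdint>

// 'relaxed' mirrors the new g_cfg.video.relaxed_zcull_sync option.
template <typename ZcullCtrl, typename Owner>
void synchronize_reports(ZcullCtrl& zcull, Owner* owner, bool relaxed)
{
	if (!zcull.has_pending())
		return;

	if (relaxed)
	{
		// Nudge pending reports towards flushing; results land shortly after
		// without stalling the RSX core on the backend.
		zcull.update(owner, 0, true);
	}
	else
	{
		// Strict mode: block until every pending report has retired.
		zcull.sync(owner);
	}
}
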
backend_config.supports_hw_renormalization = (vk::get_driver_vendor() == vk::driver_vendor::NVIDIA); - // Stub - backend_config.supports_hw_conditional_render = true; + // Relaxed query synchronization + backend_config.supports_hw_conditional_render = !!g_cfg.video.relaxed_zcull_sync; } VKGSRender::~VKGSRender() diff --git a/rpcs3/Emu/System.h b/rpcs3/Emu/System.h index a3c17cbd4f99..17e98d40c0ab 100644 --- a/rpcs3/Emu/System.h +++ b/rpcs3/Emu/System.h @@ -497,6 +497,7 @@ struct cfg_root : cfg::node cfg::_bool strict_texture_flushing{this, "Strict Texture Flushing", false}; cfg::_bool disable_native_float16{this, "Disable native float16 support", false}; cfg::_bool multithreaded_rsx{this, "Multithreaded RSX", false}; + cfg::_bool relaxed_zcull_sync{this, "Relaxed ZCULL Sync", false}; cfg::_int<1, 8> consequtive_frames_to_draw{this, "Consecutive Frames To Draw", 1}; cfg::_int<1, 8> consequtive_frames_to_skip{this, "Consecutive Frames To Skip", 1}; cfg::_int<50, 800> resolution_scale_percent{this, "Resolution Scale", 100}; From 73cce2759f75309fb7b66e0ec7113db8997a8e5b Mon Sep 17 00:00:00 2001 From: kd-11 Date: Wed, 11 Dec 2019 22:07:09 +0300 Subject: [PATCH 17/21] rsx: Zcull synchronization tuning - Also fixes a bug where sync_hint would erroneously update the sync tag even for old lookups (e.g conditional render using older query) --- rpcs3/Emu/RSX/RSXThread.cpp | 29 ++++++++++++++++------------- rpcs3/Emu/RSX/RSXThread.h | 7 +++---- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 13 +++++++------ rpcs3/Emu/RSX/VK/VKGSRender.h | 23 +++++++++++++++++++++++ 4 files changed, 49 insertions(+), 23 deletions(-) diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index e65bf9f495ad..a61a8710cda1 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -2220,9 +2220,9 @@ namespace rsx //verify (HERE), async_tasks_pending.load() == 0; } - void thread::sync_hint(FIFO_hint /*hint*/, void* /*args*/) + void thread::sync_hint(FIFO_hint /*hint*/, void* args) { - zcull_ctrl->on_sync_hint(); + zcull_ctrl->on_sync_hint(args); } void thread::flush_fifo() @@ -2639,7 +2639,7 @@ namespace rsx end_occlusion_query(m_current_task); m_current_task->active = false; m_current_task->pending = true; - m_current_task->sync_tag = ++m_timer; + m_current_task->sync_tag = m_timer++; m_current_task->timestamp = m_tsc; m_pending_writes.push_back({}); @@ -2670,7 +2670,7 @@ namespace rsx m_current_task->active = false; m_current_task->pending = true; m_current_task->timestamp = m_tsc; - m_current_task->sync_tag = ++m_timer; + m_current_task->sync_tag = m_timer++; m_pending_writes.back().query = m_current_task; allocate_new_query(ptimer); @@ -2800,12 +2800,16 @@ namespace rsx void ZCULL_control::on_draw() { if (m_current_task) + { m_current_task->num_draws++; + m_current_task->sync_tag = m_timer++; + } } - void ZCULL_control::on_sync_hint() + void ZCULL_control::on_sync_hint(void* args) { - m_sync_tag = ++m_timer; + auto query = static_cast(args); + m_sync_tag = std::max(m_sync_tag, query->sync_tag); } void ZCULL_control::write(vm::addr_t sink, u64 timestamp, u32 type, u32 value) @@ -2974,7 +2978,7 @@ namespace rsx if (It->query->num_draws && It->query->sync_tag > m_sync_tag) { ptimer->sync_hint(FIFO_hint::hint_zcull_sync, It->query); - verify(HERE), It->query->sync_tag < m_sync_tag; + verify(HERE), It->query->sync_tag <= m_sync_tag; } break; @@ -2991,20 +2995,19 @@ namespace rsx // Schedule ahead m_next_tsc = m_tsc + min_zcull_tick_us; -#if 0 // Schedule a queue flush if needed - if (front.query && 
front.query->num_draws && front.query->sync_tag > m_sync_tag) + if (!g_cfg.video.relaxed_zcull_sync && + front.query && front.query->num_draws && front.query->sync_tag > m_sync_tag) { const auto elapsed = m_tsc - front.query->timestamp; if (elapsed > max_zcull_delay_us) { - ptimer->sync_hint(FIFO_hint::hint_zcull_sync, reinterpret_cast(front.query)); - verify(HERE), front.query->sync_tag < m_sync_tag; + ptimer->sync_hint(FIFO_hint::hint_zcull_sync, front.query); + verify(HERE), front.query->sync_tag <= m_sync_tag; } return; } -#endif } } @@ -3176,7 +3179,7 @@ namespace rsx if (UNLIKELY(query->sync_tag > m_sync_tag)) { ptimer->sync_hint(FIFO_hint::hint_zcull_sync, query); - verify(HERE), m_sync_tag > query->sync_tag; + verify(HERE), m_sync_tag >= query->sync_tag; } } diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index 9663111263df..72960c60ca72 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -375,9 +375,8 @@ namespace rsx struct ZCULL_control { // Delay before a report update operation is forced to retire - const u32 max_zcull_delay_us = 4000; - const u32 min_zcull_delay_us = 50; - const u32 min_zcull_tick_us = 500; + const u32 max_zcull_delay_us = 300; + const u32 min_zcull_tick_us = 100; // Number of occlusion query slots available. Real hardware actually has far fewer units before choking const u32 occlusion_query_count = 1024; @@ -437,7 +436,7 @@ namespace rsx void on_draw(); // Sync hint notification - void on_sync_hint(); + void on_sync_hint(void* args); // Check for pending writes bool has_pending() const { return !m_pending_writes.empty(); } diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 2aa686e33174..10d77af2d934 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -1786,7 +1786,7 @@ void VKGSRender::end() auto &data = m_occlusion_map[m_active_query_info->driver_handle]; data.indices.push_back(occlusion_id); - data.command_buffer_to_wait = m_current_command_buffer; + data.set_sync_command_buffer(m_current_command_buffer); m_current_command_buffer->flags &= ~vk::command_buffer::cb_load_occluson_task; m_current_command_buffer->flags |= (vk::command_buffer::cb_has_occlusion_task | vk::command_buffer::cb_has_open_query); @@ -2234,7 +2234,9 @@ void VKGSRender::sync_hint(rsx::FIFO_hint hint, void* args) auto occlusion_info = static_cast(args); auto& data = m_occlusion_map[occlusion_info->driver_handle]; - if (data.command_buffer_to_wait != m_current_command_buffer || data.indices.empty()) + // NOTE: Currently, a special condition exists where the indices can be empty even with active draw count. 
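
The synchronization tuning above hinges on treating the sync tag as a monotonic high-water mark: every draw stamps the active query from a running counter, and a sync hint may only raise the controller's mark, so a hint issued against an older query (for example a conditional render re-reading a stale report) can no longer move it backwards. A self-contained sketch of that bookkeeping (type and member names are illustrative):

#include <algorithm>
#include <cstdint>

struct query_tag { uint64_t sync_tag = 0; };

class sync_tag_tracker
{
	uint64_t m_timer    = 0; // monotonic draw counter
	uint64_t m_sync_tag = 0; // highest tag already scheduled for flushing

public:
	// Called per draw while a query is active: stamp it with the current tick.
	void on_draw(query_tag& q) { q.sync_tag = m_timer++; }

	// True if flushing this query still requires emitting a sync hint.
	bool needs_hint(const query_tag& q) const { return q.sync_tag > m_sync_tag; }

	// Called from the backend's hint handler: only ever raises the mark.
	void on_sync_hint(const query_tag& q) { m_sync_tag = std::max(m_sync_tag, q.sync_tag); }
};
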
+ // This is caused by async compiler and should be removed when ubershaders are added in + if (!data.is_current(m_current_command_buffer) || data.indices.empty()) return; // Occlusion test result evaluation is coming up, avoid a hard sync @@ -3709,7 +3711,7 @@ bool VKGSRender::check_occlusion_query_status(rsx::reports::occlusion_query_info if (data.indices.empty()) return true; - if (data.command_buffer_to_wait == m_current_command_buffer) + if (data.is_current(m_current_command_buffer)) return false; u32 oldest = data.indices.front(); @@ -3724,7 +3726,7 @@ void VKGSRender::get_occlusion_query_result(rsx::reports::occlusion_query_info* if (query->num_draws) { - if (data.command_buffer_to_wait == m_current_command_buffer) + if (data.is_current(m_current_command_buffer)) { std::lock_guard lock(m_flush_queue_mutex); flush_command_queue(); @@ -3738,8 +3740,7 @@ void VKGSRender::get_occlusion_query_result(rsx::reports::occlusion_query_info* busy_wait(); } - // Allocation stack is FIFO and very long so no need to actually wait for fence signal - data.command_buffer_to_wait->flush(); + data.sync(); // Gather data for (const auto occlusion_id : data.indices) diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index 8f78ad8f1ebe..beb37520e008 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -72,6 +72,7 @@ struct command_buffer_chunk: public vk::command_buffer std::atomic_bool pending = { false }; u64 eid_tag = 0; + u64 reset_id = 0; shared_mutex guard_mutex; command_buffer_chunk() = default; @@ -101,6 +102,7 @@ struct command_buffer_chunk: public vk::command_buffer if (pending) wait(FRAME_PRESENT_TIMEOUT); + ++reset_id; CHECK_RESULT(vkResetCommandBuffer(commands, 0)); } @@ -164,6 +166,27 @@ struct occlusion_data { rsx::simple_array indices; command_buffer_chunk* command_buffer_to_wait = nullptr; + u64 command_buffer_sync_id = 0; + + bool is_current(command_buffer_chunk* cmd) const + { + return (command_buffer_to_wait == cmd && command_buffer_sync_id == cmd->reset_id); + } + + void set_sync_command_buffer(command_buffer_chunk* cmd) + { + command_buffer_to_wait = cmd; + command_buffer_sync_id = cmd->reset_id; + } + + void sync() + { + if (command_buffer_to_wait->reset_id == command_buffer_sync_id) + { + // Allocation stack is FIFO and very long so no need to actually wait for fence signal + command_buffer_to_wait->flush(); + } + } }; enum frame_context_state : u32 From e734303518e1a742908899902602ecba8035ad4a Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 15 Dec 2019 13:38:42 +0300 Subject: [PATCH 18/21] rsx: Fix linux build --- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 2 +- rpcs3/Emu/RSX/GL/GLOverlays.h | 2 +- rpcs3/Emu/RSX/VK/VKCompute.h | 4 ++-- rpcs3/Emu/RSX/VK/VKHelpers.h | 6 +++--- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 51ea91c67660..0a4073d16dcb 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -1110,7 +1110,7 @@ void GLGSRender::clear_surface(u32 arg) if (arg & 0xF0) ctx |= rsx::framebuffer_creation_context::context_clear_color; if (arg & 0x3) ctx |= rsx::framebuffer_creation_context::context_clear_depth; - init_buffers((rsx::framebuffer_creation_context)ctx, true); + init_buffers(static_cast(ctx), true); if (!framebuffer_status_valid) return; diff --git a/rpcs3/Emu/RSX/GL/GLOverlays.h b/rpcs3/Emu/RSX/GL/GLOverlays.h index e8fd19217bd1..7d9aec3ea2ef 100644 --- a/rpcs3/Emu/RSX/GL/GLOverlays.h +++ 
b/rpcs3/Emu/RSX/GL/GLOverlays.h @@ -132,7 +132,7 @@ namespace gl glBindVertexArray(old_vao); } - virtual void run(const areau& region, GLuint target_texture, bool depth_target, bool use_blending = false) + void run(const areau& region, GLuint target_texture, bool depth_target, bool use_blending = false) { if (!compiled) { diff --git a/rpcs3/Emu/RSX/VK/VKCompute.h b/rpcs3/Emu/RSX/VK/VKCompute.h index 5bdc0934a382..009623065285 100644 --- a/rpcs3/Emu/RSX/VK/VKCompute.h +++ b/rpcs3/Emu/RSX/VK/VKCompute.h @@ -199,13 +199,13 @@ namespace vk vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, m_pipeline_layout, 0, 1, &m_descriptor_set, 0, nullptr); } - virtual void run(VkCommandBuffer cmd, u32 invocations_x, u32 invocations_y, u32 invocations_z) + void run(VkCommandBuffer cmd, u32 invocations_x, u32 invocations_y, u32 invocations_z) { load_program(cmd); vkCmdDispatch(cmd, invocations_x, invocations_y, invocations_z); } - virtual void run(VkCommandBuffer cmd, u32 num_invocations) + void run(VkCommandBuffer cmd, u32 num_invocations) { u32 invocations_x, invocations_y; if (num_invocations > max_invocations_x) diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index 25e5e1447ed9..ae8887bc92e1 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -905,13 +905,13 @@ namespace vk // Import optional function endpoints if (pgpu->conditional_render_support) { - cmdBeginConditionalRenderingEXT = (PFN_vkCmdBeginConditionalRenderingEXT)vkGetDeviceProcAddr(dev, "vkCmdBeginConditionalRenderingEXT"); - cmdEndConditionalRenderingEXT = (PFN_vkCmdEndConditionalRenderingEXT)vkGetDeviceProcAddr(dev, "vkCmdEndConditionalRenderingEXT"); + cmdBeginConditionalRenderingEXT = reinterpret_cast(vkGetDeviceProcAddr(dev, "vkCmdBeginConditionalRenderingEXT")); + cmdEndConditionalRenderingEXT = reinterpret_cast(vkGetDeviceProcAddr(dev, "vkCmdEndConditionalRenderingEXT")); } if (pgpu->host_query_reset_support) { - resetQueryPoolEXT = (PFN_vkResetQueryPoolEXT)vkGetDeviceProcAddr(dev, "vkResetQueryPoolEXT"); + resetQueryPoolEXT = reinterpret_cast(vkGetDeviceProcAddr(dev, "vkResetQueryPoolEXT")); } memory_map = vk::get_memory_mapping(pdev); From 3efd5f360c269914c3f6f2bff2bff62eb91ebbf7 Mon Sep 17 00:00:00 2001 From: Eladash Date: Sun, 15 Dec 2019 10:22:32 +0200 Subject: [PATCH 19/21] PPU DisAsm: Again --- rpcs3/Emu/Cell/PPUDisAsm.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rpcs3/Emu/Cell/PPUDisAsm.cpp b/rpcs3/Emu/Cell/PPUDisAsm.cpp index 6196822f2857..44eff74ba90f 100644 --- a/rpcs3/Emu/Cell/PPUDisAsm.cpp +++ b/rpcs3/Emu/Cell/PPUDisAsm.cpp @@ -987,7 +987,7 @@ void PPUDisAsm::SC(ppu_opcode_t op) void PPUDisAsm::B(ppu_opcode_t op) { - const u32 li = op.li; + const u32 li = op.bt24; const u32 aa = op.aa; const u32 lk = op.lk; From 92e228f7628665c9c72101968be0a229e4055022 Mon Sep 17 00:00:00 2001 From: Eladash Date: Tue, 17 Dec 2019 19:21:29 +0200 Subject: [PATCH 20/21] Hotfix sceNpTrophyGetTrophyUnlockState --- rpcs3/Emu/Cell/Modules/sceNpTrophy.cpp | 5 ++++- rpcs3/Emu/Cell/Modules/sceNpTrophy.h | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/Cell/Modules/sceNpTrophy.cpp b/rpcs3/Emu/Cell/Modules/sceNpTrophy.cpp index 0db29e53fc49..3e2a5b35e7aa 100644 --- a/rpcs3/Emu/Cell/Modules/sceNpTrophy.cpp +++ b/rpcs3/Emu/Cell/Modules/sceNpTrophy.cpp @@ -755,11 +755,14 @@ error_code sceNpTrophyGetTrophyUnlockState(u32 context, u32 handle, vm::ptrtropusr->GetTrophiesCount(); + const u32 count_ = ctxt->tropusr->GetTrophiesCount(); 
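
The hotfix in this function pre-clears *flags and then re-packs the per-trophy unlock bits; with the array elements becoming be_t<u32> further below, each packed word is stored guest-endian. A standalone sketch of that packing, with an illustrative bit order (the emulator's exact layout is whatever the loop in sceNpTrophyGetTrophyUnlockState produces):

#include <cstdint>
#include <vector>

// Packs up to 128 unlock flags into four 32-bit words. In the real structure the
// words are be_t<u32>, so plain assignment takes care of the byte order.
void pack_trophy_flags(const std::vector<bool>& unlocked, uint32_t flag_bits[4])
{
	for (int i = 0; i < 4; ++i)
		flag_bits[i] = 0; // mirrors the '*flags = {}' reset added by the hotfix

	const size_t limit = unlocked.size() < 128 ? unlocked.size() : 128;
	for (size_t id = 0; id < limit; ++id)
	{
		if (unlocked[id])
			flag_bits[id / 32] |= 1u << (id % 32); // illustrative bit order
	}
}
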
*count = count_; if (count_ > 128) sceNpTrophy.error("sceNpTrophyGetTrophyUnlockState: More than 128 trophies detected!"); + // Needs hw testing + *flags = {}; + // Pack up to 128 bools in u32 flag_bits[4] for (u32 id = 0; id < count_; id++) { diff --git a/rpcs3/Emu/Cell/Modules/sceNpTrophy.h b/rpcs3/Emu/Cell/Modules/sceNpTrophy.h index 7ffefac8a2be..5fbe5a9239f4 100644 --- a/rpcs3/Emu/Cell/Modules/sceNpTrophy.h +++ b/rpcs3/Emu/Cell/Modules/sceNpTrophy.h @@ -122,7 +122,7 @@ struct SceNpTrophyData struct SceNpTrophyFlagArray { - u32 flag_bits[SCE_NP_TROPHY_FLAG_SETSIZE >> SCE_NP_TROPHY_FLAG_BITS_SHIFT]; + be_t flag_bits[SCE_NP_TROPHY_FLAG_SETSIZE >> SCE_NP_TROPHY_FLAG_BITS_SHIFT]; }; enum From e380a8c279e9179fc6d0a3f6cd0531ccea5bf8fe Mon Sep 17 00:00:00 2001 From: Eladash Date: Wed, 18 Dec 2019 00:43:00 +0200 Subject: [PATCH 21/21] Fix sceNpTrophyGetGameProgress (#7103) * Fix sceNpTrophyGetGameProgress Was missing multiplication by 100. * apply requested changes * Enforce round-to-nearest rounding mode --- rpcs3/Emu/Cell/Modules/sceNpTrophy.cpp | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/rpcs3/Emu/Cell/Modules/sceNpTrophy.cpp b/rpcs3/Emu/Cell/Modules/sceNpTrophy.cpp index 0db29e53fc49..111c2e78c92c 100644 --- a/rpcs3/Emu/Cell/Modules/sceNpTrophy.cpp +++ b/rpcs3/Emu/Cell/Modules/sceNpTrophy.cpp @@ -18,6 +18,8 @@ #include "Emu/Cell/lv2/sys_event.h" #include "Emu/Cell/lv2/sys_process.h" +#include + LOG_CHANNEL(sceNpTrophy); TrophyNotificationBase::~TrophyNotificationBase() @@ -922,16 +924,12 @@ error_code sceNpTrophyGetGameProgress(u32 context, u32 handle, vm::ptr perc return SCE_NP_TROPHY_ERROR_UNKNOWN_HANDLE; } - double accuratePercentage = 0; - for (int i = ctxt->tropusr->GetTrophiesCount() - 1; i >= 0; i--) - { - if (ctxt->tropusr->GetTrophyUnlockState(i)) - { - accuratePercentage++; - } - } + const u32 unlocked = ctxt->tropusr->GetUnlockedTrophiesCount(); + const u32 trp_count = ctxt->tropusr->GetTrophiesCount(); - *percentage = static_cast(accuratePercentage / ctxt->tropusr->GetTrophiesCount()); + verify(HERE), trp_count > 0 && trp_count <= 128; + + *percentage = static_cast(std::lround((unlocked * 100.) / trp_count)); return CELL_OK; }
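
To make the rounding behaviour of this last fix concrete: the old code divided the unlocked count by the total without the * 100 scale and then truncated, so any partially completed trophy list reported 0%. A tiny worked example of the patched formula (the function name is illustrative):

#include <cmath>
#include <cstdint>
#include <cstdio>

static int32_t game_progress_percent(uint32_t unlocked, uint32_t total)
{
	// Matches the patched expression: scale first, then round to nearest.
	return static_cast<int32_t>(std::lround((unlocked * 100.) / total));
}

int main()
{
	// 37 of 51 trophies: 72.55% rounds to 73; the pre-fix code truncated 0.725... to 0.
	std::printf("%d\n", game_progress_percent(37, 51)); // prints 73
	return 0;
}
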