Skip to content

Commit

Permalink
3rdParty: update mupen64plus-video-parallel
Browse files Browse the repository at this point in the history
  • Loading branch information
Rosalie241 committed Feb 2, 2025
1 parent c4929d9 commit 0962c69
Show file tree
Hide file tree
Showing 93 changed files with 60,607 additions and 51,328 deletions.
4 changes: 2 additions & 2 deletions Source/3rdParty/mupen64plus-video-parallel/.gitrepo
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
[subrepo]
remote = git@github.com:/Rosalie241/parallel-rdp-standalone.git
branch = RMG
commit = 142aed6230b4d5faf05cf25cba711dc0f0a75810
parent = 4ab78b79c35f6807fd5194d8fc1d52996c03604e
commit = 74a9badf728fcb6a01ceb2ca9668aae5d1f1157c
parent = cb2948ec7404a7ee91e08fd5c3acb65c41629856
method = merge
cmdver = 0.4.6
1 change: 1 addition & 0 deletions Source/3rdParty/mupen64plus-video-parallel/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ set(PARALLEL_SOURCES
util/aligned_alloc.cpp
util/timer.cpp
util/timeline_trace_file.cpp
util/environment.cpp
util/thread_name.cpp
volk/volk.c
screen.c
Expand Down
2 changes: 1 addition & 1 deletion Source/3rdParty/mupen64plus-video-parallel/COMMIT
Original file line number Diff line number Diff line change
@@ -1 +1 @@
afc61e1050a79b9ab7bb53b8880b055324f7b4a2
1cecd042b2619bc505c12bfdc713808386f2b54d
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,8 @@ void MainDialog::on_buttonBox_clicked(QAbstractButton* button)
if (pushButton == okButton)
{
// screen size
QStringList trimmedScreenSize = this->screenSizeComboBox->currentText().remove(' ').split("x");
int width = trimmedScreenSize.at(0).toInt();
int height = trimmedScreenSize.at(1).toInt();
int width = this->screenSizeComboBox->currentText().split(" x ").at(0).toInt();
int height = this->screenSizeComboBox->currentText().split(" x ").at(1).toInt();
ConfigSetParameter(configVideoParallel, KEY_SCREEN_WIDTH, M64TYPE_INT, &width);
ConfigSetParameter(configVideoParallel, KEY_SCREEN_HEIGHT, M64TYPE_INT, &height);

Expand Down
1 change: 1 addition & 0 deletions Source/3rdParty/mupen64plus-video-parallel/config.mk
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ PARALLEL_RDP_SOURCES_CXX := \
$(PARALLEL_RDP_IMPLEMENTATION)/util/aligned_alloc.cpp \
$(PARALLEL_RDP_IMPLEMENTATION)/util/timer.cpp \
$(PARALLEL_RDP_IMPLEMENTATION)/util/timeline_trace_file.cpp \
$(PARALLEL_RDP_IMPLEMENTATION)/util/environment.cpp \
$(PARALLEL_RDP_IMPLEMENTATION)/util/thread_name.cpp

PARALLEL_RDP_SOURCES_C := \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,9 @@ enum StaticRasterizationFlagBits
RASTERIZATION_CONVERT_ONE_BIT = 1 << 22,
RASTERIZATION_BILERP_0_BIT = 1 << 23,
RASTERIZATION_BILERP_1_BIT = 1 << 24,
RASTERIZATION_NEED_NOISE_DUAL_BIT = 1 << 25,
RASTERIZATION_UPSCALING_LOG2_BIT_OFFSET = 26,
// Bits 26 and 27 hold the upscaling factor in log2.
RASTERIZATION_NEED_NOISE_BIT = 1 << 28,
RASTERIZATION_USE_STATIC_TEXTURE_SIZE_FORMAT_BIT = 1 << 29,
RASTERIZATION_USE_SPECIALIZATION_CONSTANT_BIT = 1 << 30
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -165,42 +165,47 @@ bool Renderer::init_caps()
LOGI("Allow small types = %d.\n", int(allow_small_types));
}

if (!features.storage_16bit_features.storageBuffer16BitAccess)
if (!features.vk11_features.storageBuffer16BitAccess)
{
LOGE("VK_KHR_16bit_storage for SSBOs is not supported! This is a minimum requirement for paraLLEl-RDP.\n");
LOGE("16-bit storage for SSBOs is not supported! This is a minimum requirement for paraLLEl-RDP.\n");
return false;
}

if (!features.storage_8bit_features.storageBuffer8BitAccess)
if (!features.vk12_features.storageBuffer8BitAccess)
{
LOGE("VK_KHR_8bit_storage for SSBOs is not supported! This is a minimum requirement for paraLLEl-RDP.\n");
LOGE("8-bit storage for SSBOs is not supported! This is a minimum requirement for paraLLEl-RDP.\n");
return false;
}

// Driver workarounds here for 8/16-bit integer support.
if (features.supports_driver_properties && !forces_small_types)
{
if (features.driver_properties.driverID == VK_DRIVER_ID_AMD_PROPRIETARY_KHR)
if (features.driver_id == VK_DRIVER_ID_AMD_PROPRIETARY_KHR)
{
LOGW("Current proprietary AMD driver is known to be buggy with 8/16-bit integer arithmetic, disabling support for time being.\n");
allow_small_types = false;
}
else if (features.driver_properties.driverID == VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR ||
features.driver_properties.driverID == VK_DRIVER_ID_MESA_RADV_KHR)
else if (features.driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR ||
features.driver_id == VK_DRIVER_ID_MESA_RADV_KHR)
{
LOGW("Current open-source AMD drivers are known to be slightly faster without 8/16-bit integer arithmetic.\n");
allow_small_types = false;
}
else if (features.driver_properties.driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR)
else if (features.driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR)
{
LOGW("Current NVIDIA driver is known to be slightly faster without 8/16-bit integer arithmetic.\n");
allow_small_types = false;
}
else if (features.driver_properties.driverID == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR)
else if (features.driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR)
{
LOGW("Current proprietary Intel Windows driver is tested to perform much better without 8/16-bit integer support.\n");
allow_small_types = false;
}
else if (features.driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY_KHR)
{
LOGW("Current proprietary Qcom driver is known to be buggy with 8/16-bit integer arithmetic, disabling support for time being.\n");
allow_small_types = false;
}

// Intel ANV *must* use small integer arithmetic, or it doesn't pass test suite.
}
Expand All @@ -209,7 +214,7 @@ bool Renderer::init_caps()
{
caps.supports_small_integer_arithmetic = false;
}
else if (features.enabled_features.shaderInt16 && features.float16_int8_features.shaderInt8)
else if (features.enabled_features.shaderInt16 && features.vk12_features.shaderInt8)
{
LOGI("Enabling 8 and 16-bit integer arithmetic support for more efficient shaders!\n");
caps.supports_small_integer_arithmetic = true;
Expand All @@ -220,7 +225,7 @@ bool Renderer::init_caps()
caps.supports_small_integer_arithmetic = false;
}

uint32_t subgroup_size = features.subgroup_properties.subgroupSize;
uint32_t subgroup_size = features.vk11_props.subgroupSize;

const VkSubgroupFeatureFlags required =
VK_SUBGROUP_FEATURE_BALLOT_BIT |
Expand All @@ -230,15 +235,15 @@ bool Renderer::init_caps()

caps.subgroup_tile_binning =
allow_subgroup &&
(features.subgroup_properties.supportedOperations & required) == required &&
(features.subgroup_properties.supportedStages & VK_SHADER_STAGE_COMPUTE_BIT) != 0 &&
(features.vk11_props.subgroupSupportedOperations & required) == required &&
(features.vk11_props.subgroupSupportedStages & VK_SHADER_STAGE_COMPUTE_BIT) != 0 &&
can_support_minimum_subgroup_size(32) && subgroup_size <= 64;

caps.subgroup_depth_blend =
caps.super_sample_readback &&
allow_subgroup &&
(features.subgroup_properties.supportedOperations & required) == required &&
(features.subgroup_properties.supportedStages & VK_SHADER_STAGE_COMPUTE_BIT) != 0;
(features.vk11_props.subgroupSupportedOperations & required) == required &&
(features.vk11_props.subgroupSupportedStages & VK_SHADER_STAGE_COMPUTE_BIT) != 0;

return true;
}
Expand Down Expand Up @@ -716,7 +721,7 @@ static int normalize_dzpix(int dz)
else if (dz == 0)
return 1;

unsigned bit = 31 - leading_zeroes(dz);
unsigned bit = 31 - Util::leading_zeroes(dz);
return 1 << (bit + 1);
}

Expand Down Expand Up @@ -1131,7 +1136,7 @@ static bool combiner_uses_lod_frac(const StaticRasterizationState &state)
void Renderer::deduce_noise_state()
{
auto &state = stream.static_raster_state;
state.flags &= ~RASTERIZATION_NEED_NOISE_BIT;
state.flags &= ~(RASTERIZATION_NEED_NOISE_BIT | RASTERIZATION_NEED_NOISE_DUAL_BIT);

// Figure out if we need to seed noise variable for this primitive.
if ((state.dither & 3) == 2 || ((state.dither >> 2) & 3) == 2)
Expand All @@ -1144,13 +1149,18 @@ void Renderer::deduce_noise_state()
return;

if ((state.flags & RASTERIZATION_MULTI_CYCLE_BIT) != 0)
{
if (state.combiner[0].rgb.muladd == RGBMulAdd::Noise)
state.flags |= RASTERIZATION_NEED_NOISE_BIT;
}
else if (state.combiner[1].rgb.muladd == RGBMulAdd::Noise)

if (state.combiner[1].rgb.muladd == RGBMulAdd::Noise)
state.flags |= RASTERIZATION_NEED_NOISE_BIT;

// If both cycles use noise, they need to observe different values.
if ((state.flags & RASTERIZATION_MULTI_CYCLE_BIT) != 0 &&
state.combiner[0].rgb.muladd == RGBMulAdd::Noise &&
state.combiner[1].rgb.muladd == RGBMulAdd::Noise)
state.flags |= RASTERIZATION_NEED_NOISE_DUAL_BIT;

if ((state.flags & (RASTERIZATION_ALPHA_TEST_BIT | RASTERIZATION_ALPHA_TEST_DITHER_BIT)) ==
(RASTERIZATION_ALPHA_TEST_BIT | RASTERIZATION_ALPHA_TEST_DITHER_BIT))
{
Expand Down Expand Up @@ -1675,7 +1685,7 @@ void Renderer::submit_span_setup_jobs(Vulkan::CommandBuffer &cmd, bool upscale)
cmd.set_buffer_view(1, 0, *instance.gpu.span_info_jobs_view);
cmd.set_specialization_constant_mask(3);
cmd.set_specialization_constant(0, (upscale ? caps.upscaling : 1) * ImplementationConstants::DefaultWorkgroupSize);
cmd.set_specialization_constant(1, upscale ? trailing_zeroes(caps.upscaling) : 0u);
cmd.set_specialization_constant(1, upscale ? Util::trailing_zeroes(caps.upscaling) : 0u);

Vulkan::QueryPoolHandle begin_ts, end_ts;
if (caps.timestamp >= 2)
Expand Down Expand Up @@ -1775,7 +1785,7 @@ void Renderer::submit_rasterization(Vulkan::CommandBuffer &cmd, Vulkan::Buffer &
if (caps.timestamp >= 2)
start_ts = cmd.write_timestamp(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);

uint32_t scale_log2_bit = (upscaling ? trailing_zeroes(caps.upscaling) : 0u) << RASTERIZATION_UPSCALING_LOG2_BIT_OFFSET;
uint32_t scale_log2_bit = (upscaling ? Util::trailing_zeroes(caps.upscaling) : 0u) << RASTERIZATION_UPSCALING_LOG2_BIT_OFFSET;

for (size_t i = 0; i < stream.static_raster_state_cache.size(); i++)
{
Expand Down Expand Up @@ -1864,7 +1874,7 @@ void Renderer::submit_tile_binning_combined(Vulkan::CommandBuffer &cmd, bool ups
cmd.push_constants(&push, 0, sizeof(push));

auto &features = device->get_device_features();
uint32_t subgroup_size = features.subgroup_properties.subgroupSize;
uint32_t subgroup_size = features.vk11_props.subgroupSize;

Vulkan::QueryPoolHandle start_ts, end_ts;
if (caps.timestamp >= 2)
Expand All @@ -1886,7 +1896,7 @@ void Renderer::submit_tile_binning_combined(Vulkan::CommandBuffer &cmd, bool ups
if (supports_subgroup_size_control(32, subgroup_size))
{
cmd.enable_subgroup_size_control(true);
cmd.set_subgroup_size_log2(true, 5, trailing_zeroes(subgroup_size));
cmd.set_subgroup_size_log2(true, 5, Util::trailing_zeroes(subgroup_size));
}
}
else
Expand Down Expand Up @@ -2087,7 +2097,7 @@ void Renderer::submit_depth_blend(Vulkan::CommandBuffer &cmd, Vulkan::Buffer &tm
cmd.set_specialization_constant(5, Limits::MaxPrimitives);
cmd.set_specialization_constant(6, upscaled ? caps.max_width : Limits::MaxWidth);
cmd.set_specialization_constant(7, uint32_t(force_write_mask || (!is_host_coherent && !upscaled)) |
((upscaled ? trailing_zeroes(caps.upscaling) : 0u) << 1u));
((upscaled ? Util::trailing_zeroes(caps.upscaling) : 0u) << 1u));

if (upscaled)
cmd.set_storage_buffer(0, 0, *upscaling_multisampled_rdram);
Expand Down Expand Up @@ -3532,27 +3542,27 @@ void Renderer::set_primitive_color(uint8_t min_level, uint8_t prim_lod_frac, uin

bool Renderer::can_support_minimum_subgroup_size(unsigned size) const
{
return supports_subgroup_size_control(size, device->get_device_features().subgroup_properties.subgroupSize);
return supports_subgroup_size_control(size, device->get_device_features().vk11_props.subgroupSize);
}

bool Renderer::supports_subgroup_size_control(uint32_t minimum_size, uint32_t maximum_size) const
{
auto &features = device->get_device_features();

if (!features.subgroup_size_control_features.computeFullSubgroups)
if (!features.vk13_features.computeFullSubgroups)
return false;

bool use_varying = minimum_size <= features.subgroup_size_control_properties.minSubgroupSize &&
maximum_size >= features.subgroup_size_control_properties.maxSubgroupSize;
bool use_varying = minimum_size <= features.vk13_props.minSubgroupSize &&
maximum_size >= features.vk13_props.maxSubgroupSize;

if (!use_varying)
{
bool outside_range = minimum_size > features.subgroup_size_control_properties.maxSubgroupSize ||
maximum_size < features.subgroup_size_control_properties.minSubgroupSize;
bool outside_range = minimum_size > features.vk13_props.maxSubgroupSize ||
maximum_size < features.vk13_props.minSubgroupSize;
if (outside_range)
return false;

if ((features.subgroup_size_control_properties.requiredSubgroupSizeStages & VK_SHADER_STAGE_COMPUTE_BIT) == 0)
if ((features.vk13_props.requiredSubgroupSizeStages & VK_SHADER_STAGE_COMPUTE_BIT) == 0)
return false;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ const int RASTERIZATION_USES_PIPELINED_TEXEL1_BIT = 1 << 21;
const int RASTERIZATION_CONVERT_ONE_BIT = 1 << 22;
const int RASTERIZATION_BILERP_0_BIT = 1 << 23;
const int RASTERIZATION_BILERP_1_BIT = 1 << 24;
const int RASTERIZATION_NEED_NOISE_DUAL_BIT = 1 << 25;
const int RASTERIZATION_UPSCALING_LOG2_BIT_OFFSET = 26;
const int RASTERIZATION_NEED_NOISE_BIT = 1 << 28;
const int RASTERIZATION_USE_STATIC_TEXTURE_SIZE_FORMAT_BIT = 1 << 29;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -202,9 +202,10 @@ void store_vram_color(uint index, uint slice)
{
index &= RDRAM_MASK_8;
index += slice * RDRAM_SIZE;
vram8.data[index ^ 3u] = mem_u8(current_color.r);
uint col = (index & 1) != 0 ? current_color.g : current_color.r;
vram8.data[index ^ 3u] = mem_u8(col);
if ((index & 1u) != 0u)
hidden_vram.data[index >> 1u] = mem_u8((current_color.r & 1) * 3);
hidden_vram.data[index >> 1u] = mem_u8((col & 1) * 3);
break;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,14 @@ bool shade_pixel(int x, int y, uint primitive_index, out ShadedData shaded)
// but let's not go there ...
combined_inputs.texel1 = tmp_texel;

// Resample the noise at some arbitrary other offset.
// This only matters if both combiner cycles take noise as an input (very weird).
if ((static_state_flags & RASTERIZATION_NEED_NOISE_DUAL_BIT) != 0)
{
reseed_noise(x + 1023, y + 7, primitive_index + global_constants.fb_info.base_primitive_index + 11);
combined_inputs.noise = noise_get_combiner();
}

combined = u8x4(combiner_cycle1(combined_inputs,
combiner_inputs_rgb1,
combiner_inputs_alpha1,
Expand Down
Loading

0 comments on commit 0962c69

Please sign in to comment.