Skip to content

Commit 0ba8eed

Browse files
committed
apply reduced macro definitions to benchmark ex
1 parent 3e910b5 commit 0ba8eed

File tree

4 files changed

+33
-45
lines changed

4 files changed

+33
-45
lines changed

29_Arithmetic2Bench/app_resources/benchmarkSubgroup.comp.hlsl

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,29 +4,33 @@
44

55
#include "nbl/builtin/hlsl/glsl_compat/core.hlsl"
66
#include "nbl/builtin/hlsl/glsl_compat/subgroup_basic.hlsl"
7+
#include "nbl/builtin/hlsl/subgroup2/arithmetic_params.hlsl"
78
#include "nbl/builtin/hlsl/subgroup2/arithmetic_portability.hlsl"
89
#include "nbl/builtin/hlsl/random/xoroshiro.hlsl"
910

1011
#include "shaderCommon.hlsl"
1112
#include "nbl/builtin/hlsl/workgroup2/basic.hlsl"
1213

13-
typedef vector<uint32_t, ITEMS_PER_INVOCATION> type_t;
14+
template<class Binop, class device_capabilities>
15+
using params_t = SUBGROUP_CONFIG_T;
16+
17+
NBL_CONSTEXPR_STATIC_INLINE uint32_t ItemsPerInvocation = params_t<typename arithmetic::plus<uint32_t>::base_t, device_capabilities>::ItemsPerInvocation;
18+
19+
typedef vector<uint32_t, ItemsPerInvocation> type_t;
1420

1521
uint32_t globalIndex()
1622
{
1723
return glsl::gl_WorkGroupID().x*WORKGROUP_SIZE+workgroup::SubgroupContiguousIndex();
1824
}
1925

20-
template<class Binop, uint32_t N>
26+
template<class Binop>
2127
static void subbench(NBL_CONST_REF_ARG(type_t) sourceVal)
2228
{
23-
using config_t = subgroup2::Configuration<SUBGROUP_SIZE_LOG2>;
24-
using params_t = subgroup2::ArithmeticParams<config_t, typename Binop::base_t, N, device_capabilities>;
2529
type_t value = sourceVal;
2630

2731
const uint64_t outputBufAddr = pc.pOutputBuf[Binop::BindingIndex];
2832

29-
operation_t<params_t> func;
33+
operation_t<params_t<typename Binop::base_t, device_capabilities> > func;
3034
// [unroll]
3135
for (uint32_t i = 0; i < NUM_LOOPS; i++)
3236
value = func(value);
@@ -36,13 +40,14 @@ static void subbench(NBL_CONST_REF_ARG(type_t) sourceVal)
3640

3741
void benchmark()
3842
{
43+
const uint32_t invocationIndex = globalIndex();
3944
type_t sourceVal;
4045
Xoroshiro64Star xoroshiro = Xoroshiro64Star::construct(uint32_t2(invocationIndex,invocationIndex+1));
4146
[unroll]
42-
for (uint16_t i = 0; i < Config::ItemsPerInvocation_0; i++)
47+
for (uint16_t i = 0; i < ItemsPerInvocation; i++)
4348
sourceVal[i] = xoroshiro();
4449

45-
subbench<arithmetic::plus<uint32_t>, ITEMS_PER_INVOCATION>(sourceVal);
50+
subbench<arithmetic::plus<uint32_t> >(sourceVal);
4651
}
4752

4853
[numthreads(WORKGROUP_SIZE,1,1)]

29_Arithmetic2Bench/app_resources/benchmarkWorkgroup.comp.hlsl

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,10 @@
66
#include "nbl/builtin/hlsl/workgroup2/arithmetic.hlsl"
77
#include "nbl/builtin/hlsl/random/xoroshiro.hlsl"
88

9-
static const uint32_t WORKGROUP_SIZE = 1u << WORKGROUP_SIZE_LOG2;
9+
using config_t = WORKGROUP_CONFIG_T;
1010

1111
#include "shaderCommon.hlsl"
1212

13-
using config_t = workgroup2::ArithmeticConfiguration<WORKGROUP_SIZE_LOG2, SUBGROUP_SIZE_LOG2, ITEMS_PER_INVOCATION>;
14-
1513
typedef vector<uint32_t, config_t::ItemsPerInvocation_0> type_t;
1614

1715
// final (level 1/2) scan needs to fit in one subgroup exactly
@@ -119,7 +117,7 @@ void benchmark()
119117
}
120118

121119

122-
[numthreads(WORKGROUP_SIZE,1,1)]
120+
[numthreads(config_t::WorkgroupSize,1,1)]
123121
void main()
124122
{
125123
benchmark();

29_Arithmetic2Bench/app_resources/shaderCommon.hlsl

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,6 @@
33
using namespace nbl;
44
using namespace hlsl;
55

6-
// https://github.com/microsoft/DirectXShaderCompiler/issues/6144
7-
uint32_t3 nbl::hlsl::glsl::gl_WorkGroupSize() {return uint32_t3(WORKGROUP_SIZE,1,1);}
8-
9-
#ifndef ITEMS_PER_INVOCATION
10-
#error "Define ITEMS_PER_INVOCATION!"
11-
#endif
12-
136
[[vk::push_constant]] PushConstantData pc;
147

158
struct device_capabilities
@@ -25,10 +18,6 @@ struct device_capabilities
2518
#error "Define OPERATION!"
2619
#endif
2720

28-
#ifndef SUBGROUP_SIZE_LOG2
29-
#error "Define SUBGROUP_SIZE_LOG2!"
30-
#endif
31-
3221
#ifndef NUM_LOOPS
3322
#error "Define NUM_LOOPS!"
3423
#endif

29_Arithmetic2Bench/main.cpp

Lines changed: 19 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include "nbl/application_templates/MonoAssetManagerAndBuiltinResourceApplication.hpp"
44
#include "app_resources/common.hlsl"
55
#include "nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl"
6+
#include "nbl/builtin/hlsl/subgroup2/arithmetic_params.hlsl"
67

78
using namespace nbl;
89
using namespace core;
@@ -549,55 +550,50 @@ class ArithmeticBenchApp final : public examples::SimpleWindowedApplication, pub
549550
smart_refctd_ptr<ICPUShader> overriddenUnspecialized;
550551
if constexpr (WorkgroupBench)
551552
{
552-
const std::string definitions[7] = {
553+
const std::string definitions[4] = {
553554
"workgroup2::" + arith_name,
554-
std::to_string(workgroupSizeLog2),
555-
std::to_string(itemsPerWG),
556-
std::to_string(itemsPerInvoc),
557-
std::to_string(subgroupSizeLog2),
555+
wgConfig.getConfigTemplateStructString(),
558556
std::to_string(numLoops),
559557
std::to_string(arith_name=="reduction")
560558
};
561559

562-
const IShaderCompiler::SMacroDefinition defines[8] = {
560+
const IShaderCompiler::SMacroDefinition defines[5] = {
563561
{ "OPERATION", definitions[0] },
564-
{ "WORKGROUP_SIZE_LOG2", definitions[1] },
565-
{ "ITEMS_PER_WG", definitions[2] },
566-
{ "ITEMS_PER_INVOCATION", definitions[3] },
567-
{ "SUBGROUP_SIZE_LOG2", definitions[4] },
568-
{ "NUM_LOOPS", definitions[5] },
569-
{ "IS_REDUCTION", definitions[6] },
562+
{ "WORKGROUP_CONFIG_T", definitions[1] },
563+
{ "NUM_LOOPS", definitions[2] },
564+
{ "IS_REDUCTION", definitions[3] },
570565
{ "TEST_NATIVE", "1" }
571566
};
572567
if (UseNativeArithmetic)
573-
options.preprocessorOptions.extraDefines = { defines, defines + 8 };
568+
options.preprocessorOptions.extraDefines = { defines, defines + 5 };
574569
else
575-
options.preprocessorOptions.extraDefines = { defines, defines + 7 };
570+
options.preprocessorOptions.extraDefines = { defines, defines + 4 };
576571

577572
overriddenUnspecialized = compiler->compileToSPIRV((const char*)source->getContent()->getPointer(), options);
578573
}
579574
else
580575
{
581-
const std::string definitions[5] = {
576+
hlsl::subgroup2::SArithmeticParams sgParams;
577+
sgParams.init(subgroupSizeLog2, itemsPerInvoc);
578+
579+
const std::string definitions[4] = {
582580
"subgroup2::" + arith_name,
583581
std::to_string(workgroupSize),
584-
std::to_string(itemsPerInvoc),
585-
std::to_string(subgroupSizeLog2),
582+
sgParams.getParamTemplateStructString(),
586583
std::to_string(numLoops)
587584
};
588585

589-
const IShaderCompiler::SMacroDefinition defines[6] = {
586+
const IShaderCompiler::SMacroDefinition defines[5] = {
590587
{ "OPERATION", definitions[0] },
591588
{ "WORKGROUP_SIZE", definitions[1] },
592-
{ "ITEMS_PER_INVOCATION", definitions[2] },
593-
{ "SUBGROUP_SIZE_LOG2", definitions[3] },
594-
{ "NUM_LOOPS", definitions[4] },
589+
{ "SUBGROUP_CONFIG_T", definitions[2] },
590+
{ "NUM_LOOPS", definitions[3] },
595591
{ "TEST_NATIVE", "1" }
596592
};
597593
if (UseNativeArithmetic)
598-
options.preprocessorOptions.extraDefines = { defines, defines + 6 };
599-
else
600594
options.preprocessorOptions.extraDefines = { defines, defines + 5 };
595+
else
596+
options.preprocessorOptions.extraDefines = { defines, defines + 4 };
601597

602598
overriddenUnspecialized = compiler->compileToSPIRV((const char*)source->getContent()->getPointer(), options);
603599
}

0 commit comments

Comments
 (0)