Skip to content

Commit faee1a6

Browse files
committed
add simd equal
1 parent e9ba34a commit faee1a6

File tree

1 file changed

+34
-26
lines changed

1 file changed

+34
-26
lines changed

src/impl.cpp

+34-26
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,14 @@ namespace {
4646
uint32_t g_installedHooks = 0U;
4747
}
4848

49+
/// @brief Compares a four-element 32-bit array against four 32-bit words in memory
///        using a single 128-bit SSE2 comparison.
///
/// @param arr1 Reference values (exactly 16 bytes).
/// @param ptr  Pointer to the words to compare against. Exactly 16 bytes are read
///             starting at this address, so it must reference at least four
///             readable uint32_t values. No particular alignment is required
///             because unaligned loads are used for both operands.
/// @return true when all four 32-bit lanes are equal, false otherwise.
[[nodiscard]] inline bool simd_equal(const std::array<uint32_t, 4>& arr1, const uint32_t* ptr) noexcept {
    // Unaligned loads: neither the array nor the caller's pointer is guaranteed
    // to sit on a 16-byte boundary.
    const __m128i v1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(arr1.data()));
    const __m128i v2 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(ptr));

    // Each 32-bit lane becomes all ones when equal and all zeros otherwise.
    const __m128i cmp = _mm_cmpeq_epi32(v1, v2);

    // movemask gathers the top bit of each of the 16 bytes; 0xFFFF means every
    // byte (and therefore every lane) compared equal.
    return (_mm_movemask_epi8(cmp) == 0xFFFF);
}
4957
DeviceProcs g_deviceProcs;
5058

5159
constexpr uint32_t HOOK_DEVICE = (1U << 0U);
@@ -88,7 +96,7 @@ HRESULT STDMETHODCALLTYPE ID3D11Device_CreateVertexShader(
8896

8997
const auto* hash = std::bit_cast<const uint32_t*>(std::bit_cast<const uint8_t*>(pShaderBytecode) + 4);
9098

91-
if (std::equal(ParticleShader1.begin(), ParticleShader1.end(), hash)) {
99+
if (simd_equal(ParticleShader1, hash)) {
92100
if (!Particle1B) {
93101
Particle1B = true;
94102
log("Particle found");
@@ -99,7 +107,7 @@ HRESULT STDMETHODCALLTYPE ID3D11Device_CreateVertexShader(
99107
return procs->CreateVertexShader(pDevice, pShaderBytecode, BytecodeLength, pClassLinkage, ppVertexShader);
100108
}
101109

102-
} else if (std::equal(ParticleShader2.begin(), ParticleShader2.end(), hash)) {
110+
} else if (simd_equal(ParticleShader2, hash)) {
103111

104112
if (!Particle2B) {
105113
Particle2B = true;
@@ -111,39 +119,39 @@ HRESULT STDMETHODCALLTYPE ID3D11Device_CreateVertexShader(
111119
return procs->CreateVertexShader(pDevice, pShaderBytecode, BytecodeLength, pClassLinkage, ppVertexShader);
112120
}
113121
}
114-
else if (std::equal(VolumeFogShader.begin(), VolumeFogShader.end(), hash) && (QualityVal < 2)) {
122+
else if (simd_equal(VolumeFogShader, hash) && (QualityVal < 2)) {
115123

116124
if (!VolumeFogB) {
117125
VolumeFogB = true;
118126
log("Volumefog found");
119127
}
120128
return procs->CreateVertexShader(pDevice, NO_VOLUMEFOG_SHADER.data(), NO_VOLUMEFOG_SHADER.size(), pClassLinkage, ppVertexShader);
121129

122-
} else if (std::equal(GrassShader.begin(), GrassShader.end(), hash) && (QualityVal < 2)) {
130+
} else if (simd_equal(GrassShader, hash) && (QualityVal < 2)) {
123131

124132
if (!GrassB) {
125133
GrassB = true;
126134
log("Grass found");
127135
}
128136
return procs->CreateVertexShader(pDevice, SIMPLIFIED_VS_GRASS_SHADER.data(), SIMPLIFIED_VS_GRASS_SHADER.size(), pClassLinkage, ppVertexShader);
129137

130-
} else if (std::equal(ShadowPlayerShader.begin(), ShadowPlayerShader.end(), hash) && (QualityVal < 2)) {
138+
} else if (simd_equal(ShadowPlayerShader, hash) && (QualityVal < 2)) {
131139

132140
if (!ShadowPlayerB) {
133141
ShadowPlayerB = true;
134142
log("Shadow Player found");
135143
}
136144
return procs->CreateVertexShader(pDevice, FIXED_PLAYER_SHADOW_SHADER.data(), FIXED_PLAYER_SHADOW_SHADER.size(), pClassLinkage, ppVertexShader);
137145

138-
} else if (std::equal(ShadowPropShader.begin(), ShadowPropShader.end(), hash) && (QualityVal < 2)) {
146+
} else if (simd_equal(ShadowPropShader, hash) && (QualityVal < 2)) {
139147

140148
if (!ShadowPropB) {
141149
ShadowPropB = true;
142150
log("Shadow Prop found");
143151
}
144152
return procs->CreateVertexShader(pDevice, FIXED_PROP_SHADOW_SHADER.data(), FIXED_PROP_SHADOW_SHADER.size(), pClassLinkage, ppVertexShader);
145153
}
146-
else if (std::equal(TerrainShader.begin(), TerrainShader.end(), hash) && (QualityVal == 2)) {
154+
else if (simd_equal(TerrainShader, hash) && (QualityVal == 2)) {
147155

148156
if (!TerrainB) {
149157
TerrainB = true;
@@ -152,7 +160,7 @@ HRESULT STDMETHODCALLTYPE ID3D11Device_CreateVertexShader(
152160
return procs->CreateVertexShader(pDevice, LOW_VS_TERRAIN_SHADER.data(), LOW_VS_TERRAIN_SHADER.size(), pClassLinkage, ppVertexShader);
153161

154162
}
155-
else if (std::equal(PlayerShader.begin(), PlayerShader.end(), hash) && (TextureVal == 0)) {
163+
else if (simd_equal(PlayerShader, hash) && (TextureVal == 0)) {
156164

157165
if (!VSPlayerB) {
158166
VSPlayerB = true;
@@ -161,7 +169,7 @@ HRESULT STDMETHODCALLTYPE ID3D11Device_CreateVertexShader(
161169
return procs->CreateVertexShader(pDevice, SIMPLIFIED_VS_PLAYER_SHADER.data(), SIMPLIFIED_VS_PLAYER_SHADER.size(), pClassLinkage, ppVertexShader);
162170

163171
}
164-
else if (std::equal(DefaultShader.begin(), DefaultShader.end(), hash) && (QualityVal == 2)) {
172+
else if (simd_equal(DefaultShader, hash) && (QualityVal == 2)) {
165173

166174
if (!DefaultB) {
167175
DefaultB = true;
@@ -170,14 +178,14 @@ HRESULT STDMETHODCALLTYPE ID3D11Device_CreateVertexShader(
170178
return procs->CreateVertexShader(pDevice, SIMPLIFIED_VS_DEFAULT_SHADER.data(), SIMPLIFIED_VS_DEFAULT_SHADER.size(), pClassLinkage, ppVertexShader);
171179

172180
}
173-
else if (std::equal(SkyBoxShader.begin(), SkyBoxShader.end(), hash)) {
181+
else if (simd_equal(SkyBoxShader, hash)) {
174182
if (!SkyBoxB) {
175183
SkyBoxB = true;
176184
log("SkyBox found");
177185
}
178186
return procs->CreateVertexShader(pDevice, VS_SKYBOX.data(), VS_SKYBOX.size(), pClassLinkage, ppVertexShader);
179187
}
180-
else if (std::equal(SkyBoxAniShader.begin(), SkyBoxAniShader.end(), hash)) {
188+
else if (simd_equal(SkyBoxAniShader, hash)) {
181189
if (!SkyBoxAniB) {
182190
SkyBoxAniB = true;
183191
log("SkyBox Ani found");
@@ -220,7 +228,7 @@ HRESULT STDMETHODCALLTYPE ID3D11Device_CreatePixelShader(
220228
const auto* hash = std::bit_cast<const uint32_t*>(std::bit_cast<const uint8_t*>(pShaderBytecode) + 4);
221229

222230

223-
if (std::equal(TexShader.begin(), TexShader.end(), hash)) {
231+
if (simd_equal(TexShader, hash)) {
224232

225233
if (!DiffVolTexB) {
226234
DiffVolTexB = true;
@@ -229,83 +237,83 @@ HRESULT STDMETHODCALLTYPE ID3D11Device_CreatePixelShader(
229237
return procs->CreatePixelShader(pDevice, SIMPLIFIED_TEX_SHADER.data(), SIMPLIFIED_TEX_SHADER.size(), pClassLinkage, ppPixelShader);
230238

231239
}
232-
else if (std::equal(RadialShader.begin(), RadialShader.end(), hash) && (QualityVal < 2)) {
240+
else if (simd_equal(RadialShader, hash) && (QualityVal < 2)) {
233241

234242
if (!RadialBlurB) {
235243
RadialBlurB = true;
236244
log("RadialBlur found");
237245
}
238246
return procs->CreatePixelShader(pDevice, NO_RADIALBLUR_SHADER.data(), NO_RADIALBLUR_SHADER.size(), pClassLinkage, ppPixelShader);
239247

240-
} else if (std::equal(GrassShader.begin(), GrassShader.end(), hash) && (QualityVal < 2)) {
248+
} else if (simd_equal(GrassShader, hash) && (QualityVal < 2)) {
241249

242250
return procs->CreatePixelShader(pDevice, SIMPLIFIED_FS_GRASS_SHADER.data(), SIMPLIFIED_FS_GRASS_SHADER.size(), pClassLinkage, ppPixelShader);
243251

244-
} else if (std::equal(ShadowShader.begin(), ShadowShader.end(), hash) && (TextureVal == 0)) {
252+
} else if (simd_equal(ShadowShader, hash) && (TextureVal == 0)) {
245253

246254
if (!FragmentShadowB) {
247255
FragmentShadowB = true;
248256
log("Fragment Shadow found");
249257
}
250258
return procs->CreatePixelShader(pDevice, SIMPLIFIED_FS_SHADOW_SHADER.data(), SIMPLIFIED_FS_SHADOW_SHADER.size(), pClassLinkage, ppPixelShader);
251259
}
252-
else if (std::equal(SphericalShader.begin(), SphericalShader.end(), hash) && (QualityVal < 2)) {
260+
else if (simd_equal(SphericalShader, hash) && (QualityVal < 2)) {
253261
if (!SphericalB) {
254262
SphericalB = true;
255263
log("Spherical Map found");
256264
}
257265
return procs->CreatePixelShader(pDevice, SIMPLIFIED_FS_HIGH_SPHERICAL_SHADER.data(), SIMPLIFIED_FS_HIGH_SPHERICAL_SHADER.size(), pClassLinkage, ppPixelShader);
258266
}
259-
else if (std::equal(TerrainShader.begin(), TerrainShader.end(), hash) && (QualityVal == 2)) {
267+
else if (simd_equal(TerrainShader, hash) && (QualityVal == 2)) {
260268

261269
return procs->CreatePixelShader(pDevice, LOW_FS_TERRAIN_SHADER.data(), LOW_FS_TERRAIN_SHADER.size(), pClassLinkage, ppPixelShader);
262270
}
263-
else if (std::equal(DefaultShader.begin(), DefaultShader.end(), hash) && (QualityVal == 2)) {
271+
else if (simd_equal(DefaultShader, hash) && (QualityVal == 2)) {
264272

265273
return procs->CreatePixelShader(pDevice, SIMPLIFIED_FS_DEFAULT_SHADER.data(), SIMPLIFIED_FS_DEFAULT_SHADER.size(), pClassLinkage, ppPixelShader);
266274
}
267-
else if (std::equal(SphericalShader.begin(), SphericalShader.end(), hash) && (QualityVal == 2)) {
275+
else if (simd_equal(SphericalShader, hash) && (QualityVal == 2)) {
268276
if (!SphericalB) {
269277
SphericalB = true;
270278
log("Spherical Map found");
271279
}
272280
return procs->CreatePixelShader(pDevice, SIMPLIFIED_FS_LOW_SPHERICAL_SHADER.data(), SIMPLIFIED_FS_LOW_SPHERICAL_SHADER.size(), pClassLinkage, ppPixelShader);
273281
}
274-
else if (std::equal(PlayerHairShader.begin(), PlayerHairShader.end(), hash) && (QualityVal == 2)) {
282+
else if (simd_equal(PlayerHairShader, hash) && (QualityVal == 2)) {
275283
if (!PlayerHairB) {
276284
PlayerHairB = true;
277285
log("Player Hair found");
278286
}
279287
return procs->CreatePixelShader(pDevice, SIMPLIFIED_FS_HAIR_PLAYER_SHADER.data(), SIMPLIFIED_FS_HAIR_PLAYER_SHADER.size(), pClassLinkage, ppPixelShader);
280288
}
281-
else if (std::equal(PlayerFaceShader.begin(), PlayerFaceShader.end(), hash) && (QualityVal == 2)) {
289+
else if (simd_equal(PlayerFaceShader, hash) && (QualityVal == 2)) {
282290
if (!PlayerFaceB) {
283291
PlayerFaceB = true;
284292
log("Player Face found");
285293
}
286294
return procs->CreatePixelShader(pDevice, SIMPLIFIED_FS_FACE_PLAYER_SHADER.data(), SIMPLIFIED_FS_FACE_PLAYER_SHADER.size(), pClassLinkage, ppPixelShader);
287295
}
288-
else if (std::equal(PlayerCostumeShader.begin(), PlayerCostumeShader.end(), hash) && (QualityVal == 2)) {
296+
else if (simd_equal(PlayerCostumeShader, hash) && (QualityVal == 2)) {
289297
if (!PlayerBodyB) {
290298
PlayerBodyB = true;
291299
log("Player Body found");
292300
}
293301
return procs->CreatePixelShader(pDevice, SIMPLIFIED_FS_COSTUME_PLAYER_SHADER.data(), SIMPLIFIED_FS_COSTUME_PLAYER_SHADER.size(), pClassLinkage, ppPixelShader);
294302
}
295-
else if (std::equal(SkyBoxShader.begin(), SkyBoxShader.end(), hash)) {
303+
else if (simd_equal(SkyBoxShader, hash)) {
296304
return procs->CreatePixelShader(pDevice, FS_SKYBOX.data(), FS_SKYBOX.size(), pClassLinkage, ppPixelShader);
297305
}
298-
else if (std::equal(SkyBoxAniShader.begin(), SkyBoxAniShader.end(), hash)) {
306+
else if (simd_equal(SkyBoxAniShader, hash)) {
299307
return procs->CreatePixelShader(pDevice, FS_SKYBOX_ANI.data(), FS_SKYBOX_ANI.size(), pClassLinkage, ppPixelShader);
300308
}
301-
else if (std::equal(DiffSphericShader.begin(), DiffSphericShader.end(), hash) && (QualityVal == 2)) {
309+
else if (simd_equal(DiffSphericShader, hash) && (QualityVal == 2)) {
302310
if (!DiffSphericB) {
303311
DiffSphericB = true;
304312
log("Diff Spheric found");
305313
}
306314
return procs->CreatePixelShader(pDevice, LOW_DIFFSPHERIC_SHADER.data(), LOW_DIFFSPHERIC_SHADER.size(), pClassLinkage, ppPixelShader);
307315
}
308-
else if (std::equal(DiffSphericShader.begin(), DiffSphericShader.end(), hash) && (QualityVal < 2)) {
316+
else if (simd_equal(DiffSphericShader, hash) && (QualityVal < 2)) {
309317
if (!DiffSphericB) {
310318
DiffSphericB = true;
311319
log("Diff Spheric found");

0 commit comments

Comments
 (0)