Skip to content

Commit b77d111

Browse files
authored
HIP: add GGML_HIP_MMQ_MFMA option to allow disabling the MFMA path. (ggml-org#14930)
This is useful for testing for regressions in the GCN code path using CDNA hardware. With GGML_HIP_MMQ_MFMA=Off and GGML_CUDA_FORCE_MMQ=On we can conveniently test the GCN code path on CDNA. As CDNA is essentially GCN renamed, with MFMA and limited-use ACC registers added, this provides a good alternative for regression testing when GCN hardware is not available.
1 parent c7aa136 commit b77d111

File tree

3 files changed

+11
-2
lines changed

3 files changed

+11
-2
lines changed

ggml/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,7 @@ option(GGML_HIP_GRAPHS "ggml: use HIP graph, experimental,
174174
option(GGML_HIP_NO_VMM "ggml: do not try to use HIP VMM" ON)
175175
option(GGML_HIP_ROCWMMA_FATTN "ggml: enable rocWMMA for FlashAttention" OFF)
176176
option(GGML_HIP_FORCE_ROCWMMA_FATTN_GFX12 "ggml: enable rocWMMA FlashAttention on GFX12" OFF)
177+
option(GGML_HIP_MMQ_MFMA "ggml: enable MFMA MMA for CDNA in MMQ" ON)
177178
option(GGML_MUSA_GRAPHS "ggml: use MUSA graph, experimental, unstable" OFF)
178179
option(GGML_MUSA_MUDNN_COPY "ggml: enable muDNN for accelerated copy" OFF)
179180
option(GGML_VULKAN "ggml: use Vulkan" OFF)

ggml/src/ggml-cuda/common.cuh

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -227,7 +227,7 @@ typedef float2 dfloat2;
227227
#define FP16_MMA_AVAILABLE
228228
#endif // defined(GGML_HIP_ROCWMMA_FATTN) && (defined(CDNA) || defined(RDNA3) || (defined(GGML_HIP_ROCWMMA_FATTN_GFX12) && defined(RDNA4)))
229229

230-
#if defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__) && defined(CDNA3)
230+
#if defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__) && defined(CDNA3) && !defined(GGML_HIP_NO_MMQ_MFMA)
231231
#define AMD_MFMA_AVAILABLE
232232
#endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__) && defined(CDNA3) && !defined(GGML_HIP_NO_MMQ_MFMA)
233233

@@ -295,7 +295,11 @@ static bool fp32_mma_hardware_available(const int cc) {
295295

296296
// AMD CDNA3 matrix cores.. Will add support for other CDNA generations later.
297297
static bool amd_mfma_available(const int cc) {
298-
return cc >= GGML_CUDA_CC_OFFSET_AMD && GGML_CUDA_CC_IS_CDNA3(cc);
298+
#if !defined(GGML_HIP_NO_MMQ_MFMA)
299+
return GGML_CUDA_CC_IS_CDNA3(cc);
300+
#else
301+
return false;
302+
#endif //!defined(GGML_HIP_NO_MMQ_MFMA)
299303
}
300304

301305
// Volta technically had FP16 tensor cores but they work very differently compared to Turing and later.

ggml/src/ggml-hip/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,10 @@ if (GGML_HIP_ROCWMMA_FATTN)
113113
add_compile_definitions(GGML_HIP_ROCWMMA_FATTN)
114114
endif()
115115

116+
if (NOT GGML_HIP_MMQ_MFMA)
117+
add_compile_definitions(GGML_HIP_NO_MMQ_MFMA)
118+
endif()
119+
116120
if (GGML_HIP_FORCE_ROCWMMA_FATTN_GFX12 OR ${hip_VERSION} VERSION_GREATER_EQUAL 7.0)
117121
add_compile_definitions(GGML_HIP_ROCWMMA_FATTN_GFX12)
118122
endif()

0 commit comments

Comments
 (0)