Skip to content

clFFT 8000-point FFT kernel compilation failure on Raspberry Pi Compute Module 5 (RPi5 CM5) using V3D/CLVK #253

@atharvag1

Description

@atharvag1

When running an 8000-point FFT on a Raspberry Pi CM5 (V3D GPU using the CLVK OpenCL runtime), the clFFT library fails during kernel compilation.
The host setup and clFFT API calls work correctly, but during the clfftBakePlan step or the first transform execution, the following error occurs:

Running clFFT on device: V3D 7.1.10.2
error: 601: Expected input to have different bit width from Result Type: UConvert
  %309 = OpUConvert %uint %308

The following warnings are generated when running the code:

                      BUILD LOG
************************************************
clvk-uqMihz/source.cl:18:19: warning: no previous extern declaration for non-static variable 'twiddles'
  18 | __constant float2 twiddles[124] = {
     |                   ^
clvk-uqMihz/source.cl:18:12: note: declare 'static' if the variable is not intended to be used outside of this translation unit
  18 | __constant float2 twiddles[124] = {
     |            ^
clvk-uqMihz/source.cl:157:1: warning: no previous prototype for function 'FwdRad5B1'
 157 | FwdRad5B1(float2 *R0, float2 *R1, float2 *R2, float2 *R3, float2 *R4)
     | ^
clvk-uqMihz/source.cl:156:32: note: declare 'static' if the function is not intended to be used outside of this translation unit
 156 | __attribute__((always_inline)) void
     |                                ^
     |                                static
clvk-uqMihz/source.cl:183:1: warning: no previous prototype for function 'InvRad5B1'
 183 | InvRad5B1(float2 *R0, float2 *R1, float2 *R2, float2 *R3, float2 *R4)
     | ^
clvk-uqMihz/source.cl:182:32: note: declare 'static' if the function is not intended to be used outside of this translation unit
 182 | __attribute__((always_inline)) void
     |                                ^
     |                                static
clvk-uqMihz/source.cl:209:24: warning: unused parameter 'b'
 209 | FwdPass0(uint rw, uint b, uint me, uint inOffset, uint outOffset, __global float2 *bufIn, __local float *bufOutRe, __local float *bufOutIm, float2 *R0, float2 *R1, float2 *R2, float2 *R3, float2 *R4)
     |                        ^
clvk-uqMihz/source.cl:209:1: warning: no previous prototype for function 'FwdPass0'
 209 | FwdPass0(uint rw, uint b, uint me, uint inOffset, uint outOffset, __global float2 *bufIn, __local float *bufOutRe, __local float *bufOutIm, float2 *R0, float2 *R1, float2 *R2, float2 *R3, float2 *R4)
     | ^
clvk-uqMihz/source.cl:208:32: note: declare 'static' if the function is not intended to be used outside of this translation unit
 208 | __attribute__((always_inline)) void
     |                                ^
     |                                static
clvk-uqMihz/source.cl:287:24: warning: unused parameter 'b'
 287 | FwdPass1(uint rw, uint b, uint me, uint inOffset, uint outOffset, __local float *bufInRe, __local float *bufInIm, __local float *bufOutRe, __local float *bufOutIm, float2 *R0, float2 *R1, float2 *R2, float2 *R3, float2 *R4)
     |                        ^
clvk-uqMihz/source.cl:287:41: warning: unused parameter 'inOffset'
 287 | FwdPass1(uint rw, uint b, uint me, uint inOffset, uint outOffset, __local float *bufInRe, __local float *bufInIm, __local float *bufOutRe, __local float *bufOutIm, float2 *R0, float2 *R1, float2 *R2, float2 *R3, float2 *R4)
     |                                         ^
clvk-uqMihz/source.cl:287:82: warning: unused parameter 'bufInRe'
 287 | FwdPass1(uint rw, uint b, uint me, uint inOffset, uint outOffset, __local float *bufInRe, __local float *bufInIm, __local float *bufOutRe, __local float *bufOutIm, float2 *R0, float2 *R1, float2 *R2, float2 *R3, float2 *R4)
     |                                                                                  ^
clvk-uqMihz/source.cl:287:106: warning: unused parameter 'bufInIm'
 287 | FwdPass1(uint rw, uint b, uint me, uint inOffset, uint outOffset, __local float *bufInRe, __local float *bufInIm, __local float *bufOutRe, __local float *bufOutIm, float2 *R0, float2 *R1, float2 *R2, float2 *R3, float2 *R4)
     |                                                                                                          ^
clvk-uqMihz/source.cl:287:1: warning: no previous prototype for function 'FwdPass1'
 287 | FwdPass1(uint rw, uint b, uint me, uint inOffset, uint outOffset, __local float *bufInRe, __local float *bufInIm, __local float *bufOutRe, __local float *bufOutIm, float2 *R0, float2 *R1, float2 *R2, float2 *R3, float2 *R4)
     | ^
clvk-uqMihz/source.cl:286:32: note: declare 'static' if the function is not intended to be used outside of this translation unit
 286 | __attribute__((always_inline)) void
     |                                ^
     |                                static
clvk-uqMihz/source.cl:383:24: warning: unused parameter 'b'
 383 | FwdPass2(uint rw, uint b, uint me, uint inOffset, uint outOffset, __local float *bufInRe, __local float *bufInIm, __global float2 *bufOut, float2 *R0, float2 *R1, float2 *R2, float2 *R3, float2 *R4)
     |                        ^
clvk-uqMihz/source.cl:383:41: warning: unused parameter 'inOffset'
 383 | FwdPass2(uint rw, uint b, uint me, uint inOffset, uint outOffset, __local float *bufInRe, __local float *bufInIm, __global float2 *bufOut, float2 *R0, float2 *R1, float2 *R2, float2 *R3, float2 *R4)
     |                                         ^
clvk-uqMihz/source.cl:383:82: warning: unused parameter 'bufInRe'
 383 | FwdPass2(uint rw, uint b, uint me, uint inOffset, uint outOffset, __local float *bufInRe, __local float *bufInIm, __global float2 *bufOut, float2 *R0, float2 *R1, float2 *R2, float2 *R3, float2 *R4)
     |                                                                                  ^
clvk-uqMihz/source.cl:383:106: warning: unused parameter 'bufInIm'
 383 | FwdPass2(uint rw, uint b, uint me, uint inOffset, uint outOffset, __local float *bufInRe, __local float *bufInIm, __global float2 *bufOut, float2 *R0, float2 *R1, float2 *R2, float2 *R3, float2 *R4)
     |                                                                                                          ^
clvk-uqMihz/source.cl:383:1: warning: no previous prototype for function 'FwdPass2'
 383 | FwdPass2(uint rw, uint b, uint me, uint inOffset, uint outOffset, __local float *bufInRe, __local float *bufInIm, __global float2 *bufOut, float2 *R0, float2 *R1, float2 *R2, float2 *R3, float2 *R4)
     | ^
clvk-uqMihz/source.cl:382:32: note: declare 'static' if the function is not intended to be used outside of this translation unit
 382 | __attribute__((always_inline)) void
     |                                ^
     |                                static
clvk-uqMihz/source.cl:440:24: warning: unused parameter 'b'
 440 | InvPass0(uint rw, uint b, uint me, uint inOffset, uint outOffset, __global float2 *bufIn, __local float *bufOutRe, __local float *bufOutIm, float2 *R0, float2 *R1, float2 *R2, float2 *R3, float2 *R4)
     |                        ^
clvk-uqMihz/source.cl:440:1: warning: no previous prototype for function 'InvPass0'
 440 | InvPass0(uint rw, uint b, uint me, uint inOffset, uint outOffset, __global float2 *bufIn, __local float *bufOutRe, __local float *bufOutIm, float2 *R0, float2 *R1, float2 *R2, float2 *R3, float2 *R4)
     | ^
clvk-uqMihz/source.cl:439:32: note: declare 'static' if the function is not intended to be used outside of this translation unit
 439 | __attribute__((always_inline)) void
     |                                ^
     |                                static
clvk-uqMihz/source.cl:518:24: warning: unused parameter 'b'
 518 | InvPass1(uint rw, uint b, uint me, uint inOffset, uint outOffset, __local float *bufInRe, __local float *bufInIm, __local float *bufOutRe, __local float *bufOutIm, float2 *R0, float2 *R1, float2 *R2, float2 *R3, float2 *R4)
     |                        ^
clvk-uqMihz/source.cl:518:41: warning: unused parameter 'inOffset'
 518 | InvPass1(uint rw, uint b, uint me, uint inOffset, uint outOffset, __local float *bufInRe, __local float *bufInIm, __local float *bufOutRe, __local float *bufOutIm, float2 *R0, float2 *R1, float2 *R2, float2 *R3, float2 *R4)
     |                                         ^
clvk-uqMihz/source.cl:518:82: warning: unused parameter 'bufInRe'
 518 | InvPass1(uint rw, uint b, uint me, uint inOffset, uint outOffset, __local float *bufInRe, __local float *bufInIm, __local float *bufOutRe, __local float *bufOutIm, float2 *R0, float2 *R1, float2 *R2, float2 *R3, float2 *R4)
     |                                                                                  ^
clvk-uqMihz/source.cl:518:106: warning: unused parameter 'bufInIm'
 518 | InvPass1(uint rw, uint b, uint me, uint inOffset, uint outOffset, __local float *bufInRe, __local float *bufInIm, __local float *bufOutRe, __local float *bufOutIm, float2 *R0, float2 *R1, float2 *R2, float2 *R3, float2 *R4)
     |                                                                                                          ^
clvk-uqMihz/source.cl:518:1: warning: no previous prototype for function 'InvPass1'
 518 | InvPass1(uint rw, uint b, uint me, uint inOffset, uint outOffset, __local float *bufInRe, __local float *bufInIm, __local float *bufOutRe, __local float *bufOutIm, float2 *R0, float2 *R1, float2 *R2, float2 *R3, float2 *R4)
     | ^
clvk-uqMihz/source.cl:517:32: note: declare 'static' if the function is not intended to be used outside of this translation unit
 517 | __attribute__((always_inline)) void
     |                                ^
     |                                static
clvk-uqMihz/source.cl:614:24: warning: unused parameter 'b'
 614 | InvPass2(uint rw, uint b, uint me, uint inOffset, uint outOffset, __local float *bufInRe, __local float *bufInIm, __global float2 *bufOut, float2 *R0, float2 *R1, float2 *R2, float2 *R3, float2 *R4)
     |                        ^
clvk-uqMihz/source.cl:614:41: warning: unused parameter 'inOffset'
 614 | InvPass2(uint rw, uint b, uint me, uint inOffset, uint outOffset, __local float *bufInRe, __local float *bufInIm, __global float2 *bufOut, float2 *R0, float2 *R1, float2 *R2, float2 *R3, float2 *R4)
     |                                         ^
clvk-uqMihz/source.cl:614:82: warning: unused parameter 'bufInRe'
 614 | InvPass2(uint rw, uint b, uint me, uint inOffset, uint outOffset, __local float *bufInRe, __local float *bufInIm, __global float2 *bufOut, float2 *R0, float2 *R1, float2 *R2, float2 *R3, float2 *R4)
     |                                                                                  ^
clvk-uqMihz/source.cl:614:106: warning: unused parameter 'bufInIm'
 614 | InvPass2(uint rw, uint b, uint me, uint inOffset, uint outOffset, __local float *bufInRe, __local float *bufInIm, __global float2 *bufOut, float2 *R0, float2 *R1, float2 *R2, float2 *R3, float2 *R4)
     |                                                                                                          ^
clvk-uqMihz/source.cl:614:1: warning: no previous prototype for function 'InvPass2'
 614 | InvPass2(uint rw, uint b, uint me, uint inOffset, uint outOffset, __local float *bufInRe, __local float *bufInIm, __global float2 *bufOut, float2 *R0, float2 *R1, float2 *R2, float2 *R3, float2 *R4)
     | ^
clvk-uqMihz/source.cl:613:32: note: declare 'static' if the function is not intended to be used outside of this translation unit
 613 | __attribute__((always_inline)) void
     |                                ^
     |                                static
clvk-uqMihz/source.cl:673:49: warning: unknown attribute 'max_constant_size' ignored
 673 | void fft_fwd(__constant cb_t *cb __attribute__((max_constant_size(32))), __global const float2 * restrict gbIn, __global float2 * restrict gbOut)
     |                                                 ^~~~~~~~~~~~~~~~~~~~~
clvk-uqMihz/source.cl:693:8: warning: assigning to '__global float2 *__private' from 'const __global float2 *' discards qualifiers
 693 |         lwbIn = gbIn + iOffset;
     |               ^ ~~~~~~~~~~~~~~
clvk-uqMihz/source.cl:702:50: warning: unknown attribute 'max_constant_size' ignored
 702 | void fft_back(__constant cb_t *cb __attribute__((max_constant_size(32))), __global const float2 * restrict gbIn, __global float2 * restrict gbOut)
     |                                                  ^~~~~~~~~~~~~~~~~~~~~
clvk-uqMihz/source.cl:722:8: warning: assigning to '__global float2 *__private' from 'const __global float2 *' discards qualifiers
 722 |         lwbIn = gbIn + iOffset;
     |               ^ ~~~~~~~~~~~~~~
clvk-uqMihz/source.cl:146:9: warning: macro is not used
 146 | #define fptype float
     |         ^
clvk-uqMihz/source.cl:154:9: warning: macro is not used
 154 | #define C5QE 0.80901699437494742410229341718282f
     |         ^

************************************************
FFTGeneratedStockhamAction::compileKernels failed

Device Info

  • Device: Raspberry Pi CM5 (VideoCore VII / V3D GPU)
  • Driver: V3D 7.1.10.2
  • OpenCL Runtime: CLVK
  • clFFT from the master branch

When I set the problem size to the next power of 2 (8192), clFFT generates the correct output.
Additionally, when I run my code on an NVIDIA RTX 3090, it generates the expected outputs for both 8000 and 8192 points.
I have attached my code for your reference.
fft8k.zip

I understand this issue could be out of scope for this repository if the error is due to limitations of CLVK and/or the RPi CM5. I would appreciate it if you could suggest a workaround that gets an 8000-point FFT running with clFFT.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions