From 7bc22f9a02b14f1126f352419ead5686ffd6bc2d Mon Sep 17 00:00:00 2001 From: Ivan Butygin Date: Sun, 13 Apr 2025 03:45:19 +0200 Subject: [PATCH 1/2] [mlir][amdgpu] Add `amdgpu.swizzle_bitmode` op High-level wrapper on top of `rocdl.ds_swizzle`. Also some DPP op cleanup while I'm here. --- mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td | 43 +++++++++++++++---- mlir/test/Dialect/AMDGPU/ops.mlir | 7 +++ 2 files changed, 42 insertions(+), 8 deletions(-) diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td index 108d7237ff703..a92ebf6d8e108 100644 --- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td +++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td @@ -35,6 +35,11 @@ def AMDGPU_Dialect : Dialect { let useDefaultAttributePrinterParser = 1; } +def AnyIntegerOrFloat : AnyTypeOf<[AnySignlessInteger, AnyFloat], "Integer or Float">; + +def AnyIntegerOrFloatOr1DVector : + AnyTypeOf<[AnyIntegerOrFloat, VectorOfRankAndType<[1], [AnyIntegerOrFloat]>]>; + //===----------------------------------------------------------------------===// // AMDGPU general attribute definitions //===----------------------------------------------------------------------===// @@ -533,14 +538,15 @@ def AMDGPU_DPPPerm : I32EnumAttr<"DPPPerm", def AMDGPU_DPPPermAttr : EnumAttr; -def AMDGPU_DPPOp : AMDGPU_Op<"dpp", [SameTypeOperands, AllTypesMatch<["result", "old", "src"]>]>, +def AMDGPU_DPPOp : AMDGPU_Op<"dpp", + [Pure, SameTypeOperands, AllTypesMatch<["result", "old", "src"]>]>, Arguments<(ins AnyType:$old, - AnyType:$src, - AMDGPU_DPPPermAttr:$kind, - OptionalAttr>:$permArgument, - DefaultValuedAttr:$row_mask, - DefaultValuedAttr:$bank_mask, - DefaultValuedAttr:$bound_ctrl)> { + AnyType:$src, + AMDGPU_DPPPermAttr:$kind, + OptionalAttr>:$permArgument, + DefaultValuedAttr:$row_mask, + DefaultValuedAttr:$bank_mask, + DefaultValuedAttr:$bound_ctrl)> { let summary = "AMDGPU DPP operation"; let description = [{ This operation represents DPP functionality 
in a GPU program. @@ -565,6 +571,27 @@ def AMDGPU_DPPOp : AMDGPU_Op<"dpp", [SameTypeOperands, AllTypesMatch<["result", let hasVerifier = 1; } +def AMDGPU_SwizzleBitModeOp : AMDGPU_Op<"swizzle_bitmode", + [Pure, AllTypesMatch<["result", "src"]>]>, + Arguments<(ins AnyIntegerOrFloatOr1DVector:$src, + I32Attr:$and_mask, + I32Attr:$or_mask, + I32Attr:$xor_mask + )> { + let summary = "AMDGPU ds_swizzle op, bitmode variant"; + let description = [{ + High-level wrapper on bitmode `rocdl.ds_swizzle` op, masks are represented + as separate fields so user won't need to do manual bitpacking. + + Supports arbitrary int/float/vector types, which will be repacked to i32 and + one or more `rocdl.ds_swizzle` ops during lowering. + }]; + let results = (outs AnyIntegerOrFloatOr1DVector:$result); + let assemblyFormat = [{ + $src $and_mask $or_mask $xor_mask attr-dict `:` type($result) + }]; +} + def AMDGPU_LDSBarrierOp : AMDGPU_Op<"lds_barrier"> { let summary = "Barrier that includes a wait for LDS memory operations."; let description = [{ @@ -794,7 +821,7 @@ def AMDGPU_GatherToLDSOp : The `$dst`, along with its indices, points to the memory location the subgroup of this thread will write to. - + Note: only enabled for gfx942 and later. 
}]; let assemblyFormat = [{ diff --git a/mlir/test/Dialect/AMDGPU/ops.mlir b/mlir/test/Dialect/AMDGPU/ops.mlir index 665674f2a7873..16b3193d270cb 100644 --- a/mlir/test/Dialect/AMDGPU/ops.mlir +++ b/mlir/test/Dialect/AMDGPU/ops.mlir @@ -157,3 +157,10 @@ func.func @wmma(%arg0 : vector<16xf16>, %arg1 : vector<8xf16>) -> vector<8xf16> %0 = amdgpu.wmma %arg0 * %arg0 + %arg1 : vector<16xf16>, vector<16xf16>, vector<8xf16> func.return %0 : vector<8xf16> } + +// CHECK-LABEL: func @swizzle_bitmode +func.func @swizzle_bitmode(%arg0 : f32) -> f32 { + // CHECK: amdgpu.swizzle_bitmode + %0 = amdgpu.swizzle_bitmode %arg0 1 2 4 : f32 + func.return %0 : f32 +} From 4b5e10b821fc06ef9feb30c7c7b1c90033272ec1 Mon Sep 17 00:00:00 2001 From: Ivan Butygin Date: Thu, 17 Apr 2025 23:49:11 +0200 Subject: [PATCH 2/2] invalid type test --- mlir/test/Dialect/AMDGPU/invalid.mlir | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/mlir/test/Dialect/AMDGPU/invalid.mlir b/mlir/test/Dialect/AMDGPU/invalid.mlir index 74a421f6dd50f..40f98ff85688c 100644 --- a/mlir/test/Dialect/AMDGPU/invalid.mlir +++ b/mlir/test/Dialect/AMDGPU/invalid.mlir @@ -150,3 +150,11 @@ func.func @fat_raw_buffer_cast_stripping_offset_affine_map(%m: memref<8xi32, aff %ret = amdgpu.fat_raw_buffer_cast %m resetOffset : memref<8xi32, affine_map<(d0)[s0] -> (d0 + s0)>> to memref<8xi32, #amdgpu.address_space> func.return %ret : memref<8xi32, #amdgpu.address_space> } + +// ----- + +func.func @swizzle_invalid_type(%arg0 : si32) -> si32 { + // expected-error@+1 {{amdgpu.swizzle_bitmode' op operand #0 must be Integer or Float or vector of Integer or Float values of ranks 1}} + %0 = amdgpu.swizzle_bitmode %arg0 1 2 4 : si32 + func.return %0 : si32 +}