Skip to content

Commit 4c597d4

Browse files
authored
[mlir][xegpu] Support boundary checks only for block instructions (#119380)
Constrains Vector lowering to apply boundary checks only to data transfers operating on block shapes. This further aligns lowering with the current Xe instructions' restrictions.
1 parent 06789cc commit 4c597d4

File tree

5 files changed

+42
-7
lines changed

5 files changed

+42
-7
lines changed

mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp

+14-5
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,10 @@ static LogicalResult transferPreconditions(PatternRewriter &rewriter,
8282
xferOp, "Buffer must be contiguous in the innermost dimension");
8383

8484
unsigned vecRank = vecTy.getRank();
85+
if (xferOp.hasOutOfBoundsDim() && vecRank < 2)
86+
return rewriter.notifyMatchFailure(
87+
xferOp, "Boundary check is available only for block instructions.");
88+
8589
AffineMap map = xferOp.getPermutationMap();
8690
if (!map.isProjectedPermutation(/*allowZeroInResults=*/false))
8791
return rewriter.notifyMatchFailure(xferOp, "Unsupported permutation map");
@@ -255,9 +259,12 @@ struct LoadLowering : public OpRewritePattern<vector::LoadOp> {
255259
if (failed(storeLoadPreconditions(rewriter, loadOp, vecTy)))
256260
return failure();
257261

262+
// Boundary check is available only for block instructions.
263+
bool boundaryCheck = vecTy.getRank() > 1;
264+
258265
auto descType = xegpu::TensorDescType::get(
259266
vecTy.getShape(), vecTy.getElementType(), /*array_length=*/1,
260-
/*boundary_check=*/true, xegpu::MemorySpace::Global);
267+
boundaryCheck, xegpu::MemorySpace::Global);
261268
xegpu::CreateNdDescOp ndDesc = createNdDescriptor(
262269
rewriter, loc, descType, loadOp.getBase(), loadOp.getIndices());
263270

@@ -285,10 +292,12 @@ struct StoreLowering : public OpRewritePattern<vector::StoreOp> {
285292
if (failed(storeLoadPreconditions(rewriter, storeOp, vecTy)))
286293
return failure();
287294

288-
auto descType =
289-
xegpu::TensorDescType::get(vecTy.getShape(), vecTy.getElementType(),
290-
/*array_length=*/1, /*boundary_check=*/true,
291-
xegpu::MemorySpace::Global);
295+
// Boundary check is available only for block instructions.
296+
bool boundaryCheck = vecTy.getRank() > 1;
297+
298+
auto descType = xegpu::TensorDescType::get(
299+
vecTy.getShape(), vecTy.getElementType(),
300+
/*array_length=*/1, boundaryCheck, xegpu::MemorySpace::Global);
292301
xegpu::CreateNdDescOp ndDesc = createNdDescriptor(
293302
rewriter, loc, descType, storeOp.getBase(), storeOp.getIndices());
294303

mlir/test/Conversion/VectorToXeGPU/load-to-xegpu.mlir

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ func.func @load_1D_vector(%source: memref<8x16x32xf32>, %offset: index) -> vecto
1212
// CHECK: %[[DESC:.+]] = xegpu.create_nd_tdesc
1313
// CHECK-SAME: %[[SRC]][%[[OFFSET]], %[[OFFSET]], %[[OFFSET]]]
1414
// CHECK-SAME: memref<8x16x32xf32> -> !xegpu.tensor_desc<8xf32,
15-
// CHECK-SAME: boundary_check = true
15+
// CHECK-SAME: boundary_check = false
1616
// CHECK: %[[VEC:.+]] = xegpu.load_nd %[[DESC]]{{.*}}-> vector<8xf32>
1717
// CHECK: return %[[VEC]]
1818

mlir/test/Conversion/VectorToXeGPU/store-to-xegpu.mlir

+1-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ func.func @store_1D_vector(%vec: vector<8xf32>,
1414
// CHECK: %[[DESC:.+]] = xegpu.create_nd_tdesc
1515
// CHECK-SAME: %[[SRC]][%[[OFFSET]], %[[OFFSET]], %[[OFFSET]]]
1616
// CHECK-SAME: memref<8x16x32xf32> -> !xegpu.tensor_desc<8xf32,
17-
// CHECK-SAME: boundary_check = true
17+
// CHECK-SAME: boundary_check = false
1818
// CHECK: xegpu.store_nd %[[VEC]], %[[DESC]] : vector<8xf32>
1919

2020
// -----

mlir/test/Conversion/VectorToXeGPU/transfer-read-to-xegpu.mlir

+13
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,19 @@ func.func @no_load_out_of_bounds_non_zero_pad(%source: memref<32x64xf32>,
119119

120120
// -----
121121

122+
func.func @no_load_out_of_bounds_1D_vector(%source: memref<8x16x32xf32>,
123+
%offset: index) -> vector<8xf32> {
124+
%c0 = arith.constant 0.0 : f32
125+
%0 = vector.transfer_read %source[%offset, %offset, %offset], %c0
126+
{in_bounds = [false]} : memref<8x16x32xf32>, vector<8xf32>
127+
return %0 : vector<8xf32>
128+
}
129+
130+
// CHECK-LABEL: @no_load_out_of_bounds_1D_vector(
131+
// CHECK: vector.transfer_read
132+
133+
// -----
134+
122135
func.func @no_load_masked(%source : memref<4xf32>,
123136
%offset : index) -> vector<4xf32> {
124137
%c0 = arith.constant 0.0 : f32

mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir

+13
Original file line numberDiff line numberDiff line change
@@ -164,3 +164,16 @@ func.func @no_store_unsupported_map(%vec: vector<8x16xf32>,
164164

165165
// CHECK-LABEL: @no_store_unsupported_map(
166166
// CHECK: vector.transfer_write
167+
168+
// -----
169+
170+
func.func @no_store_out_of_bounds_1D_vector(%vec: vector<8xf32>,
171+
%source: memref<8x16x32xf32>, %offset: index) {
172+
vector.transfer_write %vec, %source[%offset, %offset, %offset]
173+
{in_bounds = [false]}
174+
: vector<8xf32>, memref<8x16x32xf32>
175+
return
176+
}
177+
178+
// CHECK-LABEL: @no_store_out_of_bounds_1D_vector(
179+
// CHECK: vector.transfer_write

0 commit comments

Comments (0)