Skip to content

Commit 9f40a1e

Browse files
mmereckiigcbot
authored and committed
Fix getting scalars from InsertElement instructions in VectorPreProcess
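Fix the code that finds vector elements by walking chains of `InsertElement` instructions. The chain is walked from the last insert back toward the first, and the code did not handle an element being inserted more than once: the earlier, overwritten value replaced the final one.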
1 parent 098259e commit 9f40a1e

File tree

3 files changed

+116
-2
lines changed

3 files changed

+116
-2
lines changed

IGC/Compiler/CISACodeGen/VectorPreProcess.cpp

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1498,10 +1498,12 @@ void VectorPreProcess::getOrGenScalarValues(
14981498
{
14991499
bool genExtract = false;
15001500
Value* V = VecVal;
1501+
IGC_ASSERT(scalars.size() == nelts);
15011502
for (uint32_t i = 0; i < nelts; ++i)
15021503
{
15031504
scalars[i] = nullptr;
15041505
}
1506+
uint32_t numEltsFound = 0;
15051507
while (InsertElementInst * IEI = dyn_cast<InsertElementInst>(V))
15061508
{
15071509
Value* ixVal = IEI->getOperand(2);
@@ -1512,10 +1514,19 @@ void VectorPreProcess::getOrGenScalarValues(
15121514
break;
15131515
}
15141516
uint32_t ix = int_cast<unsigned int>(CI->getZExtValue());
1515-
scalars[ix] = IEI->getOperand(1);
1517+
if (scalars[ix] == nullptr)
1518+
{
1519+
scalars[ix] = IEI->getOperand(1);
1520+
++numEltsFound;
1521+
}
1522+
if (numEltsFound == nelts)
1523+
{
1524+
break;
1525+
}
15161526
V = IEI->getOperand(0);
15171527
}
1518-
if (!isa<UndefValue>(V))
1528+
// Generate extractelement instructions if not all elements were found.
1529+
if (!isa<UndefValue>(V) && numEltsFound != nelts)
15191530
{
15201531
genExtract = true;
15211532
}
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2025 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
9+
; RUN: igc_opt --typed-pointers %s -S -o - -ocl -inputocl -platformdg2 -igc-vectorpreprocess | FileCheck %s
10+
11+
; Test finding vector elements in insertelement instructions used to construct the vector.
12+
; Test that the last inserted value is used.
13+
14+
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-n8:16:32"
15+
16+
define void @test_kernel(i8 addrspace(2490368)* %bso, i32 %bufferOffset, half %a, half %b, half %c, half %d, half %e)
17+
{
18+
entry:
19+
%vec0 = insertelement <5 x half> undef, half %a, i64 0
20+
%vec1 = insertelement <5 x half> %vec0, half %b, i64 1
21+
%vec2 = insertelement <5 x half> %vec1, half %c, i64 2
22+
%vec3 = insertelement <5 x half> %vec2, half %d, i64 3
23+
%vec4.overwritten = insertelement <5 x half> %vec3, half %d, i64 4
24+
%vec4 = insertelement <5 x half> %vec4.overwritten, half %e, i64 4
25+
call void @llvm.genx.GenISA.storerawvector.indexed.p2490368i8.v5f16(i8 addrspace(2490368)* %bso, i32 %bufferOffset, <5 x half> %vec4, i32 16, i1 false)
26+
27+
ret void
28+
}
29+
30+
; CHECK-LABEL: void @test_kernel(
31+
; CHECK-SAME: i8 addrspace(2490368)* [[BSO:%.*]], i32 [[OFFSET:%.*]], half [[A:%.*]], half [[B:%.*]], half [[C:%.*]], half [[D:%.*]], half [[E:%.*]])
32+
33+
; CHECK: [[VEC0:%.*]] = insertelement <4 x half> undef, half [[A]], i32 0
34+
; CHECK: [[VEC1:%.*]] = insertelement <4 x half> [[VEC0]], half [[B]], i32 1
35+
; CHECK: [[VEC2:%.*]] = insertelement <4 x half> [[VEC1]], half [[C]], i32 2
36+
; CHECK: [[VEC3:%.*]] = insertelement <4 x half> [[VEC2]], half [[D]], i32 3
37+
; CHECK: [[OFF0:%.*]] = add i32 0, [[OFFSET]]
38+
; CHECK: call void @llvm.genx.GenISA.storerawvector.indexed.p2490368i8.v4f16(i8 addrspace(2490368)* [[BSO]], i32 [[OFF0]], <4 x half> [[VEC3]], i32 16, i1 false)
39+
; CHECK: [[OFF1:%.*]] = add i32 8, [[OFFSET]]
40+
; CHECK: call void @llvm.genx.GenISA.storerawvector.indexed.p2490368i8.f16(i8 addrspace(2490368)* [[BSO]], i32 [[OFF1]], half [[E]], i32 8, i1 false)
41+
42+
43+
; Function Desc: Write a vector to a buffer pointer at byte offset
44+
; Output:
45+
; Arg 0: buffer pointer, result of GetBufferPtr
46+
; Arg 1: offset from the base pointer, in bytes
47+
; Arg 2: value to store
48+
; Arg 3: alignment in bytes
49+
; Arg 4: volatile, must be an immediate
50+
; Function Attrs: argmemonly nounwind writeonly
51+
declare void @llvm.genx.GenISA.storerawvector.indexed.p2490368i8.v5f16(i8 addrspace(2490368)*, i32, <5 x half>, i32, i1)
52+
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2025 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
9+
; RUN: igc_opt --typed-pointers %s -S -o - -ocl -inputocl -platformdg2 -igc-vectorpreprocess | FileCheck %s
10+
11+
; Test finding vector elements in insertelement instructions used to construct the vector.
12+
; Test that the last element is extracted from the input vector.
13+
14+
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-n8:16:32"
15+
16+
define void @test_kernel(i8 addrspace(2490368)* %bso, i32 %bufferOffset, <5 x half> %v, half %a, half %b, half %c, half %d)
17+
{
18+
entry:
19+
%vec0 = insertelement <5 x half> %v, half %a, i64 0
20+
%vec1 = insertelement <5 x half> %vec0, half %b, i64 1
21+
%vec2 = insertelement <5 x half> %vec1, half %c, i64 2
22+
%vec3 = insertelement <5 x half> %vec2, half %d, i64 3
23+
call void @llvm.genx.GenISA.storerawvector.indexed.p2490368i8.v5f16(i8 addrspace(2490368)* %bso, i32 %bufferOffset, <5 x half> %vec3, i32 16, i1 false)
24+
25+
ret void
26+
}
27+
28+
; CHECK-LABEL: void @test_kernel(
29+
; CHECK-SAME: i8 addrspace(2490368)* [[BSO:%.*]], i32 [[OFFSET:%.*]], <5 x half> [[V:%.*]], half [[A:%.*]], half [[B:%.*]], half [[C:%.*]], half [[D:%.*]])
30+
31+
; CHECK: [[ELT4:%.*]] = extractelement <5 x half> [[V]], i32 4
32+
; CHECK: [[VEC0:%.*]] = insertelement <4 x half> undef, half [[A]], i32 0
33+
; CHECK: [[VEC1:%.*]] = insertelement <4 x half> [[VEC0]], half [[B]], i32 1
34+
; CHECK: [[VEC2:%.*]] = insertelement <4 x half> [[VEC1]], half [[C]], i32 2
35+
; CHECK: [[VEC3:%.*]] = insertelement <4 x half> [[VEC2]], half [[D]], i32 3
36+
; CHECK: [[OFF0:%.*]] = add i32 0, [[OFFSET]]
37+
; CHECK: call void @llvm.genx.GenISA.storerawvector.indexed.p2490368i8.v4f16(i8 addrspace(2490368)* [[BSO]], i32 [[OFF0]], <4 x half> [[VEC3]], i32 16, i1 false)
38+
; CHECK: [[OFF1:%.*]] = add i32 8, [[OFFSET]]
39+
; CHECK: call void @llvm.genx.GenISA.storerawvector.indexed.p2490368i8.f16(i8 addrspace(2490368)* [[BSO]], i32 [[OFF1]], half [[ELT4]], i32 8, i1 false)
40+
41+
42+
; Function Desc: Write a vector to a buffer pointer at byte offset
43+
; Output:
44+
; Arg 0: buffer pointer, result of GetBufferPtr
45+
; Arg 1: offset from the base pointer, in bytes
46+
; Arg 2: value to store
47+
; Arg 3: alignment in bytes
48+
; Arg 4: volatile, must be an immediate
49+
; Function Attrs: argmemonly nounwind writeonly
50+
declare void @llvm.genx.GenISA.storerawvector.indexed.p2490368i8.v5f16(i8 addrspace(2490368)*, i32, <5 x half>, i32, i1)
51+

0 commit comments

Comments
 (0)