Skip to content

Commit 4e83175

Browse files
Patryk27 and benshi001
authored and committed
[AVR] Expand shifts of all types except int8 and int16
Currently our AVRShiftExpand pass expands only 32-bit shifts, with the assumption that other kinds of shifts (e.g. 64-bit ones) are automatically reduced to 8-bit ones by LLVM during ISel. However, this is not always true and causes problems in the rust-lang runtime. This commit changes the logic a bit, so that instead of expanding only 32-bit shifts, we expand shifts of all types except 8-bit and 16-bit. This is not the optimal solution, because 64-bit shifts can be expanded to 32-bit shifts, which have been deeply optimized. I've checked the generated code using rustc + simavr, and all shifts seem to behave correctly. Spotted in the wild in rustc: rust-lang/compiler-builtins#523 rust-lang/rust#112140 Reviewed By: benshi001 Differential Revision: https://reviews.llvm.org/D154785
1 parent eb33db4 commit 4e83175

File tree

3 files changed

+147
-48
lines changed

3 files changed

+147
-48
lines changed

llvm/lib/Target/AVR/AVRShiftExpand.cpp

+13-11
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,10 @@
77
//===----------------------------------------------------------------------===//
88
//
99
/// \file
10-
/// Expand 32-bit shift instructions (shl, lshr, ashr) to inline loops, just
11-
/// like avr-gcc. This must be done in IR because otherwise the type legalizer
12-
/// will turn 32-bit shifts into (non-existing) library calls such as __ashlsi3.
10+
/// Expand non-8-bit and non-16-bit shift instructions (shl, lshr, ashr) to
11+
/// inline loops, just like avr-gcc. This must be done in IR because otherwise
12+
/// the type legalizer will turn 32-bit shifts into (non-existing) library calls
13+
/// such as __ashlsi3.
1314
//
1415
//===----------------------------------------------------------------------===//
1516

@@ -51,8 +52,9 @@ bool AVRShiftExpand::runOnFunction(Function &F) {
5152
if (!I.isShift())
5253
// Only expand shift instructions (shl, lshr, ashr).
5354
continue;
54-
if (I.getType() != Type::getInt32Ty(Ctx))
55-
// Only expand plain i32 types.
55+
if (I.getType() == Type::getInt8Ty(Ctx) || I.getType() == Type::getInt16Ty(Ctx))
56+
// Only expand non-8-bit and non-16-bit shifts, since those are expanded
57+
// directly during isel.
5658
continue;
5759
if (isa<ConstantInt>(I.getOperand(1)))
5860
// Only expand when the shift amount is not known.
@@ -75,7 +77,7 @@ bool AVRShiftExpand::runOnFunction(Function &F) {
7577
void AVRShiftExpand::expand(BinaryOperator *BI) {
7678
auto &Ctx = BI->getContext();
7779
IRBuilder<> Builder(BI);
78-
Type *Int32Ty = Type::getInt32Ty(Ctx);
80+
Type *InputTy = cast<Instruction>(BI)->getType();
7981
Type *Int8Ty = Type::getInt8Ty(Ctx);
8082
Value *Int8Zero = ConstantInt::get(Int8Ty, 0);
8183

@@ -101,7 +103,7 @@ void AVRShiftExpand::expand(BinaryOperator *BI) {
101103
Builder.SetInsertPoint(LoopBB);
102104
PHINode *ShiftAmountPHI = Builder.CreatePHI(Int8Ty, 2);
103105
ShiftAmountPHI->addIncoming(ShiftAmount, BB);
104-
PHINode *ValuePHI = Builder.CreatePHI(Int32Ty, 2);
106+
PHINode *ValuePHI = Builder.CreatePHI(InputTy, 2);
105107
ValuePHI->addIncoming(BI->getOperand(0), BB);
106108

107109
// Subtract the shift amount by one, as we're shifting one this loop
@@ -116,13 +118,13 @@ void AVRShiftExpand::expand(BinaryOperator *BI) {
116118
Value *ValueShifted;
117119
switch (BI->getOpcode()) {
118120
case Instruction::Shl:
119-
ValueShifted = Builder.CreateShl(ValuePHI, ConstantInt::get(Int32Ty, 1));
121+
ValueShifted = Builder.CreateShl(ValuePHI, ConstantInt::get(InputTy, 1));
120122
break;
121123
case Instruction::LShr:
122-
ValueShifted = Builder.CreateLShr(ValuePHI, ConstantInt::get(Int32Ty, 1));
124+
ValueShifted = Builder.CreateLShr(ValuePHI, ConstantInt::get(InputTy, 1));
123125
break;
124126
case Instruction::AShr:
125-
ValueShifted = Builder.CreateAShr(ValuePHI, ConstantInt::get(Int32Ty, 1));
127+
ValueShifted = Builder.CreateAShr(ValuePHI, ConstantInt::get(InputTy, 1));
126128
break;
127129
default:
128130
llvm_unreachable("asked to expand an instruction that is not a shift");
@@ -137,7 +139,7 @@ void AVRShiftExpand::expand(BinaryOperator *BI) {
137139
// Collect the resulting value. This is necessary in the IR but won't produce
138140
// any actual instructions.
139141
Builder.SetInsertPoint(BI);
140-
PHINode *Result = Builder.CreatePHI(Int32Ty, 2);
142+
PHINode *Result = Builder.CreatePHI(InputTy, 2);
141143
Result->addIncoming(BI->getOperand(0), BB);
142144
Result->addIncoming(ValueShifted, LoopBB);
143145

llvm/test/CodeGen/AVR/shift-expand.ll

+104-33
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,17 @@
88
target datalayout = "e-P1-p:16:8-i8:8-i16:8-i32:8-i64:8-f32:8-f64:8-n8-a:8"
99
target triple = "avr"
1010

11-
define i32 @shl(i32 %value, i32 %amount) addrspace(1) {
12-
; CHECK-LABEL: @shl(
11+
define i16 @shl16(i16 %value, i16 %amount) addrspace(1) {
12+
; CHECK-LABEL: @shl16(
13+
; CHECK-NEXT: [[RESULT:%.*]] = shl i16 [[VALUE:%.*]], [[AMOUNT:%.*]]
14+
; CHECK-NEXT: ret i16 [[RESULT]]
15+
;
16+
%result = shl i16 %value, %amount
17+
ret i16 %result
18+
}
19+
20+
define i32 @shl32(i32 %value, i32 %amount) addrspace(1) {
21+
; CHECK-LABEL: @shl32(
1322
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[AMOUNT:%.*]] to i8
1423
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0
1524
; CHECK-NEXT: br i1 [[TMP2]], label [[SHIFT_DONE:%.*]], label [[SHIFT_LOOP:%.*]]
@@ -28,8 +37,39 @@ define i32 @shl(i32 %value, i32 %amount) addrspace(1) {
2837
ret i32 %result
2938
}
3039

31-
define i32 @lshr(i32 %value, i32 %amount) addrspace(1) {
32-
; CHECK-LABEL: @lshr(
40+
define i40 @shl40(i40 %value, i40 %amount) addrspace(1) {
41+
; CHECK-LABEL: @shl40(
42+
; CHECK-NEXT: [[TMP1:%.*]] = trunc i40 [[AMOUNT:%.*]] to i8
43+
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0
44+
; CHECK-NEXT: br i1 [[TMP2]], label [[SHIFT_DONE:%.*]], label [[SHIFT_LOOP:%.*]]
45+
; CHECK: shift.loop:
46+
; CHECK-NEXT: [[TMP3:%.*]] = phi i8 [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[SHIFT_LOOP]] ]
47+
; CHECK-NEXT: [[TMP4:%.*]] = phi i40 [ [[VALUE:%.*]], [[TMP0]] ], [ [[TMP6:%.*]], [[SHIFT_LOOP]] ]
48+
; CHECK-NEXT: [[TMP5]] = sub i8 [[TMP3]], 1
49+
; CHECK-NEXT: [[TMP6]] = shl i40 [[TMP4]], 1
50+
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i8 [[TMP5]], 0
51+
; CHECK-NEXT: br i1 [[TMP7]], label [[SHIFT_DONE]], label [[SHIFT_LOOP]]
52+
; CHECK: shift.done:
53+
; CHECK-NEXT: [[TMP8:%.*]] = phi i40 [ [[VALUE]], [[TMP0]] ], [ [[TMP6]], [[SHIFT_LOOP]] ]
54+
; CHECK-NEXT: ret i40 [[TMP8]]
55+
;
56+
%result = shl i40 %value, %amount
57+
ret i40 %result
58+
}
59+
60+
; ------------------------------------------------------------------------------
61+
62+
define i16 @lshr16(i16 %value, i16 %amount) addrspace(1) {
63+
; CHECK-LABEL: @lshr16(
64+
; CHECK-NEXT: [[RESULT:%.*]] = lshr i16 [[VALUE:%.*]], [[AMOUNT:%.*]]
65+
; CHECK-NEXT: ret i16 [[RESULT]]
66+
;
67+
%result = lshr i16 %value, %amount
68+
ret i16 %result
69+
}
70+
71+
define i32 @lshr32(i32 %value, i32 %amount) addrspace(1) {
72+
; CHECK-LABEL: @lshr32(
3373
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[AMOUNT:%.*]] to i8
3474
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0
3575
; CHECK-NEXT: br i1 [[TMP2]], label [[SHIFT_DONE:%.*]], label [[SHIFT_LOOP:%.*]]
@@ -48,42 +88,73 @@ define i32 @lshr(i32 %value, i32 %amount) addrspace(1) {
4888
ret i32 %result
4989
}
5090

51-
define i32 @ashr(i32 %0, i32 %1) addrspace(1) {
52-
; CHECK-LABEL: @ashr(
53-
; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP1:%.*]] to i8
54-
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i8 [[TMP3]], 0
55-
; CHECK-NEXT: br i1 [[TMP4]], label [[SHIFT_DONE:%.*]], label [[SHIFT_LOOP:%.*]]
91+
define i40 @lshr40(i40 %value, i40 %amount) addrspace(1) {
92+
; CHECK-LABEL: @lshr40(
93+
; CHECK-NEXT: [[TMP1:%.*]] = trunc i40 [[AMOUNT:%.*]] to i8
94+
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0
95+
; CHECK-NEXT: br i1 [[TMP2]], label [[SHIFT_DONE:%.*]], label [[SHIFT_LOOP:%.*]]
5696
; CHECK: shift.loop:
57-
; CHECK-NEXT: [[TMP5:%.*]] = phi i8 [ [[TMP3]], [[TMP2:%.*]] ], [ [[TMP7:%.*]], [[SHIFT_LOOP]] ]
58-
; CHECK-NEXT: [[TMP6:%.*]] = phi i32 [ [[TMP0:%.*]], [[TMP2]] ], [ [[TMP8:%.*]], [[SHIFT_LOOP]] ]
59-
; CHECK-NEXT: [[TMP7]] = sub i8 [[TMP5]], 1
60-
; CHECK-NEXT: [[TMP8]] = ashr i32 [[TMP6]], 1
61-
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i8 [[TMP7]], 0
62-
; CHECK-NEXT: br i1 [[TMP9]], label [[SHIFT_DONE]], label [[SHIFT_LOOP]]
97+
; CHECK-NEXT: [[TMP3:%.*]] = phi i8 [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[SHIFT_LOOP]] ]
98+
; CHECK-NEXT: [[TMP4:%.*]] = phi i40 [ [[VALUE:%.*]], [[TMP0]] ], [ [[TMP6:%.*]], [[SHIFT_LOOP]] ]
99+
; CHECK-NEXT: [[TMP5]] = sub i8 [[TMP3]], 1
100+
; CHECK-NEXT: [[TMP6]] = lshr i40 [[TMP4]], 1
101+
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i8 [[TMP5]], 0
102+
; CHECK-NEXT: br i1 [[TMP7]], label [[SHIFT_DONE]], label [[SHIFT_LOOP]]
63103
; CHECK: shift.done:
64-
; CHECK-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP0]], [[TMP2]] ], [ [[TMP8]], [[SHIFT_LOOP]] ]
65-
; CHECK-NEXT: ret i32 [[TMP10]]
104+
; CHECK-NEXT: [[TMP8:%.*]] = phi i40 [ [[VALUE]], [[TMP0]] ], [ [[TMP6]], [[SHIFT_LOOP]] ]
105+
; CHECK-NEXT: ret i40 [[TMP8]]
66106
;
67-
%3 = ashr i32 %0, %1
68-
ret i32 %3
107+
%result = lshr i40 %value, %amount
108+
ret i40 %result
69109
}
70110

71-
; This function is not modified because it is not an i32.
72-
define i40 @shl40(i40 %value, i40 %amount) addrspace(1) {
73-
; CHECK-LABEL: @shl40(
74-
; CHECK-NEXT: [[RESULT:%.*]] = shl i40 [[VALUE:%.*]], [[AMOUNT:%.*]]
75-
; CHECK-NEXT: ret i40 [[RESULT]]
111+
; ------------------------------------------------------------------------------
112+
113+
define i16 @ashr16(i16 %value, i16 %amount) addrspace(1) {
114+
; CHECK-LABEL: @ashr16(
115+
; CHECK-NEXT: [[RESULT:%.*]] = ashr i16 [[VALUE:%.*]], [[AMOUNT:%.*]]
116+
; CHECK-NEXT: ret i16 [[RESULT]]
76117
;
77-
%result = shl i40 %value, %amount
78-
ret i40 %result
118+
%result = ashr i16 %value, %amount
119+
ret i16 %result
79120
}
80121

81-
; This function isn't either, although perhaps it should.
82-
define i24 @shl24(i24 %value, i24 %amount) addrspace(1) {
83-
; CHECK-LABEL: @shl24(
84-
; CHECK-NEXT: [[RESULT:%.*]] = shl i24 [[VALUE:%.*]], [[AMOUNT:%.*]]
85-
; CHECK-NEXT: ret i24 [[RESULT]]
122+
define i32 @ashr32(i32 %value, i32 %amount) addrspace(1) {
123+
; CHECK-LABEL: @ashr32(
124+
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[AMOUNT:%.*]] to i8
125+
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0
126+
; CHECK-NEXT: br i1 [[TMP2]], label [[SHIFT_DONE:%.*]], label [[SHIFT_LOOP:%.*]]
127+
; CHECK: shift.loop:
128+
; CHECK-NEXT: [[TMP3:%.*]] = phi i8 [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[SHIFT_LOOP]] ]
129+
; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[VALUE:%.*]], [[TMP0]] ], [ [[TMP6:%.*]], [[SHIFT_LOOP]] ]
130+
; CHECK-NEXT: [[TMP5]] = sub i8 [[TMP3]], 1
131+
; CHECK-NEXT: [[TMP6]] = ashr i32 [[TMP4]], 1
132+
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i8 [[TMP5]], 0
133+
; CHECK-NEXT: br i1 [[TMP7]], label [[SHIFT_DONE]], label [[SHIFT_LOOP]]
134+
; CHECK: shift.done:
135+
; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[VALUE]], [[TMP0]] ], [ [[TMP6]], [[SHIFT_LOOP]] ]
136+
; CHECK-NEXT: ret i32 [[TMP8]]
137+
;
138+
%result = ashr i32 %value, %amount
139+
ret i32 %result
140+
}
141+
142+
define i40 @ashr40(i40 %value, i40 %amount) addrspace(1) {
143+
; CHECK-LABEL: @ashr40(
144+
; CHECK-NEXT: [[TMP1:%.*]] = trunc i40 [[AMOUNT:%.*]] to i8
145+
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0
146+
; CHECK-NEXT: br i1 [[TMP2]], label [[SHIFT_DONE:%.*]], label [[SHIFT_LOOP:%.*]]
147+
; CHECK: shift.loop:
148+
; CHECK-NEXT: [[TMP3:%.*]] = phi i8 [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[SHIFT_LOOP]] ]
149+
; CHECK-NEXT: [[TMP4:%.*]] = phi i40 [ [[VALUE:%.*]], [[TMP0]] ], [ [[TMP6:%.*]], [[SHIFT_LOOP]] ]
150+
; CHECK-NEXT: [[TMP5]] = sub i8 [[TMP3]], 1
151+
; CHECK-NEXT: [[TMP6]] = ashr i40 [[TMP4]], 1
152+
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i8 [[TMP5]], 0
153+
; CHECK-NEXT: br i1 [[TMP7]], label [[SHIFT_DONE]], label [[SHIFT_LOOP]]
154+
; CHECK: shift.done:
155+
; CHECK-NEXT: [[TMP8:%.*]] = phi i40 [ [[VALUE]], [[TMP0]] ], [ [[TMP6]], [[SHIFT_LOOP]] ]
156+
; CHECK-NEXT: ret i40 [[TMP8]]
86157
;
87-
%result = shl i24 %value, %amount
88-
ret i24 %result
158+
%result = ashr i40 %value, %amount
159+
ret i40 %result
89160
}

llvm/test/CodeGen/AVR/shift.ll

+30-4
Original file line numberDiff line numberDiff line change
@@ -54,10 +54,36 @@ define i64 @shift_i64_i64(i64 %a, i64 %b) {
5454
; CHECK: ; %bb.0:
5555
; CHECK-NEXT: push r16
5656
; CHECK-NEXT: push r17
57-
; CHECK-NEXT: mov r16, r10
58-
; CHECK-NEXT: mov r17, r11
59-
; CHECK-NEXT: andi r17, 0
60-
; CHECK-NEXT: rcall __ashldi3
57+
; CHECK-NEXT: mov r30, r10
58+
; CHECK-NEXT: mov r31, r11
59+
; CHECK-NEXT: cpi r30, 0
60+
; CHECK-NEXT: breq .LBB3_3
61+
; CHECK-NEXT: ; %bb.1: ; %shift.loop.preheader
62+
; CHECK-NEXT: mov r27, r1
63+
; CHECK-NEXT: mov r16, r1
64+
; CHECK-NEXT: mov r17, r1
65+
; CHECK-NEXT: .LBB3_2: ; %shift.loop
66+
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
67+
; CHECK-NEXT: mov r31, r21
68+
; CHECK-NEXT: lsl r31
69+
; CHECK-NEXT: mov r26, r1
70+
; CHECK-NEXT: rol r26
71+
; CHECK-NEXT: lsl r22
72+
; CHECK-NEXT: rol r23
73+
; CHECK-NEXT: rol r24
74+
; CHECK-NEXT: rol r25
75+
; CHECK-NEXT: or r24, r16
76+
; CHECK-NEXT: or r25, r17
77+
; CHECK-NEXT: or r22, r26
78+
; CHECK-NEXT: or r23, r27
79+
; CHECK-NEXT: lsl r18
80+
; CHECK-NEXT: rol r19
81+
; CHECK-NEXT: rol r20
82+
; CHECK-NEXT: rol r21
83+
; CHECK-NEXT: dec r30
84+
; CHECK-NEXT: cpi r30, 0
85+
; CHECK-NEXT: brne .LBB3_2
86+
; CHECK-NEXT: .LBB3_3: ; %shift.done
6187
; CHECK-NEXT: pop r17
6288
; CHECK-NEXT: pop r16
6389
; CHECK-NEXT: ret

0 commit comments

Comments
 (0)