 ; rdar://12713765
 ; When realign-stack is set to false, make sure we are not creating stack
 ; objects that are assumed to be 64-byte aligned.
-@T3_retval = common global <16 x float> zeroinitializer, align 16
 
 define void @test1(<16 x float>* noalias sret(<16 x float>) %agg.result) nounwind ssp "no-realign-stack" {
-entry:
 ; CHECK-LABEL: test1:
-; CHECK: ldr r[[R1:[0-9]+]], [pc, r[[R1]]]
-; CHECK: mov r[[R2:[0-9]+]], r[[R1]]
-; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]!
-; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]
-; CHECK: add r[[R3:[0-9]+]], r[[R1]], #32
-; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]
-; CHECK: add r[[R3:[0-9]+]], r[[R1]], #48
-; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]
-; CHECK: mov r[[R2:[0-9]+]], sp
-; CHECK: add r[[R3:[0-9]+]], r[[R2]], #48
-; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]
-; CHECK: add r[[R4:[0-9]+]], r[[R2]], #32
-; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R4]]:128]
-; CHECK: mov r[[R5:[0-9]+]], r[[R2]]
-; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R5]]:128]!
-; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R5]]:128]
-; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R5]]:128]
-; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]
-; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R4]]:128]
-; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]
-; CHECK: add r[[R1:[0-9]+]], r0, #48
-; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128]
-; CHECK: add r[[R1:[0-9]+]], r0, #32
-; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128]
-; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0:128]!
-; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0:128]
+; CHECK: mov r[[PTR:[0-9]+]], r{{[0-9]+}}
+; CHECK: mov r[[NOTALIGNED:[0-9]+]], sp
+; CHECK: add r[[NOTALIGNED]], r[[NOTALIGNED]], #32
+; CHECK: add r[[PTR]], r[[PTR]], #32
+; CHECK: vld1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r[[NOTALIGNED]]:128]
+; CHECK: vld1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r[[PTR]]:128]
+; CHECK: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r[[PTR]]:128]
+; CHECK: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r[[NOTALIGNED]]:128]
+entry:
 %retval = alloca <16 x float>, align 64
- %0 = load <16 x float>, <16 x float>* @T3_retval, align 16
- store <16 x float> %0, <16 x float>* %retval
- %1 = load <16 x float>, <16 x float>* %retval
- store <16 x float> %1, <16 x float>* %agg.result, align 16
+ %a1 = bitcast <16 x float>* %retval to float*
+ %a2 = getelementptr inbounds float, float* %a1, i64 8
+ %a3 = bitcast float* %a2 to <4 x float>*
+
+ %b1 = bitcast <16 x float>* %agg.result to float*
+ %b2 = getelementptr inbounds float, float* %b1, i64 8
+ %b3 = bitcast float* %b2 to <4 x float>*
+
+ %0 = load <4 x float>, <4 x float>* %a3, align 16
+ %1 = load <4 x float>, <4 x float>* %b3, align 16
+ store <4 x float> %0, <4 x float>* %b3, align 16
+ store <4 x float> %1, <4 x float>* %a3, align 16
 ret void
 }
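The rewritten test1 pins down the behavior under the "no-realign-stack" attribute: the backend may not realign the stack pointer, so the 64-byte alignment requested by %retval cannot be guaranteed, and the CHECK lines expect the slot at byte offset 32 (element 8 of the vector, hence the getelementptr with index i64 8) to be addressed with a plain add from sp rather than an alignment-exploiting orr. A minimal stand-alone sketch of the attribute's effect follows; it is not part of the commit, and the function name and body are illustrative only:

; Hypothetical sketch, not from the patch: under "no-realign-stack" the
; backend must not realign sp, so the requested 64-byte alignment of this
; alloca cannot be honored; sp-relative addresses in this function may only
; be assumed to have the ABI stack alignment.
define void @sketch_no_realign_stack() "no-realign-stack" {
entry:
  ; requests align 64, but sp keeps only its incoming ABI alignment here
  %buf = alloca <16 x float>, align 64
  ret void
}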
 
 define void @test2(<16 x float>* noalias sret(<16 x float>) %agg.result) nounwind ssp {
-entry:
 ; CHECK-LABEL: test2:
-; CHECK: ldr r[[R1:[0-9]+]], [pc, r[[R1]]]
-; CHECK: add r[[R2:[0-9]+]], r[[R1]], #48
-; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]
-; CHECK: add r[[R2:[0-9]+]], r[[R1]], #32
-; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]
-; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128]!
-; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128]
-; CHECK: mov r[[R1:[0-9]+]], sp
-; CHECK: orr r[[R2:[0-9]+]], r[[R1]], #16
-; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]
-; CHECK: mov r[[R3:[0-9]+]], #32
-; CHECK: mov r[[R9:[0-9]+]], r[[R1]]
-; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R9]]:128], r[[R3]]
-; CHECK: mov r[[R3:[0-9]+]], r[[R9]]
-; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]!
-; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]
-; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R9]]:128]
-; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]
-; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]
-; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128]
-; CHECK: add r[[R1:[0-9]+]], r0, #48
-; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128]
-; CHECK: add r[[R1:[0-9]+]], r0, #32
-; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128]
-; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0:128]!
-; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0:128]
+; CHECK: mov r[[PTR:[0-9]+]], r{{[0-9]+}}
+; CHECK: mov r[[ALIGNED:[0-9]+]], sp
+; CHECK: orr r[[ALIGNED]], r[[ALIGNED]], #32
+; CHECK: add r[[PTR]], r[[PTR]], #32
+; CHECK: vld1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r[[ALIGNED]]:128]
+; CHECK: vld1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r[[PTR]]:128]
+; CHECK: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r[[PTR]]:128]
+; CHECK: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r[[ALIGNED]]:128]
+entry:
+ %retval = alloca <16 x float>, align 64
+ %a1 = bitcast <16 x float>* %retval to float*
+ %a2 = getelementptr inbounds float, float* %a1, i64 8
+ %a3 = bitcast float* %a2 to <4 x float>*
 
+ %b1 = bitcast <16 x float>* %agg.result to float*
+ %b2 = getelementptr inbounds float, float* %b1, i64 8
+ %b3 = bitcast float* %b2 to <4 x float>*
 
-%retval = alloca <16 x float>, align 64
- %0 = load <16 x float>, <16 x float>* @T3_retval, align 16
- store <16 x float> %0, <16 x float>* %retval
- %1 = load <16 x float>, <16 x float>* %retval
- store <16 x float> %1, <16 x float>* %agg.result, align 16
+ %0 = load <4 x float>, <4 x float>* %a3, align 16
+ %1 = load <4 x float>, <4 x float>* %b3, align 16
+ store <4 x float> %0, <4 x float>* %b3, align 16
+ store <4 x float> %1, <4 x float>* %a3, align 16
 ret void
 }
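test2 is the same function without the attribute, so stack realignment is allowed: once sp has been realigned to 64 bytes its low six bits are zero, and the #32 offset can be folded in with an orr instead of an add, since sp + 32 == sp | 32 whenever sp is 64-byte aligned. A small illustrative function, not part of the commit, makes that identity explicit:

; Hypothetical illustration, not from the patch: for a 64-byte-aligned base,
; or-ing in 32 and adding 32 produce the same address.
define i32 @sketch_orr_equals_add(i32 %p) {
entry:
  %base = and i32 %p, -64        ; clear the low six bits, as realignment does
  %via_add = add i32 %base, 32   ; what test1 must use (plain add)
  %via_orr = or i32 %base, 32    ; what test2 may use (orr)
  %diff = sub i32 %via_add, %via_orr
  ret i32 %diff                  ; always 0
}

This is exactly the distinction the NOTALIGNED and ALIGNED FileCheck variables encode: test1 checks for add r[[NOTALIGNED]], r[[NOTALIGNED]], #32, while test2 checks for orr r[[ALIGNED]], r[[ALIGNED]], #32.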