@@ -33,6 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
33
#define ALPHAI $f1
34
34
#define X $r7
35
35
#define INCX $r8
36
+ #define DUMMY2 $r9
36
37
37
38
#define I $r12
38
39
#define TEMP $r13
@@ -65,6 +66,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
65
66
66
67
bge $r0, N, .L999
67
68
bge $r0, INCX, .L999
69
+ ld.d DUMMY2, $sp, 0
68
70
li.d TEMP, 1
69
71
movgr2fr.d a1, $r0
70
72
FFINT a1, a1
@@ -86,24 +88,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
86
88
#endif
87
89
bne INCX, TEMP, .L22
88
90
91
+ /////// INCX == 1 ////////
89
92
.L11:
90
- bge $r0, I, .L997
91
93
CMPEQ $fcc0, ALPHAR, a1
92
94
CMPEQ $fcc1, ALPHAI, a1
93
- bceqz $fcc0, .L13
94
- b .L14
95
- .align 3
95
+ bge $r0, I, .L19
96
+ /////// INCX == 1 && N >= 4 (double) or N >= 8 (single) ////////
97
+ bnez DUMMY2, .L17 // if DUMMY2 == 1, called from c/zscal.
96
98
97
- .L13:
98
- bceqz $fcc1, .L114 //alpha_r != 0.0 && alpha_i != 0.0
99
- b .L113 //alpha_r != 0.0 && alpha_i == 0.0
99
+ bceqz $fcc0, .L17
100
100
101
- .L14:
102
- bceqz $fcc1, .L114 //alpha_r == 0.0 && alpha_i != 0.0
103
- b .L111 //alpha_r == 0.0 && alpha_i == 0.0
104
- .align 3
101
+ bceqz $fcc1, .L17
105
102
106
- .L111 : //alpha_r == 0.0 && alpha_i == 0.0
103
+ .L15 : //alpha_r == 0.0 && alpha_i == 0.0
107
104
xvst VXZ, X, 0 * SIZE
108
105
#ifdef DOUBLE
109
106
xvst VXZ, X, 4 * SIZE
@@ -113,41 +110,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
113
110
addi.d X, X, 16 * SIZE
114
111
#endif
115
112
addi.d I, I, -1
116
- blt $r0, I, .L111
117
- b .L997
113
+ blt $r0, I, .L15
114
+ b .L19
118
115
.align 3
119
116
120
- .L113: //alpha_r != 0.0 && alpha_i == 0.0
121
- xvld VX0, X, 0 * SIZE
122
- #ifdef DOUBLE
123
- xvld VX1, X, 4 * SIZE
124
- xvpickev.d x1, VX1, VX0
125
- xvpickod.d x2, VX1, VX0
126
- xvfmul.d x3, VXAR, x1
127
- xvfmul.d x4, VXAR, x2
128
- xvilvl.d VX2, x4 ,x3
129
- xvilvh.d VX3, x4, x3
130
- xvst VX2, X, 0 * SIZE
131
- xvst VX3, X, 4 * SIZE
132
- addi.d X, X, 8 * SIZE
133
- #else
134
- xvld VX1, X, 8 * SIZE
135
- xvpickev.w x1, VX1, VX0
136
- xvpickod.w x2, VX1, VX0
137
- xvfmul.s x3, VXAR, x1
138
- xvfmul.s x4, VXAR, x2
139
- xvilvl.w VX2, x4 ,x3
140
- xvilvh.w VX3, x4, x3
141
- xvst VX2, X, 0 * SIZE
142
- xvst VX3, X, 8 * SIZE
143
- addi.d X, X, 16 * SIZE
144
- #endif
145
- addi.d I, I, -1
146
- blt $r0, I, .L113
147
- b .L997
148
- .align 3
149
-
150
- .L114: //alpha_r != 0.0 && alpha_i != 0.0
117
+ .L17:
151
118
xvld VX0, X, 0 * SIZE
152
119
#ifdef DOUBLE
153
120
xvld VX1, X, 4 * SIZE
@@ -177,29 +144,39 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
177
144
addi.d X, X, 16 * SIZE
178
145
#endif
179
146
addi.d I, I, -1
180
- blt $r0, I, .L114
181
- b .L997
147
+ blt $r0, I, .L17
148
+ b .L19
149
+ .align 3
150
+
151
+ /////// INCX == 1, remainder: N % 4 (double) or N % 8 (single) ///////
152
+ .L19:
153
+ #ifdef DOUBLE
154
+ andi I, N, 3
155
+ #else
156
+ andi I, N, 7
157
+ #endif
158
+ beqz I, .L999
159
+ bnez DUMMY2, .L998 // if DUMMY2 == 1, called from c/zscal.
160
+
161
+ bceqz $fcc0, .L998
162
+
163
+ bceqz $fcc1, .L998
164
+
165
+ b .L995 // alpha_r == 0.0 && alpha_i == 0.0
182
166
.align 3
183
167
168
+ /////// INCX != 1 ////////
184
169
.L22:
185
- bge $r0, I, .L997
186
- move XX, X
187
170
CMPEQ $fcc0, ALPHAR, a1
188
171
CMPEQ $fcc1, ALPHAI, a1
189
- bceqz $fcc0, .L23
190
- b .L24
191
- .align 3
192
-
193
- .L23:
194
- bceqz $fcc1, .L224 //alpha_r != 0.0 && alpha_i != 0.0
195
- b .L223 //alpha_r != 0.0 && alpha_i == 0.0
172
+ move XX, X
173
+ bge $r0, I, .L29
174
+ bnez DUMMY2, .L25 // if DUMMY2 == 1, called from c/zscal.
175
+ bceqz $fcc0, .L25
196
176
197
- .L24:
198
- bceqz $fcc1, .L224 //alpha_r == 0.0 && alpha_i != 0.0
199
- b .L221 //alpha_r == 0.0 && alpha_i == 0.0
200
- .align 3
177
+ bceqz $fcc1, .L25
201
178
202
- .L221 : //alpha_r == 0.0 && alpha_i == 0.0
179
+ .L27 : //alpha_r == 0.0 && alpha_i == 0.0
203
180
#ifdef DOUBLE
204
181
xvstelm.d VXZ, X, 0 , 0
205
182
xvstelm.d VXZ, X, 1 * SIZE, 0
@@ -239,122 +216,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
239
216
#endif
240
217
add .d X, X, INCX
241
218
addi.d I, I, -1
242
- blt $r0, I, .L221
243
- b .L997
219
+ blt $r0, I, .L27
220
+ b .L29
244
221
.align 3
245
222
246
- .L223: //alpha_r != 0.0 && alpha_i == 0.0
247
- #ifdef DOUBLE
248
- ld.d t1, X, 0 * SIZE
249
- ld.d t2, X, 1 * SIZE
250
- add .d X, X, INCX
251
- ld.d t3, X, 0 * SIZE
252
- ld.d t4, X, 1 * SIZE
253
- add .d X, X, INCX
254
- xvinsgr2vr.d x1, t1, 0
255
- xvinsgr2vr.d x2, t2, 0
256
- xvinsgr2vr.d x1, t3, 1
257
- xvinsgr2vr.d x2, t4, 1
258
- ld.d t1, X, 0 * SIZE
259
- ld.d t2, X, 1 * SIZE
260
- add .d X, X, INCX
261
- ld.d t3, X, 0 * SIZE
262
- ld.d t4, X, 1 * SIZE
263
- xvinsgr2vr.d x1, t1, 2
264
- xvinsgr2vr.d x2, t2, 2
265
- xvinsgr2vr.d x1, t3, 3
266
- xvinsgr2vr.d x2, t4, 3
267
- add .d X, X, INCX
268
-
269
- xvfmul.d x3, VXAR, x1
270
- xvfmul.d x4, VXAR, x2
271
- addi.d I, I, -1
272
- xvstelm.d x3, XX, 0 * SIZE, 0
273
- xvstelm.d x4, XX, 1 * SIZE, 0
274
- add .d XX, XX, INCX
275
- xvstelm.d x3, XX, 0 * SIZE, 1
276
- xvstelm.d x4, XX, 1 * SIZE, 1
277
- add .d XX, XX, INCX
278
- xvstelm.d x3, XX, 0 * SIZE, 2
279
- xvstelm.d x4, XX, 1 * SIZE, 2
280
- add .d XX, XX, INCX
281
- xvstelm.d x3, XX, 0 * SIZE, 3
282
- xvstelm.d x4, XX, 1 * SIZE, 3
283
- #else
284
- ld.w t1, X, 0 * SIZE
285
- ld.w t2, X, 1 * SIZE
286
- add .d X, X, INCX
287
- ld.w t3, X, 0 * SIZE
288
- ld.w t4, X, 1 * SIZE
289
- add .d X, X, INCX
290
- xvinsgr2vr.w x1, t1, 0
291
- xvinsgr2vr.w x2, t2, 0
292
- xvinsgr2vr.w x1, t3, 1
293
- xvinsgr2vr.w x2, t4, 1
294
- ld.w t1, X, 0 * SIZE
295
- ld.w t2, X, 1 * SIZE
296
- add .d X, X, INCX
297
- ld.w t3, X, 0 * SIZE
298
- ld.w t4, X, 1 * SIZE
299
- xvinsgr2vr.w x1, t1, 2
300
- xvinsgr2vr.w x2, t2, 2
301
- xvinsgr2vr.w x1, t3, 3
302
- xvinsgr2vr.w x2, t4, 3
303
- add .d X, X, INCX
304
- ld.w t1, X, 0 * SIZE
305
- ld.w t2, X, 1 * SIZE
306
- add .d X, X, INCX
307
- ld.w t3, X, 0 * SIZE
308
- ld.w t4, X, 1 * SIZE
309
- add .d X, X, INCX
310
- xvinsgr2vr.w x1, t1, 4
311
- xvinsgr2vr.w x2, t2, 4
312
- xvinsgr2vr.w x1, t3, 5
313
- xvinsgr2vr.w x2, t4, 5
314
- ld.w t1, X, 0 * SIZE
315
- ld.w t2, X, 1 * SIZE
316
- add .d X, X, INCX
317
- ld.w t3, X, 0 * SIZE
318
- ld.w t4, X, 1 * SIZE
319
- xvinsgr2vr.w x1, t1, 6
320
- xvinsgr2vr.w x2, t2, 6
321
- xvinsgr2vr.w x1, t3, 7
322
- xvinsgr2vr.w x2, t4, 7
323
- add .d X, X, INCX
324
-
325
- xvfmul.s x3, VXAR, x1
326
- xvfmul.s x4, VXAR, x2
327
- addi.d I, I, -1
328
- xvstelm.w x3, XX, 0 * SIZE, 0
329
- xvstelm.w x4, XX, 1 * SIZE, 0
330
- add .d XX, XX, INCX
331
- xvstelm.w x3, XX, 0 * SIZE, 1
332
- xvstelm.w x4, XX, 1 * SIZE, 1
333
- add .d XX, XX, INCX
334
- xvstelm.w x3, XX, 0 * SIZE, 2
335
- xvstelm.w x4, XX, 1 * SIZE, 2
336
- add .d XX, XX, INCX
337
- xvstelm.w x3, XX, 0 * SIZE, 3
338
- xvstelm.w x4, XX, 1 * SIZE, 3
339
- add .d XX, XX, INCX
340
- xvstelm.w x3, XX, 0 * SIZE, 4
341
- xvstelm.w x4, XX, 1 * SIZE, 4
342
- add .d XX, XX, INCX
343
- xvstelm.w x3, XX, 0 * SIZE, 5
344
- xvstelm.w x4, XX, 1 * SIZE, 5
345
- add .d XX, XX, INCX
346
- xvstelm.w x3, XX, 0 * SIZE, 6
347
- xvstelm.w x4, XX, 1 * SIZE, 6
348
- add .d XX, XX, INCX
349
- xvstelm.w x3, XX, 0 * SIZE, 7
350
- xvstelm.w x4, XX, 1 * SIZE, 7
351
- #endif
352
- add .d XX, XX, INCX
353
- blt $r0, I, .L223
354
- b .L997
355
- .align 3
356
-
357
- .L224: //alpha_r != 0.0 && alpha_i != 0.0
223
+ .L25:
358
224
#ifdef DOUBLE
359
225
ld.d t1, X, 0 * SIZE
360
226
ld.d t2, X, 1 * SIZE
@@ -465,19 +331,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
465
331
xvstelm.w x4, XX, 1 * SIZE, 7
466
332
#endif
467
333
add .d XX, XX, INCX
468
- blt $r0, I, .L224
469
- b .L997
334
+ blt $r0, I, .L25
335
+ b .L29
470
336
.align 3
471
337
472
- .L997:
338
+ /////// INCX != 1, remainder: N % 4 (double) or N % 8 (single) ///////
339
+ .L29:
473
340
#ifdef DOUBLE
474
- andi I, N, 3
341
+ andi I, N, 3
475
342
#else
476
- andi I, N, 7
343
+ andi I, N, 7
477
344
#endif
478
- bge $r0, I, .L999
479
- .align 3
345
+ beqz I, .L999
346
+ bnez DUMMY2, .L998 // if DUMMY2 == 1, called from c/zscal.
480
347
348
+ bceqz $fcc0, .L998
349
+
350
+ bceqz $fcc1, .L998
351
+
352
+ .L995: // alpha_r == 0.0 && alpha_i == 0.0
353
+ ST a1, X, 0 * SIZE
354
+ ST a1, X, 1 * SIZE
355
+ addi.d I, I, -1
356
+ add .d X, X, INCX
357
+ blt $r0, I, .L995
358
+ b .L999
481
359
.L998:
482
360
LD a1, X, 0 * SIZE
483
361
LD a2, X, 1 * SIZE
@@ -490,7 +368,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
490
368
ST s2, X, 1 * SIZE
491
369
add .d X, X, INCX
492
370
blt $r0, I, .L998
493
- .align 3
371
+ b .L999
494
372
495
373
.L999:
496
374
move $r4, $r12
0 commit comments