Skip to content

Commit fce3678

Browse files
committed
LoongArch: Optimize LSX vector shuffle on floating-point vector
The vec_perm expander was wrongly defined. The GCC Internals manual says:

    Operand 3 is the "selector". It is an integral mode vector of the
    same width and number of elements as mode M.

But we made operand 3 have the same mode as the shuffled vectors, so it would be an FP mode vector if the shuffled vectors are in an FP mode.

With this mistake, the generic code manages to work around the problem, but it ends up creating some very nasty code for a simple __builtin_shuffle (a, b, c) where a and b are V4SF and c is V4SI:

    la.local  $r12,.LANCHOR0
    la.local  $r13,.LANCHOR1
    vld       $vr1,$r12,48
    vslli.w   $vr1,$vr1,2
    vld       $vr2,$r12,16
    vld       $vr0,$r13,0
    vld       $vr3,$r13,16
    vshuf.b   $vr0,$vr1,$vr1,$vr0
    vld       $vr1,$r12,32
    vadd.b    $vr0,$vr0,$vr3
    vandi.b   $vr0,$vr0,31
    vshuf.b   $vr0,$vr1,$vr2,$vr0
    vst       $vr0,$r12,0
    jr        $r1

This is obviously stupid. Fix the expander definition and adjust loongarch_expand_vec_perm to handle it correctly.

gcc/ChangeLog:

    * config/loongarch/lsx.md (vec_perm<mode:LSX>): Make the selector VIMODE.
    * config/loongarch/loongarch.cc (loongarch_expand_vec_perm): Use the mode of the selector (instead of the shuffled vector) for truncating it. Operate on subregs in the selector mode if the shuffled vector has a different mode (i.e. it's a floating-point vector).

gcc/testsuite/ChangeLog:

    * gcc.target/loongarch/vect-shuf-fp.c: New test.
1 parent bd17d00 commit fce3678

File tree

3 files changed

+27
-9
lines changed

3 files changed

+27
-9
lines changed

gcc/config/loongarch/loongarch.cc

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8607,36 +8607,38 @@ void
86078607
loongarch_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
86088608
{
86098609
machine_mode vmode = GET_MODE (target);
8610+
machine_mode vimode = GET_MODE (sel);
86108611
auto nelt = GET_MODE_NUNITS (vmode);
8611-
auto round_reg = gen_reg_rtx (vmode);
8612+
auto round_reg = gen_reg_rtx (vimode);
86128613
rtx round_data[MAX_VECT_LEN];
86138614

86148615
for (int i = 0; i < nelt; i += 1)
86158616
{
86168617
round_data[i] = GEN_INT (0x1f);
86178618
}
86188619

8619-
rtx round_data_rtx = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, round_data));
8620+
rtx round_data_rtx = gen_rtx_CONST_VECTOR (vimode, gen_rtvec_v (nelt, round_data));
86208621
emit_move_insn (round_reg, round_data_rtx);
86218622

8623+
if (vmode != vimode)
8624+
{
8625+
target = lowpart_subreg (vimode, target, vmode);
8626+
op0 = lowpart_subreg (vimode, op0, vmode);
8627+
op1 = lowpart_subreg (vimode, op1, vmode);
8628+
}
8629+
86228630
switch (vmode)
86238631
{
86248632
case E_V16QImode:
86258633
emit_insn (gen_andv16qi3 (sel, sel, round_reg));
86268634
emit_insn (gen_lsx_vshuf_b (target, op1, op0, sel));
86278635
break;
86288636
case E_V2DFmode:
8629-
emit_insn (gen_andv2di3 (sel, sel, round_reg));
8630-
emit_insn (gen_lsx_vshuf_d_f (target, sel, op1, op0));
8631-
break;
86328637
case E_V2DImode:
86338638
emit_insn (gen_andv2di3 (sel, sel, round_reg));
86348639
emit_insn (gen_lsx_vshuf_d (target, sel, op1, op0));
86358640
break;
86368641
case E_V4SFmode:
8637-
emit_insn (gen_andv4si3 (sel, sel, round_reg));
8638-
emit_insn (gen_lsx_vshuf_w_f (target, sel, op1, op0));
8639-
break;
86408642
case E_V4SImode:
86418643
emit_insn (gen_andv4si3 (sel, sel, round_reg));
86428644
emit_insn (gen_lsx_vshuf_w (target, sel, op1, op0));

gcc/config/loongarch/lsx.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -837,7 +837,7 @@
837837
[(match_operand:LSX 0 "register_operand")
838838
(match_operand:LSX 1 "register_operand")
839839
(match_operand:LSX 2 "register_operand")
840-
(match_operand:LSX 3 "register_operand")]
840+
(match_operand:<VIMODE> 3 "register_operand")]
841841
"ISA_HAS_LSX"
842842
{
843843
loongarch_expand_vec_perm (operands[0], operands[1],
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
/* { dg-do compile } */
2+
/* { dg-options "-mlasx -O3" } */
3+
/* { dg-final { scan-assembler "vshuf\.w" } } */
4+
5+
#define V __attribute__ ((vector_size (16)))
6+
7+
int a V;
8+
float b V;
9+
float c V;
10+
float d V;
11+
12+
void
13+
test (void)
14+
{
15+
d = __builtin_shuffle (b, c, a);
16+
}

0 commit comments

Comments
 (0)