Skip to content

Commit 68cb873

Browse files
committed
RISC-V: Support combine extend and reduce sum to widen reduce sum
This patch adds combine patterns that combine an extend followed by a reduce sum into a widening reduce sum. The patterns in autovec.md were adjusted as needed. Note that the current vectorization cannot generate a reduction operand whose mode is LMUL=M8, because that would require an LMUL=M16 mode for the extended operand, which is currently not possible. So I've added the VI_QHS_NO_M8 and VF_HS_NO_M8 mode iterators, which exclude the modes with LMUL=M8. PR target/111381 gcc/ChangeLog: * config/riscv/autovec-opt.md (*reduc_plus_scal_<mode>): New combine pattern. (*fold_left_widen_plus_<mode>): Ditto. (*mask_len_fold_left_widen_plus_<mode>): Ditto. * config/riscv/autovec.md (reduc_plus_scal_<mode>): Change from define_expand to define_insn_and_split. (fold_left_plus_<mode>): Ditto. (mask_len_fold_left_plus_<mode>): Ditto. * config/riscv/riscv-v.cc (expand_reduction): Support widen reduction. * config/riscv/vector-iterators.md (UNSPEC_WREDUC_SUM): Add new iterators and attrs. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/widen/widen_reduc-1.c: New test. * gcc.target/riscv/rvv/autovec/widen/widen_reduc_order-1.c: New test. * gcc.target/riscv/rvv/autovec/widen/widen_reduc_order-2.c: New test. * gcc.target/riscv/rvv/autovec/widen/widen_reduc_order_run-1.c: New test. * gcc.target/riscv/rvv/autovec/widen/widen_reduc_order_run-2.c: New test. * gcc.target/riscv/rvv/autovec/widen/widen_reduc_run-1.c: New test.
1 parent 05cb873 commit 68cb873

10 files changed

+321
-27
lines changed

gcc/config/riscv/autovec-opt.md

+82
Original file line numberDiff line numberDiff line change
@@ -1196,6 +1196,88 @@
11961196
}
11971197
[(set_attr "type" "vfwmul")])
11981198

1199+
;; Combine extend + vredsum to vwredsum[u]
1200+
(define_insn_and_split "*reduc_plus_scal_<mode>"
1201+
[(set (match_operand:<V_DOUBLE_EXTEND_VEL> 0 "register_operand")
1202+
(unspec:<V_DOUBLE_EXTEND_VEL> [
1203+
(any_extend:<V_DOUBLE_EXTEND>
1204+
(match_operand:VI_QHS_NO_M8 1 "register_operand"))
1205+
] UNSPEC_REDUC_SUM))]
1206+
"TARGET_VECTOR && can_create_pseudo_p ()"
1207+
"#"
1208+
"&& 1"
1209+
[(const_int 0)]
1210+
{
1211+
riscv_vector::expand_reduction (<WREDUC_UNSPEC>, operands,
1212+
CONST0_RTX (<V_DOUBLE_EXTEND_VEL>mode));
1213+
DONE;
1214+
}
1215+
[(set_attr "type" "vector")])
1216+
1217+
;; Combine extend + vfredusum to vfwredusum
1218+
(define_insn_and_split "*reduc_plus_scal_<mode>"
1219+
[(set (match_operand:<V_DOUBLE_EXTEND_VEL> 0 "register_operand")
1220+
(unspec:<V_DOUBLE_EXTEND_VEL> [
1221+
(float_extend:<V_DOUBLE_EXTEND>
1222+
(match_operand:VF_HS_NO_M8 1 "register_operand"))
1223+
] UNSPEC_REDUC_SUM_UNORDERED))]
1224+
"TARGET_VECTOR && can_create_pseudo_p ()"
1225+
"#"
1226+
"&& 1"
1227+
[(const_int 0)]
1228+
{
1229+
riscv_vector::expand_reduction (UNSPEC_WREDUC_SUM_UNORDERED, operands,
1230+
CONST0_RTX (<V_DOUBLE_EXTEND_VEL>mode));
1231+
DONE;
1232+
}
1233+
[(set_attr "type" "vector")])
1234+
1235+
;; Combine extend + vfredosum to vfwredosum
1236+
(define_insn_and_split "*fold_left_widen_plus_<mode>"
1237+
[(set (match_operand:<V_DOUBLE_EXTEND_VEL> 0 "register_operand")
1238+
(unspec:<V_DOUBLE_EXTEND_VEL> [
1239+
(float_extend:<V_DOUBLE_EXTEND>
1240+
(match_operand:VF_HS_NO_M8 2 "register_operand"))
1241+
(match_operand:<V_DOUBLE_EXTEND_VEL> 1 "register_operand")
1242+
] UNSPEC_REDUC_SUM_ORDERED))]
1243+
"TARGET_VECTOR && can_create_pseudo_p ()"
1244+
"#"
1245+
"&& 1"
1246+
[(const_int 0)]
1247+
{
1248+
riscv_vector::expand_reduction (UNSPEC_WREDUC_SUM_ORDERED, operands,
1249+
operands[1],
1250+
riscv_vector::reduction_type::FOLD_LEFT);
1251+
DONE;
1252+
}
1253+
[(set_attr "type" "vector")])
1254+
1255+
;; Combine extend + mask vfredosum to mask vfwredosum
1256+
(define_insn_and_split "*mask_len_fold_left_widen_plus_<mode>"
1257+
[(set (match_operand:<V_DOUBLE_EXTEND_VEL> 0 "register_operand")
1258+
(unspec:<V_DOUBLE_EXTEND_VEL> [
1259+
(float_extend:<V_DOUBLE_EXTEND>
1260+
(match_operand:VF_HS_NO_M8 2 "register_operand"))
1261+
(match_operand:<V_DOUBLE_EXTEND_VEL> 1 "register_operand")
1262+
(match_operand:<VM> 3 "vector_mask_operand")
1263+
(match_operand 4 "autovec_length_operand")
1264+
(match_operand 5 "const_0_operand")
1265+
] UNSPEC_REDUC_SUM_ORDERED))]
1266+
"TARGET_VECTOR && can_create_pseudo_p ()"
1267+
"#"
1268+
"&& 1"
1269+
[(const_int 0)]
1270+
{
1271+
if (rtx_equal_p (operands[4], const0_rtx))
1272+
emit_move_insn (operands[0], operands[1]);
1273+
else
1274+
riscv_vector::expand_reduction (UNSPEC_WREDUC_SUM_ORDERED, operands,
1275+
operands[1],
1276+
riscv_vector::reduction_type::MASK_LEN_FOLD_LEFT);
1277+
DONE;
1278+
}
1279+
[(set_attr "type" "vector")])
1280+
11991281
;; =============================================================================
12001282
;; Misc combine patterns
12011283
;; =============================================================================

gcc/config/riscv/autovec.md

+49-25
Original file line numberDiff line numberDiff line change
@@ -2086,14 +2086,20 @@
20862086
;; - vredxor.vs
20872087
;; -------------------------------------------------------------------------
20882088

2089-
(define_expand "reduc_plus_scal_<mode>"
2090-
[(match_operand:<VEL> 0 "register_operand")
2091-
(match_operand:VI 1 "register_operand")]
2092-
"TARGET_VECTOR"
2089+
(define_insn_and_split "reduc_plus_scal_<mode>"
2090+
[(set (match_operand:<VEL> 0 "register_operand")
2091+
(unspec:<VEL> [
2092+
(match_operand:VI 1 "register_operand")
2093+
] UNSPEC_REDUC_SUM))]
2094+
"TARGET_VECTOR && can_create_pseudo_p ()"
2095+
"#"
2096+
"&& 1"
2097+
[(const_int 0)]
20932098
{
20942099
riscv_vector::expand_reduction (UNSPEC_REDUC_SUM, operands, CONST0_RTX (<VEL>mode));
20952100
DONE;
2096-
})
2101+
}
2102+
[(set_attr "type" "vector")])
20972103

20982104
(define_expand "reduc_smax_scal_<mode>"
20992105
[(match_operand:<VEL> 0 "register_operand")
@@ -2173,15 +2179,21 @@
21732179
;; - vfredmin.vs
21742180
;; -------------------------------------------------------------------------
21752181

2176-
(define_expand "reduc_plus_scal_<mode>"
2177-
[(match_operand:<VEL> 0 "register_operand")
2178-
(match_operand:VF 1 "register_operand")]
2179-
"TARGET_VECTOR"
2182+
(define_insn_and_split "reduc_plus_scal_<mode>"
2183+
[(set (match_operand:<VEL> 0 "register_operand")
2184+
(unspec:<VEL> [
2185+
(match_operand:VF 1 "register_operand")
2186+
] UNSPEC_REDUC_SUM_UNORDERED))]
2187+
"TARGET_VECTOR && can_create_pseudo_p ()"
2188+
"#"
2189+
"&& 1"
2190+
[(const_int 0)]
21802191
{
21812192
riscv_vector::expand_reduction (UNSPEC_REDUC_SUM_UNORDERED, operands,
21822193
CONST0_RTX (<VEL>mode));
21832194
DONE;
2184-
})
2195+
}
2196+
[(set_attr "type" "vector")])
21852197

21862198
(define_expand "reduc_smax_scal_<mode>"
21872199
[(match_operand:<VEL> 0 "register_operand")
@@ -2215,27 +2227,38 @@
22152227
;; -------------------------------------------------------------------------
22162228

22172229
;; Unpredicated in-order FP reductions.
2218-
(define_expand "fold_left_plus_<mode>"
2219-
[(match_operand:<VEL> 0 "register_operand")
2220-
(match_operand:<VEL> 1 "register_operand")
2221-
(match_operand:VF 2 "register_operand")]
2222-
"TARGET_VECTOR"
2230+
(define_insn_and_split "fold_left_plus_<mode>"
2231+
[(set (match_operand:<VEL> 0 "register_operand")
2232+
(unspec:<VEL> [
2233+
(match_operand:VF 2 "register_operand")
2234+
(match_operand:<VEL> 1 "register_operand")
2235+
] UNSPEC_REDUC_SUM_ORDERED))]
2236+
"TARGET_VECTOR && can_create_pseudo_p ()"
2237+
"#"
2238+
"&& 1"
2239+
[(const_int 0)]
22232240
{
22242241
riscv_vector::expand_reduction (UNSPEC_REDUC_SUM_ORDERED, operands,
22252242
operands[1],
22262243
riscv_vector::reduction_type::FOLD_LEFT);
22272244
DONE;
2228-
})
2245+
}
2246+
[(set_attr "type" "vector")])
22292247

22302248
;; Predicated in-order FP reductions.
2231-
(define_expand "mask_len_fold_left_plus_<mode>"
2232-
[(match_operand:<VEL> 0 "register_operand")
2233-
(match_operand:<VEL> 1 "register_operand")
2234-
(match_operand:VF 2 "register_operand")
2235-
(match_operand:<VM> 3 "vector_mask_operand")
2236-
(match_operand 4 "autovec_length_operand")
2237-
(match_operand 5 "const_0_operand")]
2238-
"TARGET_VECTOR"
2249+
(define_insn_and_split "mask_len_fold_left_plus_<mode>"
2250+
[(set (match_operand:<VEL> 0 "register_operand")
2251+
(unspec:<VEL> [
2252+
(match_operand:VF 2 "register_operand")
2253+
(match_operand:<VEL> 1 "register_operand")
2254+
(match_operand:<VM> 3 "vector_mask_operand")
2255+
(match_operand 4 "autovec_length_operand")
2256+
(match_operand 5 "const_0_operand")
2257+
] UNSPEC_REDUC_SUM_ORDERED))]
2258+
"TARGET_VECTOR && can_create_pseudo_p ()"
2259+
"#"
2260+
"&& 1"
2261+
[(const_int 0)]
22392262
{
22402263
if (rtx_equal_p (operands[4], const0_rtx))
22412264
emit_move_insn (operands[0], operands[1]);
@@ -2244,7 +2267,8 @@
22442267
operands[1],
22452268
riscv_vector::reduction_type::MASK_LEN_FOLD_LEFT);
22462269
DONE;
2247-
})
2270+
}
2271+
[(set_attr "type" "vector")])
22482272

22492273
;; -------------------------------------------------------------------------
22502274
;; ---- [INT,FP] Extract active element

gcc/config/riscv/riscv-v.cc

+5-2
Original file line numberDiff line numberDiff line change
@@ -3212,7 +3212,8 @@ expand_reduction (unsigned unspec, rtx *ops, rtx init, reduction_type type)
32123212
{
32133213
rtx vector = type == reduction_type::UNORDERED ? ops[1] : ops[2];
32143214
machine_mode vmode = GET_MODE (vector);
3215-
machine_mode m1_mode = get_m1_mode (vmode).require ();
3215+
machine_mode vel_mode = GET_MODE (ops[0]);
3216+
machine_mode m1_mode = get_m1_mode (vel_mode).require ();
32163217

32173218
rtx m1_tmp = gen_reg_rtx (m1_mode);
32183219
rtx scalar_move_ops[] = {m1_tmp, init};
@@ -3225,7 +3226,9 @@ expand_reduction (unsigned unspec, rtx *ops, rtx init, reduction_type type)
32253226
rtx reduc_ops[] = {m1_tmp2, vector, m1_tmp};
32263227

32273228
if (unspec == UNSPEC_REDUC_SUM_ORDERED
3228-
|| unspec == UNSPEC_REDUC_SUM_UNORDERED)
3229+
|| unspec == UNSPEC_WREDUC_SUM_ORDERED
3230+
|| unspec == UNSPEC_REDUC_SUM_UNORDERED
3231+
|| unspec == UNSPEC_WREDUC_SUM_UNORDERED)
32293232
{
32303233
insn_code icode = code_for_pred (unspec, vmode);
32313234
if (type == reduction_type::MASK_LEN_FOLD_LEFT)

gcc/config/riscv/vector-iterators.md

+51
Original file line numberDiff line numberDiff line change
@@ -686,6 +686,14 @@
686686
RVVM8SI RVVM4SI RVVM2SI RVVM1SI (RVVMF2SI "TARGET_MIN_VLEN > 32")
687687
])
688688

689+
(define_mode_iterator VI_QHS_NO_M8 [
690+
RVVM4QI RVVM2QI RVVM1QI RVVMF2QI RVVMF4QI (RVVMF8QI "TARGET_MIN_VLEN > 32")
691+
692+
RVVM4HI RVVM2HI RVVM1HI RVVMF2HI (RVVMF4HI "TARGET_MIN_VLEN > 32")
693+
694+
RVVM4SI RVVM2SI RVVM1SI (RVVMF2SI "TARGET_MIN_VLEN > 32")
695+
])
696+
689697
(define_mode_iterator VF_HS [
690698
(RVVM8HF "TARGET_ZVFH") (RVVM4HF "TARGET_ZVFH") (RVVM2HF "TARGET_ZVFH")
691699
(RVVM1HF "TARGET_ZVFH") (RVVMF2HF "TARGET_ZVFH")
@@ -695,6 +703,23 @@
695703
(RVVM1SF "TARGET_VECTOR_ELEN_FP_32") (RVVMF2SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32")
696704
])
697705

706+
(define_mode_iterator VF_HS_NO_M8 [
707+
(RVVM4HF "TARGET_ZVFH")
708+
(RVVM2HF "TARGET_ZVFH")
709+
(RVVM1HF "TARGET_ZVFH")
710+
(RVVMF2HF "TARGET_ZVFH")
711+
(RVVMF4HF "TARGET_ZVFH && TARGET_MIN_VLEN > 32")
712+
(RVVM4SF "TARGET_VECTOR_ELEN_FP_32")
713+
(RVVM2SF "TARGET_VECTOR_ELEN_FP_32")
714+
(RVVM1SF "TARGET_VECTOR_ELEN_FP_32")
715+
(RVVMF2SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32")
716+
])
717+
718+
(define_mode_iterator VF_HS_M8 [
719+
(RVVM8HF "TARGET_ZVFH")
720+
(RVVM8SF "TARGET_VECTOR_ELEN_FP_32")
721+
])
722+
698723
(define_mode_iterator V_VLSI_QHS [
699724
RVVM8QI RVVM4QI RVVM2QI RVVM1QI RVVMF2QI RVVMF4QI (RVVMF8QI "TARGET_MIN_VLEN > 32")
700725

@@ -1319,6 +1344,8 @@
13191344
(UNSPEC_WREDUC_SUM_ORDERED "wredosum") (UNSPEC_WREDUC_SUM_UNORDERED "wredusum")
13201345
])
13211346

1347+
(define_code_attr WREDUC_UNSPEC [(zero_extend "UNSPEC_WREDUC_SUMU") (sign_extend "UNSPEC_WREDUC_SUM")])
1348+
13221349
(define_mode_attr VINDEX [
13231350
(RVVM8QI "RVVM8QI") (RVVM4QI "RVVM4QI") (RVVM2QI "RVVM2QI") (RVVM1QI "RVVM1QI")
13241351
(RVVMF2QI "RVVMF2QI") (RVVMF4QI "RVVMF4QI") (RVVMF8QI "RVVMF8QI")
@@ -1743,6 +1770,18 @@
17431770
(V1DF "DF") (V2DF "DF") (V4DF "DF") (V8DF "DF") (V16DF "DF") (V32DF "DF") (V64DF "DF") (V128DF "DF") (V256DF "DF") (V512DF "DF")
17441771
])
17451772

1773+
(define_mode_attr V_DOUBLE_EXTEND_VEL [
1774+
(RVVM4QI "HI") (RVVM2QI "HI") (RVVM1QI "HI") (RVVMF2QI "HI") (RVVMF4QI "HI") (RVVMF8QI "HI")
1775+
1776+
(RVVM4HI "SI") (RVVM2HI "SI") (RVVM1HI "SI") (RVVMF2HI "SI") (RVVMF4HI "SI")
1777+
1778+
(RVVM4SI "DI") (RVVM2SI "DI") (RVVM1SI "DI") (RVVMF2SI "DI")
1779+
1780+
(RVVM4HF "SF") (RVVM2HF "SF") (RVVM1HF "SF") (RVVMF2HF "SF") (RVVMF4HF "SF")
1781+
1782+
(RVVM4SF "DF") (RVVM2SF "DF") (RVVM1SF "DF") (RVVMF2SF "DF")
1783+
])
1784+
17461785
(define_mode_attr vel [
17471786
(RVVM8QI "qi") (RVVM4QI "qi") (RVVM2QI "qi") (RVVM1QI "qi") (RVVMF2QI "qi") (RVVMF4QI "qi") (RVVMF8QI "qi")
17481787

@@ -2101,6 +2140,18 @@
21012140
(RVVM1QI "64") (RVVMF2QI "64") (RVVMF4QI "64") (RVVMF8QI "64")
21022141
])
21032142

2143+
(define_mode_attr V_DOUBLE_EXTEND [
2144+
(RVVM4QI "RVVM8HI") (RVVM2QI "RVVM4HI") (RVVM1QI "RVVM2HI") (RVVMF2QI "RVVM1HI") (RVVMF4QI "RVVMF2HI") (RVVMF8QI "RVVMF4HI")
2145+
2146+
(RVVM4HI "RVVM8SI") (RVVM2HI "RVVM4SI") (RVVM1HI "RVVM2SI") (RVVMF2HI "RVVM1SI") (RVVMF4HI "RVVMF2SI")
2147+
2148+
(RVVM4SI "RVVM8DI") (RVVM2SI "RVVM4DI") (RVVM1SI "RVVM2DI") (RVVMF2SI "RVVM1DI")
2149+
2150+
(RVVM4HF "RVVM8SF") (RVVM2HF "RVVM4SF") (RVVM1HF "RVVM2SF") (RVVMF2HF "RVVM1SF") (RVVMF4HF "RVVMF2SF")
2151+
2152+
(RVVM4SF "RVVM8DF") (RVVM2SF "RVVM4DF") (RVVM1SF "RVVM2DF") (RVVMF2SF "RVVM1DF")
2153+
])
2154+
21042155
(define_mode_attr V_DOUBLE_TRUNC [
21052156
(RVVM8HI "RVVM4QI") (RVVM4HI "RVVM2QI") (RVVM2HI "RVVM1QI") (RVVM1HI "RVVMF2QI") (RVVMF2HI "RVVMF4QI") (RVVMF4HI "RVVMF8QI")
21062157

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/* { dg-do compile } */
2+
/* { dg-additional-options "-march=rv64gcv_zvfh_zvl128b -mabi=lp64d --param riscv-autovec-preference=fixed-vlmax --param riscv-autovec-lmul=m2 -fno-vect-cost-model -ffast-math" } */
3+
#include <stdint-gcc.h>
4+
5+
#define TEST_TYPE(TYPE1, TYPE2, N) \
6+
__attribute__((noipa)) TYPE1 reduc_##TYPE1##_##TYPE2(TYPE2 *restrict a) { \
7+
TYPE1 sum = 0; \
8+
for (int i = 0; i < N; i += 1) \
9+
sum += a[i]; \
10+
return sum; \
11+
}
12+
13+
#define TEST_ALL(TEST) \
14+
TEST(int16_t, int8_t, 16) \
15+
TEST(int32_t, int16_t, 8) \
16+
TEST(int64_t, int32_t, 4) \
17+
TEST(uint16_t, uint8_t, 16) \
18+
TEST(uint32_t, uint16_t, 8) \
19+
TEST(uint64_t, uint32_t, 4) \
20+
TEST(float, _Float16, 8) \
21+
TEST(double, float, 4)
22+
23+
TEST_ALL(TEST_TYPE)
24+
25+
/* { dg-final { scan-assembler-times {\tvfwredusum\.vs} 2 } } */
26+
/* { dg-final { scan-assembler-times {\tvwredsum\.vs} 3 } } */
27+
/* { dg-final { scan-assembler-times {\tvwredsumu\.vs} 3 } } */
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
/* { dg-do compile } */
2+
/* { dg-additional-options "-march=rv64gcv_zvfh -mabi=lp64d --param riscv-autovec-preference=scalable -fno-vect-cost-model" } */
3+
#include <stdint-gcc.h>
4+
5+
#define TEST_TYPE(TYPE1, TYPE2) \
6+
__attribute__((noipa)) \
7+
TYPE1 reduc_##TYPE1##_##TYPE2(TYPE2 *restrict a, int n) { \
8+
TYPE1 sum = 0; \
9+
for (int i = 0; i < n; i += 1) \
10+
sum += a[i]; \
11+
return sum; \
12+
}
13+
14+
#define TEST_ALL(TEST) \
15+
TEST(float, _Float16) \
16+
TEST(double, float)
17+
18+
TEST_ALL(TEST_TYPE)
19+
20+
/* { dg-final { scan-assembler-times {\tvfwredosum\.vs} 2 } } */
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
/* { dg-do compile } */
2+
/* { dg-additional-options "-march=rv64gcv_zvfh -mabi=lp64d --param riscv-autovec-preference=scalable --param riscv-autovec-lmul=m2 -fno-vect-cost-model" } */
3+
#include <stdint-gcc.h>
4+
5+
#define TEST_TYPE(TYPE1, TYPE2, N) \
6+
__attribute__((noipa)) TYPE1 reduc_##TYPE1##_##TYPE2(TYPE2 *restrict a) { \
7+
TYPE1 sum = 0; \
8+
for (int i = 0; i < N; i += 1) \
9+
sum += a[i]; \
10+
return sum; \
11+
}
12+
13+
#define TEST_ALL(TEST) \
14+
TEST(float, _Float16, 8) \
15+
TEST(double, float, 4)
16+
17+
TEST_ALL(TEST_TYPE)
18+
19+
/* { dg-final { scan-assembler-times {\tvfwredosum\.vs} 2 } } */

0 commit comments

Comments
 (0)