Skip to content

Commit 83609c0

Browse files
authored
Merge pull request #103 from vacantron/opt
Implement CSE and peephole optimization
2 parents 8344804 + 3b7995e commit 83609c0

File tree

8 files changed

+211
-35
lines changed

8 files changed

+211
-35
lines changed

src/arm-codegen.c

-3
Original file line numberDiff line numberDiff line change
@@ -151,9 +151,6 @@ void cfg_flatten()
151151

152152
ph2_ir_t *insn;
153153
for (insn = bb->ph2_ir_list.head; insn; insn = insn->next) {
154-
if (insn->op == OP_assign && insn->dest == insn->src0)
155-
continue;
156-
157154
flatten_ir = add_ph2_ir(OP_generic);
158155
memcpy(flatten_ir, insn, sizeof(ph2_ir_t));
159156

src/defs.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
#define MAX_FIELDS 32
1919
#define MAX_FUNCS 256
2020
#define MAX_FUNC_TRIES 1950
21-
#define MAX_BLOCKS 1050
21+
#define MAX_BLOCKS 1150
2222
#define MAX_TYPES 64
2323
#define MAX_IR_INSTR 36864
2424
#define MAX_BB_PRED 128
@@ -299,6 +299,7 @@ typedef struct phi_operand phi_operand_t;
299299

300300
struct insn {
301301
struct insn *next;
302+
struct insn *prev;
302303
int idx;
303304
opcode_t opcode;
304305
var_t *rd;

src/globals.c

+1
Original file line numberDiff line numberDiff line change
@@ -576,6 +576,7 @@ void add_insn(block_t *block,
576576
else
577577
bb->insn_list.tail->next = n;
578578

579+
n->prev = bb->insn_list.tail;
579580
bb->insn_list.tail = n;
580581
}
581582

src/main.c

+8
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,9 @@
3333
/* Register allocator */
3434
#include "reg-alloc.c"
3535

36+
/* Peephole optimization */
37+
#include "peephole.c"
38+
3639
/* Machine code generation. support ARMv7-A and RISC-V32I */
3740
#include "codegen.c"
3841

@@ -83,12 +86,17 @@ int main(int argc, char *argv[])
8386

8487
ssa_build(dump_ir);
8588

89+
/* SSA-based optimization */
90+
optimize();
91+
8692
/* SSA-based liveness analyses */
8793
liveness_analysis();
8894

8995
/* allocate register from IR */
9096
reg_alloc();
9197

98+
peephole();
99+
92100
/* flatten CFG to linear instruction */
93101
cfg_flatten();
94102

src/peephole.c

+73
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
/*
2+
* shecc - Self-Hosting and Educational C Compiler.
3+
*
4+
* shecc is freely redistributable under the BSD 2 clause license. See the
5+
* file "LICENSE" for information on usage and redistribution of this file.
6+
*/
7+
8+
/* Classify a phase-2 IR instruction for insn_fusion().
 *
 * Returns 1 when the opcode is one of the arithmetic, shift, bitwise,
 * logical or load operations listed below, and 0 for every other opcode.
 */
int is_fusible_insn(ph2_ir_t *ph2_ir)
{
    /* arithmetic */
    if (ph2_ir->op == OP_add || ph2_ir->op == OP_sub ||
        ph2_ir->op == OP_mul || ph2_ir->op == OP_div ||
        ph2_ir->op == OP_mod || ph2_ir->op == OP_negate)
        return 1;

    /* shifts and bitwise operations */
    if (ph2_ir->op == OP_lshift || ph2_ir->op == OP_rshift ||
        ph2_ir->op == OP_bit_and || ph2_ir->op == OP_bit_or ||
        ph2_ir->op == OP_bit_xor)
        return 1;

    /* logical operations */
    if (ph2_ir->op == OP_log_and || ph2_ir->op == OP_log_or ||
        ph2_ir->op == OP_log_not)
        return 1;

    /* loads */
    if (ph2_ir->op == OP_load || ph2_ir->op == OP_global_load ||
        ph2_ir->op == OP_load_data_address)
        return 1;

    return 0;
}
33+
34+
/* Try to fuse a phase-2 IR instruction with the one that follows it.
 *
 * Currently a single pattern is handled:
 *     {ALU rn, rs1, rs2; mv rd, rn;}  ->  {ALU rd, rs1, rs2;}
 * When `ph2_ir` is fusible (see is_fusible_insn) and its successor is an
 * OP_assign whose source is the register `ph2_ir` writes, `ph2_ir` is
 * retargeted to the move's destination and the move is unlinked from the
 * list. Nothing happens when there is no successor or the pattern does
 * not match.
 */
void insn_fusion(ph2_ir_t *ph2_ir)
{
    ph2_ir_t *succ = ph2_ir->next;

    if (succ && succ->op == OP_assign && is_fusible_insn(ph2_ir) &&
        ph2_ir->dest == succ->src0) {
        /* write straight into the move's destination, then drop the move */
        ph2_ir->dest = succ->dest;
        ph2_ir->next = succ->next;
    }
    /* other insn fusions */
}
52+
53+
/* FIXME: release detached basic blocks */
54+
void peephole()
55+
{
56+
fn_t *fn;
57+
for (fn = FUNC_LIST.head; fn; fn = fn->next) {
58+
basic_block_t *bb;
59+
for (bb = fn->bbs; bb; bb = bb->rpo_next) {
60+
ph2_ir_t *ph2_ir;
61+
for (ph2_ir = bb->ph2_ir_list.head; ph2_ir; ph2_ir = ph2_ir->next) {
62+
ph2_ir_t *next = ph2_ir->next;
63+
if (!next)
64+
continue;
65+
if (next->op == OP_assign && next->dest == next->src0) {
66+
ph2_ir->next = next->next;
67+
continue;
68+
}
69+
insn_fusion(ph2_ir);
70+
}
71+
}
72+
}
73+
}

src/reg-alloc.c

+36-14
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,17 @@ void spill_var(basic_block_t *bb, var_t *var, int idx)
7373
REGS[idx].polluted = 0;
7474
}
7575

76+
/* Return the index of register for given variable. Otherwise, return -1. */
77+
int find_in_regs(var_t *var)
78+
{
79+
int i;
80+
for (i = 0; i < REG_CNT; i++) {
81+
if (REGS[i].var == var)
82+
return i;
83+
}
84+
return -1;
85+
}
86+
7687
void load_var(basic_block_t *bb, var_t *var, int idx)
7788
{
7889
ph2_ir_t *ir = var->is_global ? bb_add_ph2_ir(bb, OP_global_load)
@@ -85,11 +96,9 @@ void load_var(basic_block_t *bb, var_t *var, int idx)
8596

8697
int prepare_operand(basic_block_t *bb, var_t *var, int operand_0)
8798
{
88-
int i;
89-
for (i = 0; i < REG_CNT; i++) {
90-
if (REGS[i].var == var)
91-
return i;
92-
}
99+
int i = find_in_regs(var);
100+
if (i > -1)
101+
return i;
93102

94103
for (i = 0; i < REG_CNT; i++) {
95104
if (!REGS[i].var) {
@@ -125,12 +134,11 @@ int prepare_operand(basic_block_t *bb, var_t *var, int operand_0)
125134

126135
int prepare_dest(basic_block_t *bb, var_t *var, int operand_0, int operand_1)
127136
{
128-
int i;
129-
for (i = 0; i < REG_CNT; i++)
130-
if (REGS[i].var == var) {
131-
REGS[i].polluted = 1;
132-
return i;
133-
}
137+
int i = find_in_regs(var);
138+
if (i > -1) {
139+
REGS[i].polluted = 1;
140+
return i;
141+
}
134142

135143
for (i = 0; i < REG_CNT; i++) {
136144
if (!REGS[i].var) {
@@ -328,7 +336,7 @@ void reg_alloc()
328336
func_t *func;
329337
ph2_ir_t *ir;
330338
int dest, src0, src1;
331-
int i, sz;
339+
int i, sz, clear_reg;
332340

333341
refresh(bb, insn);
334342

@@ -404,9 +412,19 @@ void reg_alloc()
404412
ir->dest = dest;
405413
break;
406414
case OP_assign:
407-
src0 = prepare_operand(bb, insn->rs1, -1);
415+
src0 = find_in_regs(insn->rs1);
416+
417+
/* If operand is loaded from stack, clear the original slot
418+
* after moving.
419+
*/
420+
if (src0 > -1)
421+
clear_reg = 0;
422+
else {
423+
clear_reg = 1;
424+
src0 = prepare_operand(bb, insn->rs1, -1);
425+
}
408426
dest = prepare_dest(bb, insn->rd, src0, -1);
409-
ir = bb_add_ph2_ir(bb, insn->opcode);
427+
ir = bb_add_ph2_ir(bb, OP_assign);
410428
ir->src0 = src0;
411429
ir->dest = dest;
412430

@@ -417,6 +435,10 @@ void reg_alloc()
417435
ir->src1 = insn->rd->offset;
418436
REGS[dest].polluted = 0;
419437
}
438+
439+
if (clear_reg)
440+
REGS[src0].var = NULL;
441+
420442
break;
421443
case OP_read:
422444
src0 = prepare_operand(bb, insn->rs1, -1);

src/riscv-codegen.c

+5-7
Original file line numberDiff line numberDiff line change
@@ -123,9 +123,6 @@ void cfg_flatten()
123123

124124
ph2_ir_t *insn;
125125
for (insn = bb->ph2_ir_list.head; insn; insn = insn->next) {
126-
if (insn->op == OP_assign && insn->dest == insn->src0)
127-
continue;
128-
129126
flatten_ir = add_ph2_ir(OP_generic);
130127
memcpy(flatten_ir, insn, sizeof(ph2_ir_t));
131128

@@ -146,6 +143,7 @@ void emit(int code)
146143

147144
void emit_ph2_ir(ph2_ir_t *ph2_ir)
148145
{
146+
func_t *func;
149147
int rd = ph2_ir->dest + 10;
150148
int rs1 = ph2_ir->src0 + 10;
151149
int rs2 = ph2_ir->src1 + 10;
@@ -249,16 +247,16 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
249247
emit(__jal(__zero, ph2_ir->next_bb->elf_offset - elf_code_idx));
250248
return;
251249
case OP_call:
252-
emit(__jal(__ra, find_func(ph2_ir->func_name)->fn->bbs->elf_offset -
253-
elf_code_idx));
250+
func = find_func(ph2_ir->func_name);
251+
emit(__jal(__ra, func->fn->bbs->elf_offset - elf_code_idx));
254252
return;
255253
case OP_load_data_address:
256254
emit(__lui(rd, rv_hi(elf_data_start + ph2_ir->src0)));
257255
emit(__addi(rd, rd, rv_lo(elf_data_start + ph2_ir->src0)));
258256
return;
259257
case OP_address_of_func:
260-
ofs =
261-
elf_code_start + find_func(ph2_ir->func_name)->fn->bbs->elf_offset;
258+
func = find_func(ph2_ir->func_name);
259+
ofs = elf_code_start + func->fn->bbs->elf_offset;
262260
emit(__lui(__t0, rv_hi(ofs)));
263261
emit(__addi(__t0, __t0, rv_lo(ofs)));
264262
emit(__sw(__t0, rs1, 0));

src/ssa.c

+86-10
Original file line numberDiff line numberDiff line change
@@ -651,18 +651,16 @@ void append_unwound_phi_insn(basic_block_t *bb, var_t *dest, var_t *rs)
651651
} else {
652652
/* insert it before branch instruction */
653653
if (tail->opcode == OP_branch) {
654-
insn_t *prev = bb->insn_list.head;
655-
if (!prev->next) {
654+
if (tail->prev) {
655+
tail->prev->next = n;
656+
n->prev = tail->prev;
657+
} else
656658
bb->insn_list.head = n;
657-
n->next = prev;
658-
} else {
659-
while (prev->next != tail)
660-
prev = prev->next;
661-
prev->next = n;
662-
n->next = tail;
663-
}
659+
660+
n->next = tail;
661+
tail->prev = n;
664662
} else {
665-
bb->insn_list.tail->next = n;
663+
tail->next = n;
666664
bb->insn_list.tail = n;
667665
}
668666
}
@@ -1051,6 +1049,84 @@ void ssa_build(int dump_ir)
10511049
unwind_phi();
10521050
}
10531051

1052+
/* Common Subexpression Elimination (CSE) */
1053+
/* TODO: simplify with def-use chain */
1054+
/* TODO: release detached insns node */
1055+
int cse(insn_t *insn, basic_block_t *bb)
1056+
{
1057+
if (insn->opcode != OP_read)
1058+
return 0;
1059+
1060+
insn_t *prev = insn->prev;
1061+
1062+
if (!prev)
1063+
return 0;
1064+
if (prev->opcode != OP_add)
1065+
return 0;
1066+
if (prev->rd != insn->rs1)
1067+
return 0;
1068+
1069+
var_t *def = NULL, *base = prev->rs1, *idx = prev->rs2;
1070+
basic_block_t *b;
1071+
insn_t *i = prev;
1072+
for (b = bb;; b = b->idom) {
1073+
if (!i)
1074+
i = b->insn_list.tail;
1075+
1076+
for (; i; i = i->prev) {
1077+
if (i == prev)
1078+
continue;
1079+
if (i->opcode != OP_add)
1080+
continue;
1081+
if (!i->next)
1082+
continue;
1083+
if (i->next->opcode != OP_read)
1084+
continue;
1085+
if (i->rs1 != base || i->rs2 != idx)
1086+
continue;
1087+
def = i->next->rd;
1088+
}
1089+
if (def)
1090+
break;
1091+
if (b->idom == b)
1092+
break;
1093+
}
1094+
1095+
if (!def)
1096+
return 0;
1097+
1098+
if (prev->prev) {
1099+
insn->prev = prev->prev;
1100+
prev->next = insn;
1101+
} else {
1102+
bb->insn_list.head = insn;
1103+
insn->prev = NULL;
1104+
}
1105+
1106+
insn->opcode = OP_assign;
1107+
insn->rs1 = def;
1108+
return 1;
1109+
}
1110+
1111+
void optimize()
1112+
{
1113+
int changed = 0;
1114+
fn_t *fn;
1115+
for (fn = FUNC_LIST.head; fn; fn = fn->next) {
1116+
/* basic block level (control flow) optimizations */
1117+
1118+
basic_block_t *bb;
1119+
for (bb = fn->bbs; bb; bb = bb->rpo_next) {
1120+
/* instruction level optimizations */
1121+
insn_t *insn;
1122+
for (insn = bb->insn_list.head; insn; insn = insn->next) {
1123+
changed |= cse(insn, bb);
1124+
/* more optimizations */
1125+
}
1126+
}
1127+
}
1128+
}
1129+
10541130
void bb_index_reversed_rpo(fn_t *fn, basic_block_t *bb)
10551131
{
10561132
bb->rpo_r = fn->bb_cnt++;

0 commit comments

Comments
 (0)