Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 4b91633

Browse files
author
Aba
committed Jul 23, 2024
Minor cleanup
1 parent 6d36dcf commit 4b91633

File tree

3 files changed

+51
-88
lines changed

3 files changed

+51
-88
lines changed
 

‎deepsocflow/c/runtime.h

+36-48
Original file line number | Diff line number | Diff line change
@@ -86,25 +86,21 @@ typedef struct {
8686

8787
Memory_st mem;
8888

89-
static inline void flush_cache(void *addr, uint32_t bytes) {
90-
// Do nothing
91-
}
92-
9389
// Get and set config are done by sv
9490
extern EXT_C u32 get_config(u32);
9591
extern EXT_C void set_config(u32, u32);
92+
static inline void flush_cache(void *addr, uint32_t bytes) {} // Do nothing
9693

9794
#else
9895
#define sim_fprintf(...)
9996
#define mem (*(Memory_st* restrict)MEM_BASEADDR)
10097

10198
inline volatile u32 get_config(u32 offset){
102-
return *(volatile u32 *)(CONFIG_BASEADDR + offset);
99+
return *(volatile u32 *)(CONFIG_BASEADDR + offset*4);
103100
}
104101

105102
inline void set_config(u32 offset, u32 data){
106-
volatile u32 *Addr = (volatile u32 *restrict)(CONFIG_BASEADDR + offset);
107-
*Addr = data;
103+
*(volatile u32 *restrict)(CONFIG_BASEADDR + offset*4) = data;
108104
}
109105
#endif
110106

@@ -120,6 +116,7 @@ typedef struct {
120116
// Helper functions
121117

122118
static inline void print_output () {
119+
flush_cache(&mem.y, sizeof(mem.y));
123120
for (int i=0; i<O_WORDS; i++){
124121
printf("y[%d]: %f \n", i, (float)mem.y[i]);
125122
}
@@ -161,22 +158,20 @@ static inline void write_x(i8 val, i8 *restrict p_out_buffer, i32 ib, i32 ixp, i
161158

162159
i32 p_offset = (ixp == 0) ? 0 : (pb_out->cm_p0 + (ixp-1)*pb_out->cm) * pb_out->xp_words;
163160
i32 flat_index_n2r = (((ixn*pb_out->l + ixl)*pb_out->w + ixw)*xcm + ixcm)*(PE_ROWS+pb_out->x_pad) + ixr; // multidim_index -> flat_index [n,l,w,cm,r]
161+
i32 flat_index = p_offset + flat_index_n2r;
164162

165163
#ifdef XDEBUG
166-
i32 flat_index = p_offset + flat_index_n2r;
167164
mem.debug_tiled[flat_index] = val;
168165
#endif
169166

170167
// Pack bits and store
171-
div_t packed_idx = div(p_offset + flat_index_n2r, X_WORDS_PER_BYTE);
168+
div_t packed_idx = div(flat_index, X_WORDS_PER_BYTE);
172169
assert_printf (packed_idx.quot , <, bundles[ib].o_bytes, "write_x", WRITEX_DEBUG_INFO);
173170

174171
u8 packed_val = ((u8)val & X_BITS_MASK) << (packed_idx.rem * X_BITS);
175172
u8 mem_val = p_out_buffer[packed_idx.quot];
176173
u8 mem_val_cleaned = X_POSITION_INVERTED_MASKS[packed_idx.rem] & mem_val;
177174
write_flush_u8((u8*)(p_out_buffer + packed_idx.quot), mem_val_cleaned | packed_val);
178-
179-
// if (ib==1 && packed_idx.quot >= 356) debug_printf("index:%d, final_val:%d --- position:%d value:%d packed_val:%d, mem_val:%d, mem_val_cleaned:%d, clean_mask:%d, pos_mask:%d \n", packed_index, mem.debug_packed[packed_index], packed_position, val, packed_val, mem_val, mem_val_cleaned, X_BITS_MASK, X_POSITION_INVERTED_MASKS[packed_position]);
180175
}
181176

182177

@@ -285,7 +280,7 @@ extern EXT_C u8 model_run() {
285280
#endif
286281

287282
debug_printf("Starting model_run()\n");
288-
set_config(4*A_START, 1);
283+
set_config(A_START, 1);
289284

290285
for (ib = 0; ib < N_BUNDLES; ib++) {
291286

@@ -307,7 +302,7 @@ extern EXT_C u8 model_run() {
307302
#ifdef SIM
308303
DMA_WAIT:
309304
// if sim return, so SV can pass time, and call again, which will jump to DMA_WAIT again
310-
if (!get_config(4*(A_DONE_WRITE + ocm_bank)))
305+
if (!get_config(A_DONE_WRITE + ocm_bank))
311306
return 1;
312307

313308
char f_path_raw [1000], f_path_sum [1000]; // make sure full f_path_raw is shorter than 1000
@@ -316,13 +311,13 @@ extern EXT_C u8 model_run() {
316311
FILE *fp_raw = fopen(f_path_raw, "a");
317312
FILE *fp_sum = fopen(f_path_sum, "a");
318313
#else
319-
while (!get_config(4*(A_DONE_WRITE + ocm_bank))){
314+
while (!get_config(A_DONE_WRITE + ocm_bank)){
320315
// in FPGA, wait for write done
321316
};
317+
flush_cache(&ocm[ocm_bank], PE_ROWS*PE_COLS*sizeof(Y_TYPE)) ;
322318
usleep(0);
323319
#endif
324-
set_config(4*(A_DONE_WRITE + ocm_bank), 0);
325-
flush_cache(&ocm[ocm_bank], PE_ROWS*PE_COLS*sizeof(Y_TYPE)) ;
320+
set_config(A_DONE_WRITE + ocm_bank, 0);
326321

327322
w_last = iw_kw2 == pb->w_kw2-1 ? pb->kw/2+1 : 1;
328323
sram_addr=0;
@@ -508,7 +503,7 @@ extern EXT_C u8 model_run() {
508503
fclose(fp_sum);
509504
fclose(fp_raw);
510505
#endif
511-
set_config(4*(A_DONE_READ + ocm_bank), 1);
506+
set_config(A_DONE_READ + ocm_bank, 1);
512507
debug_printf("-------- iw_kw2 0x%x done \n", iw_kw2);
513508
} // iw_kw2
514509
iw_kw2 = 0;
@@ -554,11 +549,10 @@ extern EXT_C u8 model_run() {
554549
fclose(fp_packed);
555550
}
556551
#endif
557-
set_config(4*A_BUNDLE_DONE, 1);
552+
set_config(A_BUNDLE_DONE, 1);
558553
} // ib
559554
ib = 0;
560555
debug_printf("done all bundles!!\n");
561-
flush_cache(&mem.y, sizeof(mem.y));
562556
#ifdef SIM
563557
is_first_call = 1;
564558
#endif
@@ -569,19 +563,6 @@ extern EXT_C u8 model_run() {
569563
// Rest of the helper functions used in simulation.
570564
#ifdef SIM
571565

572-
extern EXT_C void sim_fill_memory (){
573-
FILE *fp;
574-
char f_path [1000];
575-
576-
sprintf(f_path, "%s/wbx.bin", DATA_DIR);
577-
fp = fopen(f_path, "rb");
578-
debug_printf("DEBUG: Reading from file %s \n", f_path);
579-
if(!fp)
580-
debug_printf("ERROR! File not found: %s \n", f_path);
581-
int bytes = fread(mem.w, 1, WB_BYTES+X_BYTES, fp);
582-
fclose(fp);
583-
}
584-
585566
extern EXT_C u32 addr_64to32(void* restrict addr){
586567
u64 offset = (u64)addr - (u64)&mem;
587568
return (u32)offset + 0x20000000;
@@ -613,24 +594,31 @@ u32 addr_64to32 (void* addr){
613594
extern EXT_C void model_setup(){
614595

615596
#ifdef SIM
616-
sim_fill_memory();
597+
FILE *fp;
598+
char f_path [1000];
599+
sprintf(f_path, "%s/wbx.bin", DATA_DIR);
600+
fp = fopen(f_path, "rb");
601+
debug_printf("DEBUG: Reading from file %s \n", f_path);
602+
if(!fp) debug_printf("ERROR! File not found: %s \n", f_path);
603+
int bytes = fread(mem.w, 1, WB_BYTES+X_BYTES, fp);
604+
fclose(fp);
617605
#endif
618606
flush_cache(&mem.w, WB_BYTES+X_BYTES); // force transfer to DDR, starting addr & length
619607

620608
// Write registers in controller
621-
set_config(4*A_START, 0); // Start
622-
set_config(4*(A_DONE_READ+0), 1); // Done read ocm bank 0
623-
set_config(4*(A_DONE_READ+1), 1); // Done read ocm bank 1
624-
set_config(4*(A_DONE_WRITE+0), 0); // Done write ocm bank 0
625-
set_config(4*(A_DONE_WRITE+1), 0); // Done write ocm bank 1
626-
set_config(4*(A_OCM_BASE+0), addr_64to32(ocm[0])); // Base addr ocm bank 0
627-
set_config(4*(A_OCM_BASE+1), addr_64to32(ocm[1])); // Base addr ocm bank 1
628-
set_config(4*A_WEIGHTS_BASE, addr_64to32(mem.w)); // Base adddr weights
629-
set_config(4*A_BUNDLE_DONE, 1); // Bundle done (?)
630-
set_config(4*A_N_BUNDLES_1, N_BUNDLES); // Number of bundles
631-
set_config(4*A_W_DONE, 0); // Weigths done
632-
set_config(4*A_X_DONE, 0); // Bundle done
633-
set_config(4*A_O_DONE, 0); // Output done
609+
set_config(A_START , 0); // Start
610+
set_config(A_DONE_READ +0, 1); // Done read ocm bank 0
611+
set_config(A_DONE_READ +1, 1); // Done read ocm bank 1
612+
set_config(A_DONE_WRITE+0, 0); // Done write ocm bank 0
613+
set_config(A_DONE_WRITE+1, 0); // Done write ocm bank 1
614+
set_config(A_OCM_BASE +0, addr_64to32(ocm[0])); // Base addr ocm bank 0
615+
set_config(A_OCM_BASE +1, addr_64to32(ocm[1])); // Base addr ocm bank 1
616+
set_config(A_WEIGHTS_BASE, addr_64to32(mem.w)); // Base addr weights
617+
set_config(A_BUNDLE_DONE , 1); // Bundle done writing (pixel dma waits for this)
618+
set_config(A_N_BUNDLES_1 , N_BUNDLES); // Number of bundles
619+
set_config(A_W_DONE , 0); // Weights done
620+
set_config(A_X_DONE , 0); // Bundle done
621+
set_config(A_O_DONE , 0); // Output done
634622

635623
// Write into BRAM the config for controller
636624
i32 parameters[8*N_BUNDLES];
@@ -640,14 +628,14 @@ extern EXT_C void model_setup(){
640628
parameters[8*var+2] = bundles[var].x_bpt; // x_bpt
641629
parameters[8*var+3] = bundles[var].w_bpt_p0; // w_bpt0
642630
parameters[8*var+4] = bundles[var].w_bpt; // w_bpt
631+
643632
assert_printf(bundles[var].p, <, 1<<16, "", "P should be less than 2**16 for bundle:%x", var);
644633
assert_printf(bundles[var].t, <, 1<<16, "", "T should be less than 2**16 for bundle:%x", var);
645634
parameters[8*var+5] = (bundles[var].t << 16) + bundles[var].p; // max p
646-
647635
parameters[8*var+6] = ((u32*)&bundles[var].header)[0];
648636
parameters[8*var+7] = ((u32*)&bundles[var].header)[1];
649637
}
650638
for (int var = 0; var < 8*N_BUNDLES; var++){
651-
set_config(4*(16+var), parameters[var]);
639+
set_config(16+var, parameters[var]);
652640
}
653641
}

‎deepsocflow/test/sv/axi_sys_tb.sv

+4-4
Original file line number | Diff line number | Diff line change
@@ -60,14 +60,14 @@ module axi_sys_tb;
6060

6161

6262
function automatic int get_config(input int offset);
63-
if (offset < 16*4) return dut.OC_TOP.CONTROLLER.cfg[offset/4];
64-
else return dut.OC_TOP.CONTROLLER.sdp_ram.RAM[offset/4-16];
63+
if (offset < 16) return dut.OC_TOP.CONTROLLER.cfg [offset ];
64+
else return dut.OC_TOP.CONTROLLER.sdp_ram.RAM[offset-16];
6565
endfunction
6666

6767

6868
function automatic set_config(input int offset, input int data);
69-
if (offset < 16*4) dut.OC_TOP.CONTROLLER.cfg[offset/4] <= data;
70-
else dut.OC_TOP.CONTROLLER.sdp_ram.RAM[offset/4-16] <= data;
69+
if (offset < 16) dut.OC_TOP.CONTROLLER.cfg [offset ] <= data;
70+
else dut.OC_TOP.CONTROLLER.sdp_ram.RAM[offset-16] <= data;
7171
endfunction
7272

7373

‎run/param_test.py

+11-36
Original file line number | Diff line number | Diff line change
@@ -54,76 +54,51 @@ def __init__(self, sys_bits, x_int_bits, *args, **kwargs):
5454
super().__init__(sys_bits, x_int_bits, *args, **kwargs)
5555

5656
self.b1 = XBundle(
57-
core=XConvBN(
58-
k_int_bits=0,
59-
b_int_bits=0,
60-
filters=8,
61-
kernel_size=7,
62-
strides=(2,1),
57+
core=XConvBN(
58+
k_int_bits=0, b_int_bits=0, filters=8, kernel_size=7, strides=(2,1),
6359
act=XActivation(sys_bits=sys_bits, o_int_bits=0, type='relu', slope=0)),
6460
pool=XPool(
65-
type='avg',
66-
pool_size=(3,4),
67-
strides=(2,3),
68-
padding='same',
61+
type='avg', pool_size=(3,4), strides=(2,3), padding='same',
6962
act=XActivation(sys_bits=sys_bits, o_int_bits=0, type=None),)
7063
)
7164

7265
self.b2 = XBundle(
7366
core=XConvBN(
74-
k_int_bits=0,
75-
b_int_bits=0,
76-
filters=8,
77-
kernel_size=1,
67+
k_int_bits=0, b_int_bits=0, filters=8, kernel_size=1,
7868
act=XActivation(sys_bits=sys_bits, o_int_bits=0, type=None)),
7969
add_act=XActivation(sys_bits=sys_bits, o_int_bits=0, type='relu', slope=0.125)
8070
)
8171

8272
self.b3 = XBundle(
83-
core=XConvBN(
84-
k_int_bits=0,
85-
b_int_bits=0,
86-
filters=8,
87-
kernel_size=7,
73+
core=XConvBN(
74+
k_int_bits=0, b_int_bits=0, filters=8, kernel_size=7,
8875
act=XActivation(sys_bits=sys_bits, o_int_bits=0, type=None),),
8976
add_act=XActivation(sys_bits=sys_bits, o_int_bits=0, type='relu', slope=0)
9077
)
9178

9279
self.b4 = XBundle(
93-
core=XConvBN(
94-
k_int_bits=0,
95-
b_int_bits=0,
96-
filters=8,
97-
kernel_size=5,
80+
core=XConvBN(
81+
k_int_bits=0, b_int_bits=0, filters=8, kernel_size=5,
9882
act=XActivation(sys_bits=sys_bits, o_int_bits=0, type=None),),
9983
add_act=XActivation(sys_bits=sys_bits, o_int_bits=0, type='relu', slope=0)
10084
)
10185

10286
self.b5 = XBundle(
10387
core=XConvBN(
104-
k_int_bits=0,
105-
b_int_bits=0,
106-
filters=24,
107-
kernel_size=3,
88+
k_int_bits=0, b_int_bits=0, filters=24, kernel_size=3,
10889
act=XActivation(sys_bits=sys_bits, o_int_bits=0, type='relu', slope=0),),
10990
)
11091

11192
self.b6 = XBundle(
11293
core=XConvBN(
113-
k_int_bits=0,
114-
b_int_bits=0,
115-
filters=10,
116-
kernel_size=1,
94+
k_int_bits=0, b_int_bits=0, filters=10, kernel_size=1,
11795
act=XActivation(sys_bits=sys_bits, o_int_bits=0, type='relu', slope=0),),
11896
flatten=True
11997
)
12098

12199
self.b7 = XBundle(
122100
core=XDense(
123-
k_int_bits=0,
124-
b_int_bits=0,
125-
units=NB_CLASSES,
126-
use_bias=False,
101+
k_int_bits=0, b_int_bits=0, units=NB_CLASSES, use_bias=False,
127102
act=XActivation(sys_bits=sys_bits, o_int_bits=0, type=None),),
128103
softmax=True
129104
)

0 commit comments

Comments
 (0)
Please sign in to comment.