@@ -86,25 +86,21 @@ typedef struct {
86
86
87
87
Memory_st mem ;
88
88
89
- static inline void flush_cache (void * addr , uint32_t bytes ) {
90
- // Do nothing
91
- }
92
-
93
89
// Get and set config are done by sv
94
90
extern EXT_C u32 get_config (u32 );
95
91
extern EXT_C void set_config (u32 , u32 );
92
+ static inline void flush_cache (void * addr , uint32_t bytes ) {} // Do nothing
96
93
97
94
#else
98
95
#define sim_fprintf (...)
99
96
#define mem (*(Memory_st* restrict)MEM_BASEADDR)
100
97
101
98
inline volatile u32 get_config (u32 offset ){
102
- return * (volatile u32 * )(CONFIG_BASEADDR + offset );
99
+ return * (volatile u32 * )(CONFIG_BASEADDR + offset * 4 );
103
100
}
104
101
105
102
inline void set_config (u32 offset , u32 data ){
106
- volatile u32 * Addr = (volatile u32 * restrict)(CONFIG_BASEADDR + offset );
107
- * Addr = data ;
103
+ * (volatile u32 * restrict)(CONFIG_BASEADDR + offset * 4 ) = data ;
108
104
}
109
105
#endif
110
106
@@ -120,6 +116,7 @@ typedef struct {
120
116
// Helper functions
121
117
122
118
static inline void print_output () {
119
+ flush_cache (& mem .y , sizeof (mem .y ));
123
120
for (int i = 0 ; i < O_WORDS ; i ++ ){
124
121
printf ("y[%d]: %f \n" , i , (float )mem .y [i ]);
125
122
}
@@ -161,22 +158,20 @@ static inline void write_x(i8 val, i8 *restrict p_out_buffer, i32 ib, i32 ixp, i
161
158
162
159
i32 p_offset = (ixp == 0 ) ? 0 : (pb_out -> cm_p0 + (ixp - 1 )* pb_out -> cm ) * pb_out -> xp_words ;
163
160
i32 flat_index_n2r = (((ixn * pb_out -> l + ixl )* pb_out -> w + ixw )* xcm + ixcm )* (PE_ROWS + pb_out -> x_pad ) + ixr ; // multidim_index -> flat_index [n,l,w,cm,r]
161
+ i32 flat_index = p_offset + flat_index_n2r ;
164
162
165
163
#ifdef XDEBUG
166
- i32 flat_index = p_offset + flat_index_n2r ;
167
164
mem .debug_tiled [flat_index ] = val ;
168
165
#endif
169
166
170
167
// Pack bits and store
171
- div_t packed_idx = div (p_offset + flat_index_n2r , X_WORDS_PER_BYTE );
168
+ div_t packed_idx = div (flat_index , X_WORDS_PER_BYTE );
172
169
assert_printf (packed_idx .quot , < , bundles [ib ].o_bytes , "write_x" , WRITEX_DEBUG_INFO );
173
170
174
171
u8 packed_val = ((u8 )val & X_BITS_MASK ) << (packed_idx .rem * X_BITS );
175
172
u8 mem_val = p_out_buffer [packed_idx .quot ];
176
173
u8 mem_val_cleaned = X_POSITION_INVERTED_MASKS [packed_idx .rem ] & mem_val ;
177
174
write_flush_u8 ((u8 * )(p_out_buffer + packed_idx .quot ), mem_val_cleaned | packed_val );
178
-
179
- // if (ib==1 && packed_idx.quot >= 356) debug_printf("index:%d, final_val:%d --- position:%d value:%d packed_val:%d, mem_val:%d, mem_val_cleaned:%d, clean_mask:%d, pos_mask:%d \n", packed_index, mem.debug_packed[packed_index], packed_position, val, packed_val, mem_val, mem_val_cleaned, X_BITS_MASK, X_POSITION_INVERTED_MASKS[packed_position]);
180
175
}
181
176
182
177
@@ -285,7 +280,7 @@ extern EXT_C u8 model_run() {
285
280
#endif
286
281
287
282
debug_printf ("Starting model_run()\n" );
288
- set_config (4 * A_START , 1 );
283
+ set_config (A_START , 1 );
289
284
290
285
for (ib = 0 ; ib < N_BUNDLES ; ib ++ ) {
291
286
@@ -307,7 +302,7 @@ extern EXT_C u8 model_run() {
307
302
#ifdef SIM
308
303
DMA_WAIT :
309
304
// In simulation, return here so SV can advance time; the next call jumps back to DMA_WAIT
310
- if (!get_config (4 * ( A_DONE_WRITE + ocm_bank ) ))
305
+ if (!get_config (A_DONE_WRITE + ocm_bank ))
311
306
return 1 ;
312
307
313
308
char f_path_raw [1000 ], f_path_sum [1000 ]; // make sure full f_path_raw is shorter than 1000
@@ -316,13 +311,13 @@ extern EXT_C u8 model_run() {
316
311
FILE * fp_raw = fopen (f_path_raw , "a" );
317
312
FILE * fp_sum = fopen (f_path_sum , "a" );
318
313
#else
319
- while (!get_config (4 * ( A_DONE_WRITE + ocm_bank ) )){
314
+ while (!get_config (A_DONE_WRITE + ocm_bank )){
320
315
// in FPGA, wait for write done
321
316
};
317
+ flush_cache (& ocm [ocm_bank ], PE_ROWS * PE_COLS * sizeof (Y_TYPE )) ;
322
318
usleep (0 );
323
319
#endif
324
- set_config (4 * (A_DONE_WRITE + ocm_bank ), 0 );
325
- flush_cache (& ocm [ocm_bank ], PE_ROWS * PE_COLS * sizeof (Y_TYPE )) ;
320
+ set_config (A_DONE_WRITE + ocm_bank , 0 );
326
321
327
322
w_last = iw_kw2 == pb -> w_kw2 - 1 ? pb -> kw /2 + 1 : 1 ;
328
323
sram_addr = 0 ;
@@ -508,7 +503,7 @@ extern EXT_C u8 model_run() {
508
503
fclose (fp_sum );
509
504
fclose (fp_raw );
510
505
#endif
511
- set_config (4 * ( A_DONE_READ + ocm_bank ) , 1 );
506
+ set_config (A_DONE_READ + ocm_bank , 1 );
512
507
debug_printf ("-------- iw_kw2 0x%x done \n" , iw_kw2 );
513
508
} // iw_kw2
514
509
iw_kw2 = 0 ;
@@ -554,11 +549,10 @@ extern EXT_C u8 model_run() {
554
549
fclose (fp_packed );
555
550
}
556
551
#endif
557
- set_config (4 * A_BUNDLE_DONE , 1 );
552
+ set_config (A_BUNDLE_DONE , 1 );
558
553
} // ib
559
554
ib = 0 ;
560
555
debug_printf ("done all bundles!!\n" );
561
- flush_cache (& mem .y , sizeof (mem .y ));
562
556
#ifdef SIM
563
557
is_first_call = 1 ;
564
558
#endif
@@ -569,19 +563,6 @@ extern EXT_C u8 model_run() {
569
563
// Rest of the helper functions used in simulation.
570
564
#ifdef SIM
571
565
572
- extern EXT_C void sim_fill_memory (){
573
- FILE * fp ;
574
- char f_path [1000 ];
575
-
576
- sprintf (f_path , "%s/wbx.bin" , DATA_DIR );
577
- fp = fopen (f_path , "rb" );
578
- debug_printf ("DEBUG: Reading from file %s \n" , f_path );
579
- if (!fp )
580
- debug_printf ("ERROR! File not found: %s \n" , f_path );
581
- int bytes = fread (mem .w , 1 , WB_BYTES + X_BYTES , fp );
582
- fclose (fp );
583
- }
584
-
585
566
extern EXT_C u32 addr_64to32 (void * restrict addr ){
586
567
u64 offset = (u64 )addr - (u64 )& mem ;
587
568
return (u32 )offset + 0x20000000 ;
@@ -613,24 +594,31 @@ u32 addr_64to32 (void* addr){
613
594
extern EXT_C void model_setup (){
614
595
615
596
#ifdef SIM
616
- sim_fill_memory ();
597
+ FILE * fp ;
598
+ char f_path [1000 ];
599
+ sprintf (f_path , "%s/wbx.bin" , DATA_DIR );
600
+ fp = fopen (f_path , "rb" );
601
+ debug_printf ("DEBUG: Reading from file %s \n" , f_path );
602
+ if (!fp ) debug_printf ("ERROR! File not found: %s \n" , f_path );
603
+ int bytes = fread (mem .w , 1 , WB_BYTES + X_BYTES , fp );
604
+ fclose (fp );
617
605
#endif
618
606
flush_cache (& mem .w , WB_BYTES + X_BYTES ); // force transfer to DDR, starting addr & length
619
607
620
608
// Write registers in controller
621
- set_config (4 * A_START , 0 ); // Start
622
- set_config (4 * ( A_DONE_READ + 0 ) , 1 ); // Done read ocm bank 0
623
- set_config (4 * ( A_DONE_READ + 1 ) , 1 ); // Done read ocm bank 1
624
- set_config (4 * ( A_DONE_WRITE + 0 ) , 0 ); // Done write ocm bank 0
625
- set_config (4 * ( A_DONE_WRITE + 1 ) , 0 ); // Done write ocm bank 1
626
- set_config (4 * ( A_OCM_BASE + 0 ) , addr_64to32 (ocm [0 ])); // Base addr ocm bank 0
627
- set_config (4 * ( A_OCM_BASE + 1 ) , addr_64to32 (ocm [1 ])); // Base addr ocm bank 1
628
- set_config (4 * A_WEIGHTS_BASE , addr_64to32 (mem .w )); // Base adddr weights
629
- set_config (4 * A_BUNDLE_DONE , 1 ); // Bundle done (? )
630
- set_config (4 * A_N_BUNDLES_1 , N_BUNDLES ); // Number of bundles
631
- set_config (4 * A_W_DONE , 0 ); // Weigths done
632
- set_config (4 * A_X_DONE , 0 ); // Bundle done
633
- set_config (4 * A_O_DONE , 0 ); // Output done
609
+ set_config (A_START , 0 ); // Start
610
+ set_config (A_DONE_READ + 0 , 1 ); // Done read ocm bank 0
611
+ set_config (A_DONE_READ + 1 , 1 ); // Done read ocm bank 1
612
+ set_config (A_DONE_WRITE + 0 , 0 ); // Done write ocm bank 0
613
+ set_config (A_DONE_WRITE + 1 , 0 ); // Done write ocm bank 1
614
+ set_config (A_OCM_BASE + 0 , addr_64to32 (ocm [0 ])); // Base addr ocm bank 0
615
+ set_config (A_OCM_BASE + 1 , addr_64to32 (ocm [1 ])); // Base addr ocm bank 1
616
+ set_config (A_WEIGHTS_BASE , addr_64to32 (mem .w )); // Base addr weights
617
+ set_config (A_BUNDLE_DONE , 1 ); // Bundle done writing (pixel dma waits for this )
618
+ set_config (A_N_BUNDLES_1 , N_BUNDLES ); // Number of bundles
619
+ set_config (A_W_DONE , 0 ); // Weights done
620
+ set_config (A_X_DONE , 0 ); // Bundle done
621
+ set_config (A_O_DONE , 0 ); // Output done
634
622
635
623
// Write into BRAM the config for controller
636
624
i32 parameters [8 * N_BUNDLES ];
@@ -640,14 +628,14 @@ extern EXT_C void model_setup(){
640
628
parameters [8 * var + 2 ] = bundles [var ].x_bpt ; // x_bpt
641
629
parameters [8 * var + 3 ] = bundles [var ].w_bpt_p0 ; // w_bpt0
642
630
parameters [8 * var + 4 ] = bundles [var ].w_bpt ; // w_bpt
631
+
643
632
assert_printf (bundles [var ].p , < , 1 <<16 , "" , "P should be less than 2**16 for bundle:%x" , var );
644
633
assert_printf (bundles [var ].t , < , 1 <<16 , "" , "T should be less than 2**16 for bundle:%x" , var );
645
634
parameters [8 * var + 5 ] = (bundles [var ].t << 16 ) + bundles [var ].p ; // max p
646
-
647
635
parameters [8 * var + 6 ] = ((u32 * )& bundles [var ].header )[0 ];
648
636
parameters [8 * var + 7 ] = ((u32 * )& bundles [var ].header )[1 ];
649
637
}
650
638
for (int var = 0 ; var < 8 * N_BUNDLES ; var ++ ){
651
- set_config (4 * ( 16 + var ) , parameters [var ]);
639
+ set_config (16 + var , parameters [var ]);
652
640
}
653
641
}
0 commit comments