diff --git a/.gitignore b/.gitignore index c3c6d00..26323b0 100644 --- a/.gitignore +++ b/.gitignore @@ -20,6 +20,7 @@ wave.vcd # Misc log files logs/ +log #la_code la_code diff --git a/README.md b/README.md index de9d9f5..c97b059 100644 --- a/README.md +++ b/README.md @@ -83,4 +83,8 @@ git checkout -b > 引用和致谢 +[cva5](https://github.com/openhwgroup/cva5) 项目和 Eric Matthews + - [乘除法器](https://github.com/risclite/rv32m-multiplier-and-divider) +- [参数化的LFSR](https://github.com/openhwgroup/cva5/blob/master/core/lfsr.sv) +- [基于ROM的Priority Encoder](https://github.com/openhwgroup/cva5/blob/master/core/priority_encoder.sv) \ No newline at end of file diff --git a/src/vsrc/AXI/README.md b/src/vsrc/AXI/README.md index 3705999..e115b10 100644 --- a/src/vsrc/AXI/README.md +++ b/src/vsrc/AXI/README.md @@ -21,30 +21,24 @@ 来自cpu和输出到cpu的信号,其中只有inst_cpu_data_o,data_cpu_data_o和inst_stallreq,data_stallreq是输出的。 //icache/IF input wire [`ADDR]inst_cpu_addr_i, - input wire inst_cpu_ce_i, input wire [`Data]inst_cpu_data_i, - input wire inst_cpu_we_i , - input wire [3:0]inst_cpu_sel_i, - input wire inst_stall_i, input wire inst_flush_i, output reg [`Data]inst_cpu_data_o, - output wire inst_stallreq, input wire [3:0]inst_id,//决定是读数据还是取指令,默认4’b0000 //icache 读请求的类型,3’b100表示一次性读取1个cache行(一个cache行默认4*32bit的数据,inst[addr],inst[addr+4],inst[addr+8],inst[addr+12]);其他值表示一次读取1*32bit数据 input wire [2:0]icache_rd_type_i, - + input wire icache_rd_req_i,//读请求使能信号,高位有效 + output reg icache_rd_rdy_o,//读请求可被接受 + output reg icache_ret_valid_o,//读数据有效 + output reg [1:0]icache_ret_last_o,//最后一个读数据 //dcache/MEM input wire [`ADDR]data_cpu_addr_i, - input wire data_cpu_ce_i, input wire [`Data]data_cpu_data_i, - input wire data_cpu_we_i , input wire [3:0]data_cpu_sel_i, - input wire data_stall_i, input wire data_flush_i, output reg [`Data]data_cpu_data_o, - output wire data_stallreq, input wire [3:0]data_id,//决定是读数据还是取指令,默认4'b0001 // 同icache_rd_type_i input wire [2:0]dcache_rd_type_i, @@ -52,6 +46,15 @@ input wire 
[2:0]dcache_wr_type_i,//decache write type //4*32bit的写入数据,如果只想写一个数据,只需要保证31:0是正确的写入数据即可 input wire [`BurstData]dcache_wr_data,//data from dcache + input wire [2:0]dcache_rd_type_i, + input wire dcache_rd_req_i, + output reg dcache_rd_rdy_o, + output reg dcache_ret_valid_o, + output reg [1:0]dcache_ret_last_o, + input wire dcache_wr_req_i,//写使能,高为有效 + output reg dcache_wr_rdy,//写请求可被接受 + + AXI标准信号接口,输出到从机或从从机输入,无需关心内部逻辑,照着接线就好,s是前缀。 //Slave @@ -103,16 +106,16 @@ output reg s_bready ``` +# 本版本为面向cache的版本,面向CPU的版本,请见5.9号提交的版本。AXI对cache有特定要求务必要仔细阅读说明。 + ## 使用说明 1. 把axi_Master主机接口放到cpuTop中实例化 - * 实现仲裁的axi接口(不支持同种请求的连续发送和突发传输,支持同时送取指和取数),取值id接口`4’b0000`,取数id接口`4'b0001` + * 实现仲裁的axi接口(不支持同种请求的连续发送,支持同时送取指和取数核突发传送),取值id接口`4’b0000`,取数id接口`4'b0001` * 支持写操作,读指令和读数据。若同时发出取指和取数,会并行执行(指同时发送两种请求,若先取指后取数或先取数后取指都无法并行) * 如果连续发送两次读请求,则会等待第一个读请求结束在处理第二个读请求 * 先写后读,写请求结束后才会处理读请求 * 所有的请求在请求结束前,都需要保证来自cpu的输入信号不变 * dcache/icache_rd/wr_type_i表示一次性读或写的数据量。`3'b100`表示一次性读/写连续四个地址的数据;其他值表示只读/写一个数据,推荐直接写0 - * 即使没有cache。icache和dcache开头的信号都要接 - * 下面的说明,不适用突发传输,icache/dcache_rd/wr_type_i照着抄就好,不需要改。需要注意的是dcache_wr_data需要是128bit的数据,如果只想写一个的话,需要再前面添加96个0,例如写data[31:0],则dcache_wr_data({{96{1'b0}}},data[31:0]) ``` wire aresetn=~rst; @@ -128,27 +131,36 @@ .aresetn(aresetn), //low is valid //icache/IF .inst_cpu_addr_i(inst_pc), - .inst_cpu_ce_i(inst_chip_enable), - .inst_cpu_we_i(0) , .inst_cpu_sel_i(4'b1111), .inst_flush_i(0), .inst_cpu_data_o(inst_data_from_axi), - .inst_stallreq(stallreq_from_if), .inst_id(4'b0000),//决定是读数据还是取指令 - .icache_rd_type_i(0),//3'b100开启连续读4个数据;0只读一个数据 - + .icache_rd_type_i(3'b100),//3'b100开启连续读4个数据;0只读一个数据 + .icache_rd_req_i(),//接读使能 + .icache_rd_rdy_o(),//接读请求握手信号 + .icache_ret_valid_o(),//接读有效信号 + .icache_ret_last_o(),//接最后一个读数据信号 + + //dacache/MEM .data_cpu_addr_i(data_pc), - .data_cpu_ce_i(data_chip_enable), - .data_cpu_we_i(data_we) , + .data_cpu_data_i(data), .data_cpu_sel_i(4'b1111), .data_flush_i(0), .data_cpu_data_o(mem_data_from_axi), - 
.data_stallreq(stallreq_from_mem), .data_id(4'b0001),//决定是读数据还是取指令 - .dcache_rd_type_i(0),//同icache - .dcache_wr_type_i(0),//写的数据量,3'b100表示连续写四个数据至相邻的地址;0表示只写一个数据 - .dcache_wr_data({{96{1'b0}},data[31:0]}),//128bit的写入数据,如果只想写一个那么只需要保证31:0正确 + .dcache_rd_type_i(),//同icache + .dcache_wr_type_i(),//写的数据量,3'b100表示连续写四个数据至相邻的地址;0表示只写一个数据 + .dcache_wr_data(),//128bit的写入数据,如果只想写一个那么只需要保证31:0正确 + .dcache_rd_req_i(), + .dcache_rd_rdy_o(), + .dcache_ret_valid_o(), + .dcache_ret_last_o(), + .dcache_wr_data(),//data from dcache + .dcache_wr_req_i(),//write enable signal + .dcache_wr_rdy(),//write can receive + + //ar .s_arid(i_arid), //arbitration .s_araddr(i_araddr), @@ -203,4 +215,6 @@ 3. xxx_cpu_sel_i为字节选通使能,用来实现store类型。 -4. stallreq_if和stallreq_mem为暂停请求,因为AXI直接面向CPU,所以,在AXI进行读写数据时,CPU必须暂停,等到AXI完成读写数据的操作。 +4. 关于cache给AXI的信号。在AXI完成写或读请求前,cache的信号必须要持续的拉高。对于读指令,当ret_last拉高时,才能更新输出给AXI的信号(req,type,addr);对于写,和wr_rdy正常握手就好,输出给AXI的信号只需要保存一个时钟周期。 + +5. cache何时给信号。cache只要发出读或写请求就立刻给出所有信号(req,type,addr,data)。`重点`addr,data,type,req是同时给到AXI,而不是等到cache与rdy握手后才给addr和data,这样AXI就无法接受数据。握手是指握手后接收方立刻把数据存到寄存器里。 \ No newline at end of file diff --git a/src/vsrc/AXI/axi_defines.sv b/src/vsrc/AXI/axi_defines.sv index 1642a42..fd4de6b 100644 --- a/src/vsrc/AXI/axi_defines.sv +++ b/src/vsrc/AXI/axi_defines.sv @@ -7,7 +7,7 @@ `define Lock 1:0 `define Cache 3:0 `define Prot 2:0 -`define Data 31:0 +`define Data 127:0 `define Resp 1:0 `define BurstData 127:0 diff --git a/src/vsrc/AXI/axi_master.sv b/src/vsrc/AXI/axi_master.sv index a8711c1..1a3080c 100644 --- a/src/vsrc/AXI/axi_master.sv +++ b/src/vsrc/AXI/axi_master.sv @@ -1,28 +1,32 @@ `include "AXI/axi_defines.sv" module axi_master ( input wire aclk, - input wire aresetn, //low is valid + input wire aresetn, //low is valid + //inst input wire [`ADDR] inst_cpu_addr_i, - input wire inst_cpu_ce_i, - input wire inst_cpu_we_i, - input wire [3:0] inst_cpu_sel_i, output reg [`Data] inst_cpu_data_o, - output wire inst_stallreq, input wire [3:0] inst_id, 
//决定是读数据还是取指令 input wire [2:0] icache_rd_type_i, //icahce read type + input wire icache_rd_req_i, //read enable signal + output reg icache_rd_rdy_o, //read can receive + output reg icache_ret_valid_o, //read data is valid + output reg [1:0] icache_ret_last_o, // read is over //data input wire [`ADDR] data_cpu_addr_i, - input wire data_cpu_ce_i, - input wire data_cpu_we_i, - input wire [3:0] data_cpu_sel_i, + input wire [15:0] data_cpu_sel_i, output reg [`Data] data_cpu_data_o, - output wire data_stallreq, input wire [3:0] data_id, //决定是读数据还是取指令 input wire [2:0] dcache_rd_type_i, // dacache read type + input wire dcache_rd_req_i, + output reg dcache_rd_rdy_o, + output reg dcache_ret_valid_o, + output reg [1:0] dcache_ret_last_o, input wire [2:0] dcache_wr_type_i, //decache write type input wire [`BurstData] dcache_wr_data, //data from dcache + input wire dcache_wr_req_i, //write enable signal + output reg dcache_wr_rdy, //write can receive //Slave @@ -61,7 +65,7 @@ module axi_master ( //w output wire [`ID] s_wid, output reg [`Data] s_wdata, - output wire [3:0] s_wstrb, //字节选通位和sel差不多 + output reg [15:0] s_wstrb, //字节选通位和sel差不多 output reg s_wlast, output reg s_wvalid, input wire s_wready, @@ -74,23 +78,16 @@ module axi_master ( ); reg write_wait_enable; - //read instruction stall - reg inst_stall_req_r; - assign inst_stallreq = inst_stall_req_r; - reg [31:0] inst_buffer; + reg [`Data] inst_buffer; //read and write data stall - reg stall_req_w; - reg data_stall_req_r; - reg [31:0] data_buffer; - assign data_stallreq = data_stall_req_r || stall_req_w; + reg [`Data] data_buffer; reg [3:0] inst_r_state; reg [3:0] data_r_state; //fetch instruction before fetch data reg is_fetching_inst; - reg is_fetch_inst_OK; //read instruction signal to slave reg [`ID] inst_s_arid; //arbitration @@ -112,48 +109,62 @@ module axi_master ( //改变输出 always @(*) begin if (!aresetn) begin - inst_stall_req_r = 0; - inst_cpu_data_o = 0; + inst_cpu_data_o = 0; is_fetching_inst = 0; + 
icache_rd_rdy_o = 1; + icache_ret_valid_o = 0; + icache_ret_last_o = 0; end else begin case (inst_r_state) `R_FREE: begin - if (inst_cpu_ce_i && inst_cpu_we_i == 0) begin - inst_stall_req_r = 1; - inst_cpu_data_o = 0; - //is_fetching_inst=1; + if (icache_rd_req_i) begin + inst_cpu_data_o = 0; is_fetching_inst = 0; + + icache_rd_rdy_o = 1; + icache_ret_valid_o = 0; + icache_ret_last_o = 0; end else begin - inst_stall_req_r = 0; - inst_cpu_data_o = 0; + inst_cpu_data_o = 0; is_fetching_inst = 0; + + icache_rd_rdy_o = 1; + icache_ret_valid_o = 0; + icache_ret_last_o = 0; end end `R_ADDR: begin - inst_stall_req_r = 1; - inst_cpu_data_o = 0; + inst_cpu_data_o = 0; is_fetching_inst = 1; + + icache_rd_rdy_o = 0; + icache_ret_valid_o = 0; + icache_ret_last_o = 0; end `R_DATA: begin - //use id to judge the s_rdata type if (s_rvalid && s_rlast && s_rid[0] == 0) begin - inst_stall_req_r = 0; - inst_cpu_data_o = s_rdata; + inst_cpu_data_o = s_rdata; is_fetching_inst = 0; + + icache_rd_rdy_o = 1; + icache_ret_valid_o = 1; + icache_ret_last_o = 1; end else if (s_rvalid && s_rready && s_rid[0] == 0) begin - inst_stall_req_r = 1; - inst_cpu_data_o = s_rdata; + inst_cpu_data_o = s_rdata; is_fetching_inst = 1; + + icache_rd_rdy_o = 0; + icache_ret_valid_o = 1; + icache_ret_last_o = 0; end else begin - inst_stall_req_r = 1; - inst_cpu_data_o = 0; + inst_cpu_data_o = 0; is_fetching_inst = 1; + icache_rd_rdy_o = 0; + icache_ret_valid_o = 0; + icache_ret_last_o = 0; end end default: begin - inst_stall_req_r = 0; - inst_cpu_data_o = 0; - is_fetching_inst = 0; end endcase end @@ -177,12 +188,12 @@ module axi_master ( `R_FREE: begin if (write_wait_enable == 0) begin - if((inst_cpu_ce_i&&(inst_cpu_we_i==0))&&(!(data_cpu_ce_i&&(data_cpu_we_i==0))))//fetch inst but don't fetch data + if((icache_rd_req_i)&&(!(dcache_rd_req_i)))//fetch inst but don't fetch data begin inst_r_state <= `R_ADDR; inst_s_arid <= inst_id; inst_s_araddr <= inst_cpu_addr_i; - inst_s_arsize <= 3'b010; + inst_s_arsize <= 
3'b100; inst_buffer <= 0; inst_s_arlen <= inst_real_s_arlen; inst_s_rready <= 0; @@ -190,14 +201,14 @@ module axi_master ( inst_s_arvalid <= 1; end - else if((inst_cpu_ce_i&&(inst_cpu_we_i==0))&&(data_cpu_ce_i&&(data_cpu_we_i==0)))//fetch inst and fetch data + else if((icache_rd_req_i)&&(dcache_rd_req_i))//fetch inst and fetch data begin //wait for fetch data request run into R_DATA state if (data_r_state == `R_DATA) begin inst_r_state <= `R_ADDR; inst_s_arid <= inst_id; inst_s_araddr <= inst_cpu_addr_i; - inst_s_arsize <= 3'b010; + inst_s_arsize <= 3'b100; inst_buffer <= 0; inst_s_arlen <= inst_real_s_arlen; inst_s_rready <= 0; @@ -271,7 +282,6 @@ module axi_master ( /** R **/ `R_DATA: begin if (s_rvalid && s_rlast && s_rid[0] == 0) begin - inst_r_state <= `R_FREE; inst_buffer <= s_rdata; inst_s_rready <= 0; @@ -290,7 +300,6 @@ module axi_master ( inst_s_arsize <= inst_s_arsize; inst_s_arlen <= 0; end - end default: begin @@ -319,33 +328,47 @@ module axi_master ( //改变输出 always @(*) begin if (!aresetn) begin - data_stall_req_r = 0; - data_cpu_data_o = 0; + data_cpu_data_o = 0; + dcache_rd_rdy_o = 1; + dcache_ret_valid_o = 0; + dcache_ret_last_o = 0; end else begin case (data_r_state) `R_FREE: begin - if (data_cpu_ce_i && data_cpu_we_i == 0) begin - data_stall_req_r = 1; - data_cpu_data_o = 0; + if (dcache_rd_req_i) begin + data_cpu_data_o = 0; + dcache_rd_rdy_o = 1; + dcache_ret_valid_o = 0; + dcache_ret_last_o = 0; end else begin - data_stall_req_r = 0; - data_cpu_data_o = 0; + data_cpu_data_o = 0; + dcache_rd_rdy_o = 1; + dcache_ret_valid_o = 0; + dcache_ret_last_o = 0; end end `R_ADDR: begin - data_stall_req_r = 1; - data_cpu_data_o = 0; + data_cpu_data_o = 0; + dcache_rd_rdy_o = 0; + dcache_ret_valid_o = 0; + dcache_ret_last_o = 0; end `R_DATA: begin if (s_rvalid && s_rlast && s_rid[0] == 1) begin - data_stall_req_r = 0; - data_cpu_data_o = s_rdata; + data_cpu_data_o = s_rdata; + dcache_rd_rdy_o = 1; + dcache_ret_valid_o = 1; + dcache_ret_last_o = 1; end else 
if (s_rvalid && s_rready && s_rid[0] == 1) begin - data_stall_req_r = 1; - data_cpu_data_o = s_rdata; + data_cpu_data_o = s_rdata; + dcache_rd_rdy_o = 0; + dcache_ret_valid_o = 1; + dcache_ret_last_o = 0; end else begin - data_stall_req_r = 1; - data_cpu_data_o = 0; + data_cpu_data_o = 0; + dcache_rd_rdy_o = 0; + dcache_ret_valid_o = 0; + dcache_ret_last_o = 0; end end default: begin @@ -372,12 +395,12 @@ module axi_master ( `R_FREE: begin - if(data_cpu_ce_i&&(data_cpu_we_i==0)&&(is_fetching_inst==0)&&(write_wait_enable==0)) + if(dcache_rd_req_i&&(is_fetching_inst==0)&&(write_wait_enable==0)) begin data_r_state <= `R_ADDR; data_s_arid <= data_id; data_s_araddr <= data_cpu_addr_i; - data_s_arsize <= 3'b010; + data_s_arsize <= 3'b100; data_buffer <= 0; data_s_arlen <= data_real_s_arlen; data_s_rready <= 0; @@ -446,7 +469,6 @@ module axi_master ( data_s_arsize <= 0; data_s_arlen <= 0; end - end default: begin @@ -468,6 +490,7 @@ module axi_master ( //write + reg [15:0] write_wstrb_buffer; reg [`BurstData] write_buffer; @@ -490,33 +513,35 @@ module axi_master ( //改变输出 always @(*) begin if (!aresetn) begin - stall_req_w = 0; write_wait_enable = 0; + dcache_wr_rdy = 1; end else begin case (w_state) `W_FREE: begin - if (data_cpu_ce_i && (data_cpu_we_i)) begin - stall_req_w = 1; + if (dcache_wr_req_i) begin write_wait_enable = 1; + dcache_wr_rdy = 1; end else begin - stall_req_w = 0; write_wait_enable = 0; + dcache_wr_rdy = 1; end end `W_ADDR, `W_DATA: begin - stall_req_w = 1; write_wait_enable = 1; + dcache_wr_rdy = 0; end `W_RESP: begin if (s_bvalid && s_bready) begin - stall_req_w = 0; write_wait_enable = 0; + dcache_wr_rdy = 1; end else begin - stall_req_w = 1; write_wait_enable = 1; + dcache_wr_rdy = 0; end end default: begin + write_wait_enable = 0; + dcache_wr_rdy = 0; end endcase end @@ -530,9 +555,11 @@ module axi_master ( s_awsize <= 0; s_awvalid <= 0; + s_wstrb <= 0; s_wdata <= 0; s_wvalid <= 0; s_bready <= 0; + write_wstrb_buffer <= 0; write_buffer <= 0; 
s_wlast <= 0; end else begin @@ -540,26 +567,29 @@ module axi_master ( `W_FREE: begin - if (data_cpu_ce_i && (data_cpu_we_i)) begin + if (dcache_wr_req_i) begin w_state <= `W_ADDR; s_awaddr <= data_cpu_addr_i; - s_awsize <= 3'b010; + s_awsize <= 3'b100; s_awvalid <= 1; + s_wstrb <= 0; s_wdata <= 0; s_wvalid <= 0; s_bready <= 0; + write_wstrb_buffer <= data_cpu_sel_i; write_buffer <= dcache_wr_data; s_wlast <= 0; end else begin w_state <= w_state; s_awaddr <= 0; s_awsize <= 0; - s_awvalid <= 0; + s_wstrb <= 0; s_wdata <= 0; s_wvalid <= 0; s_bready <= 0; + write_wstrb_buffer <= 0; write_buffer <= 0; s_wlast <= 0; end @@ -575,7 +605,8 @@ module axi_master ( s_awvalid <= 0; s_wvalid <= 1; s_bready <= 1; - s_wdata <= write_buffer[31:0]; + s_wstrb <= write_wstrb_buffer; + s_wdata <= write_buffer; write_buffer <= {{32{1'b0}}, write_buffer[127:32]}; if (s_awlen == 0) s_wlast <= 1; @@ -588,26 +619,29 @@ module axi_master ( s_awvalid <= s_awvalid; s_wvalid <= s_wvalid; s_bready <= s_bready; + s_wstrb <= 0; s_wdata <= 0; write_buffer <= write_buffer; s_wlast <= s_wlast; end end - /** W **/ `W_DATA: begin if (s_wvalid && s_wready) begin if (cnt == s_awlen) begin w_state <= `W_RESP; + s_wstrb <= 0; s_wdata <= 0; s_wvalid <= 0; + write_wstrb_buffer <= 0; write_buffer <= 0; s_wlast <= 0; end else begin - w_state <= w_state; - s_wdata <= write_buffer[31:0]; - write_buffer <= {{32{1'b0}}, write_buffer[127:32]}; + w_state <= w_state; + s_wstrb <= write_wstrb_buffer; + s_wdata <= write_buffer; + // write_buffer <= {{32{1'b0}}, write_buffer[127:32]}; s_wvalid <= 1; if (cnt == s_awlen - 1) s_wlast <= 1; else s_wlast <= 0; @@ -618,7 +652,7 @@ module axi_master ( w_state <= w_state; s_wdata <= s_wdata; s_wvalid <= s_wvalid; - s_wlast = s_wlast; + s_wlast <= s_wlast; end end @@ -651,9 +685,6 @@ module axi_master ( assign s_awcache = 0; assign s_awprot = 0; assign s_wid = 0; - assign s_wstrb = data_cpu_sel_i; - // assign s_wlast=1; - //set axi signal assign s_arid = inst_s_arid | 
data_s_arid; @@ -661,4 +692,5 @@ module axi_master ( assign s_arsize = inst_s_arsize | data_s_arsize; assign s_arvalid = inst_s_arvalid | data_s_arvalid; assign s_rready = inst_s_rready | data_s_rready; + endmodule diff --git a/src/vsrc/cpu_top.sv b/src/vsrc/cpu_top.sv index 6ad20e8..efedd81 100644 --- a/src/vsrc/cpu_top.sv +++ b/src/vsrc/cpu_top.sv @@ -8,7 +8,8 @@ `include "AXI/axi_master.sv" `include "frontend/frontend.sv" `include "instr_buffer.sv" -`include "dummy_icache.sv" +`include "icache.sv" +`include "dummy_dcache.sv" `include "ctrl.sv" `include "pipeline_defines.sv" `include "pipeline/1_decode/id.sv" @@ -39,7 +40,7 @@ module cpu_top ( input arready, // read back input [ 3:0] rid, - input [31:0] rdata, + input [127:0] rdata, input [ 1:0] rresp, input rlast, input rvalid, @@ -57,8 +58,8 @@ module cpu_top ( input awready, // write data output [ 3:0] wid, - output [31:0] wdata, - output [ 3:0] wstrb, + output [127:0] wdata, + output [ 15:0] wstrb, output wlast, output wvalid, input wready, @@ -92,49 +93,55 @@ module cpu_top ( assign rst = ~rst_n; // ICache <-> AXI Controller - logic axi_busy; - logic [`RegBus] axi_data; - logic [`RegBus] axi_addr; + logic icache_axi_rreq; + logic axi_icache_rdy, axi_icache_rvalid; + logic [127:0] axi_icache_data; // 128b + logic [`RegBus] icache_axi_addr; // MEM <-> AXI Controller - // TODO: replace with DCache - logic data_axi_we; - logic [`DataAddrBus] data_axi_addr; - logic [`RegBus] data_axi_data; - logic [`RegBus] axi_mem_data; - logic data_axi_busy; - logic [3:0] data_axi_sel; // Byte selection - - mem_axi_struct mem_axi_signal[2]; - - assign data_axi_we = mem_axi_signal[0].we | mem_axi_signal[1].we; - assign data_axi_addr = mem_axi_signal[0].ce ? mem_axi_signal[0].addr : mem_axi_signal[1].ce ? mem_axi_signal[1].addr : 32'b0; - assign data_axi_data = mem_axi_signal[0].ce ? mem_axi_signal[0].data : mem_axi_signal[1].ce ? mem_axi_signal[1].data : 32'b0; - assign data_axi_sel = mem_axi_signal[0].ce ? 
mem_axi_signal[0].sel : mem_axi_signal[1].ce ? mem_axi_signal[1].sel : 4'b0; + logic dcache_axi_rreq; // Read handshake + logic axi_dcache_rd_rdy; + logic axi_dcache_rvalid; + logic dcache_axi_wreq; // Write handshake + logic axi_dcache_wr_rdy; + logic [`DataAddrBus] dcache_axi_raddr; + logic [`DataAddrBus] dcache_axi_waddr; + logic [`DataAddrBus] dcache_axi_addr; + assign dcache_axi_addr = dcache_axi_rreq ? dcache_axi_raddr : dcache_axi_wreq ? dcache_axi_waddr : 0; + logic [127:0] axi_dcache_data; + logic [127:0] dcache_axi_data; + logic [`RegBus] cache_mem_data; + logic mem_data_ok,mem_addr_ok; + logic [15:0] dcache_axi_wstrb; // Byte selection axi_master u_axi_master ( .aclk (aclk), .aresetn(aresetn), // <-> ICache - .inst_cpu_addr_i(axi_addr), - .inst_cpu_ce_i(axi_addr != 0), // FIXME: ce should not be used as valid? - .inst_cpu_sel_i(4'b1111), - .inst_cpu_data_o(axi_data), - .inst_stallreq(axi_busy), - .inst_id(4'b0000), // Read Instruction only, TODO: move this from AXI to cache - - // <-> MEM Stage - .data_cpu_addr_i(data_axi_addr), - .data_cpu_ce_i(data_axi_addr != 0), // FIXME: ce should not be used as valid? 
- .data_cpu_we_i(data_axi_we), // FIXME: Write enable - .data_cpu_sel_i(data_axi_sel), - .data_cpu_data_o(axi_mem_data), - .data_stallreq(data_axi_busy), + .inst_cpu_addr_i(icache_axi_addr), + .inst_cpu_data_o(axi_icache_data), + .inst_id(4'b0000), // Read Instruction only + .icache_rd_type_i(3'b000), // Read 128b for 1 time + .icache_rd_req_i(icache_axi_rreq), + .icache_rd_rdy_o(axi_icache_rdy), + .icache_ret_valid_o(axi_icache_rvalid), + .icache_ret_last_o(), // Used in burst transfer, currently unused + + // <-> DCache + .data_cpu_addr_i(dcache_axi_addr), + .data_cpu_sel_i(dcache_axi_wstrb), + .data_cpu_data_o(axi_dcache_data), .data_id(4'b0001), + .dcache_rd_req_i(dcache_axi_rreq), .dcache_rd_type_i(3'b000), // For [31:0] + .dcache_rd_rdy_o(axi_dcache_rd_rdy), + .dcache_ret_valid_o(axi_dcache_rvalid), + .dcache_ret_last_o(), // same as ICache + .dcache_wr_req_i(dcache_axi_wreq), .dcache_wr_type_i(3'b000), - .dcache_wr_data({{96{1'b0}},data_axi_data}), + .dcache_wr_data(dcache_axi_data), + .dcache_wr_rdy(axi_dcache_wr_rdy), // External AXI signals @@ -176,42 +183,85 @@ module cpu_top ( .s_bready(bready) ); - // FETCH_WIDTH is 2 - localparam FETCH_WIDTH = 2; - // Frontend -> ICache - logic [`InstAddrBus] frontend_icache_addr[FETCH_WIDTH]; - - // ICache -> Frontend - logic icache_frontend_stallreq; - logic icache_frontend_valid[FETCH_WIDTH]; - logic [`InstAddrBus] icache_frontend_addr[FETCH_WIDTH]; - logic [`RegBus] icache_frontend_data[FETCH_WIDTH]; + mem_cache_struct mem_cache_signal[2]; + logic mem_cache_we,mem_cache_ce; + logic [3:0] mem_cache_sel; + logic [31:0] mem_cache_addr,mem_cache_data; + + assign mem_cache_ce = mem_cache_signal[0].ce | mem_cache_signal[1].ce; + assign mem_cache_we = mem_cache_signal[0].we | mem_cache_signal[1].we; + assign mem_cache_sel = mem_cache_signal[0].we ? mem_cache_signal[0].sel : mem_cache_signal[1].we ? 
mem_cache_signal[1].sel : 0; + assign mem_cache_addr = mem_cache_signal[0].addr | mem_cache_signal[1].addr; + assign mem_cache_data = mem_cache_signal[0].we ? mem_cache_signal[0].data : mem_cache_signal[1].we ? mem_cache_signal[1].data : 0; + + dummy_dcache u_dcache( + .clk (clk ), + .rst (rst ), + + .valid (mem_cache_ce), + .op (mem_cache_we), + .uncache (1'b0), + .index (mem_cache_addr[11:4]), + .tag (mem_cache_addr[31:12]), + .offset (mem_cache_addr[3:0]), + .wstrb (mem_cache_sel), + .wdata (mem_cache_data), + .addr_ok (mem_addr_ok), + .data_ok (mem_data_ok), + .rdata (cache_mem_data), + // <-> AXI Controller + .rd_req (dcache_axi_rreq), + .rd_type (), + .rd_addr (dcache_axi_raddr), + .rd_rdy (axi_dcache_rd_rdy), + .ret_valid (axi_dcache_rvalid), + .ret_last (), + .ret_data (axi_dcache_data), + .wr_req (dcache_axi_wreq), + .wr_type (), + .wr_addr (dcache_axi_waddr), + .wr_wstrb (dcache_axi_wstrb), + .wr_data (dcache_axi_data), + .wr_rdy (axi_dcache_wr_rdy) + ); + - dummy_icache #( - .ADDR_WIDTH(`RegWidth), - .DATA_WIDTH(`RegWidth) - ) u_dummy_icache ( - .clk(clk), - .rst(rst), + // FETCH_WIDTH is 4 + localparam FETCH_WIDTH = 4; - // <-> Frontend - .flush(backend_flush), - .raddr_1_i (frontend_icache_addr[0]), - .raddr_2_i (frontend_icache_addr[1]), - .stallreq_o(icache_frontend_stallreq), - .rvalid_1_o(icache_frontend_valid[0]), - .rvalid_2_o(icache_frontend_valid[1]), - .raddr_1_o (icache_frontend_addr[0]), - .raddr_2_o (icache_frontend_addr[1]), - .rdata_1_o (icache_frontend_data[0]), - .rdata_2_o (icache_frontend_data[1]), + // Frontend -> ICache + logic [1:0] frontend_icache_rreq; + logic [1:0][`InstAddrBus] frontend_icache_addr; - // <-> AXI Controller - .axi_addr_o(axi_addr), - .axi_data_i(axi_data), - .axi_busy_i(axi_busy) - ); + // ICache -> Frontend + logic [1:0]icache_frontend_valid; + logic [1:0][127:0] icache_frontend_data; // Cacheline is 128b + + icache u_icache( + .clk (clk ), + .rst (rst ), + + // Port A + .rreq_1_i (frontend_icache_rreq[0]), + 
.raddr_1_i (frontend_icache_addr[0]), + .rvalid_1_o (icache_frontend_valid[0]), + .rdata_1_o (icache_frontend_data[0]), + // Port B + .rreq_2_i (frontend_icache_rreq[1]), + .raddr_2_i (frontend_icache_addr[1]), + .rvalid_2_o (icache_frontend_valid[1]), + .rdata_2_o (icache_frontend_data[1]), + + // <-> AXI Controller + .axi_addr_o (icache_axi_addr), + .axi_rreq_o (icache_axi_rreq), + .axi_rdy_i (axi_icache_rdy), + .axi_rvalid_i (axi_icache_rvalid), + .axi_rlast_i (), + .axi_data_i (axi_icache_data) + ); + // Frontend <-> Instruction Buffer logic ib_frontend_stallreq; @@ -220,6 +270,7 @@ module cpu_top ( // Frontend <-> Backend logic backend_flush; + logic [1:0] is_last_in_block; // <- WB, suggest whether last instr in basic block is committed // All frontend structures frontend u_frontend ( @@ -228,15 +279,15 @@ module cpu_top ( // <-> ICache .icache_read_addr_o(frontend_icache_addr), // -> ICache - .icache_stallreq_i(icache_frontend_stallreq), // <- ICache, I$ cannot accept more addr requests + .icache_read_req_o(frontend_icache_rreq), .icache_read_valid_i(icache_frontend_valid), // <- ICache - .icache_read_addr_i(icache_frontend_addr), // <- ICache .icache_read_data_i(icache_frontend_data), // <- ICache // <-> Backend .branch_update_info_i(), // branch update signals, <- EXE Stage, unused .backend_next_pc_i (next_pc), // backend PC, <- pc_gen .backend_flush_i (backend_flush), // backend flush, usually come with next_pc + .backend_commit_i (is_last_in_block[0] | is_last_in_block[1]), // <-> Instruction Buffer .instr_buffer_stallreq_i(ib_frontend_stallreq), // instruction buffer is full @@ -503,7 +554,7 @@ module cpu_top ( assign csr_mem_signal = {csr_pg,csr_da,csr_dmw0,csr_dmw1,csr_plv,csr_datm}; //assign tlb_mem_signal = {data_tlb_found,data_tlb_index,data_tlb_v,data_tlb_d,data_tlb_mat,data_tlb_plv}; - logic wb_LLbit_we_i[2],wb_LLbit_value_i[2]; + logic wb_LLbit_we_i[2],wb_LLbit_value_i[2],data_fetch; generate for (genvar i = 0; i < 2; i++) begin : mem mem u_mem 
( @@ -513,12 +564,14 @@ module cpu_top ( .signal_o(mem_signal_o[i]), - // -> AXI Controller - .signal_axi_o(mem_axi_signal[i]), + // -> cache + .signal_cache_o(mem_cache_signal[i]), // <- AXI Controller - .axi_busy_i(data_axi_busy), - .mem_data_i(axi_mem_data), + .addr_ok(mem_addr_ok), + .data_ok(mem_data_ok), + .data_fetch(data_fetch), + .mem_data_i(cache_mem_data), // -> Ctrl .stallreq(mem_stallreq[i]), @@ -529,20 +582,11 @@ module cpu_top ( .LLbit_we_o(mem_wb_LLbit_we[i]), .LLbit_value_o(mem_wb_LLbit_value[i]), - .csr_mem_signal(csr_mem_signal), - .disable_cache(1'b0), - // Data forward // -> Dispatch // -> EX - .mem_data_forward_o(mem_data_forward[i]), + .mem_data_forward_o(mem_data_forward[i]) - .data_addr_trans_en(mem_data_addr_trans_en[i]), - .dmw0_en(mem_data_dmw0_en[i]), - .dmw1_en(mem_data_dmw1_en[i]), - .cacop_op_mode_di(cacop_op_mode_di[i]), - - .tlb_mem_signal(tlb_mem_signal) ); end @@ -566,8 +610,22 @@ module cpu_top ( .flush(flush), + .csr_mem_signal(csr_mem_signal), + .disable_cache(1'b0), + + //<- tlb + .data_addr_trans_en(mem_data_addr_trans_en[i]), + .dmw0_en(mem_data_dmw0_en[i]), + .dmw1_en(mem_data_dmw1_en[i]), + .cacop_op_mode_di(cacop_op_mode_di[i]), + //-> tlb + .tlb_mem_signal(tlb_mem_signal), + //to ctrl - .wb_ctrl_signal(wb_ctrl_signal[i]) + .wb_ctrl_signal(wb_ctrl_signal[i]), + + // -> Frontend + .is_last_in_block(is_last_in_block[i]) ); end endgenerate @@ -716,6 +774,8 @@ module cpu_top ( assign data_addr_trans_en = mem_data_addr_trans_en[0] | mem_data_addr_trans_en[1]; assign tlb_data_i.dmw0_en = mem_data_dmw0_en[0] | mem_data_dmw0_en[1]; assign tlb_data_i.dmw1_en = mem_data_dmw1_en[0] | mem_data_dmw1_en[1]; + assign tlb_data_i.vaddr = mem_cache_addr; + assign tlb_data_i.fetch = data_fetch; inst_tlb_struct tlb_inst_i; tlb_inst_struct tlb_inst_o; diff --git a/src/vsrc/dcache.sv b/src/vsrc/dcache.sv new file mode 100644 index 0000000..c860741 --- /dev/null +++ b/src/vsrc/dcache.sv @@ -0,0 +1,360 @@ +`timescale 1ns / 1ps 
+////////////////////////////////////////////////////////////////////////////////// +// Company: +// Engineer: +// +// Create Date: 2022/04/21 17:24:48 +// Design Name: +// Module Name: dcache +// Project Name: +// Target Devices: +// Tool Versions: +// Description: +// +// Dependencies: +// +// Revision: +// Revision 0.01 - File Created +// Additional Comments: +// +////////////////////////////////////////////////////////////////////////////////// + +//cache共2组,每组4k。 +//V、D、Tag、Data=1+1+20+128=150 + + +module dcache ( + input logic clk, + input logic rst, + + //cache与CPU流水线的交互接 + input logic valid, //表明请求有效 + input logic op, // 1:write 0: read + input logic uncache, //标志uncache指令,高位有效 + input logic [7:0] index, // 地址的index域(addr[11:4]) + input logic [19:0] tag, //从TLB查到的pfn形成的tag + input logic [3:0] offset, //地址的offset域addr[3:0] + input logic [3:0] wstrb, //写字节使能信号 + input logic [31:0] wdata, //写数据 + output logic addr_ok, //该次请求的地址传输OK,读:地址被接收;写:地址和数据被接收 + output logic data_ok, //该次请求的数据传输Ok,读:数据返回;写:数据写入完成 + output logic [31:0] rdata, //读Cache的结果 + + //cache与AXI总线的交互接口 + output logic rd_req, //读请求有效信号。高电平有效 + output logic[3:0] rd_type, //读请求类型:3'b000: 字节;3'b001: 半字;3'b010: 字;3'b100:Cache行 + output logic [31:0] rd_addr, //读请求起始地址 + input logic rd_rdy, //读请求能否被接收的握手信号。高电平有效 + input logic ret_valid, //返回数据有效。高电平有效。 + input logic ret_last, //返回数据是一次读请求对应的最后一个返回数据 + input logic [31:0] ret_data, //读返回数据 + output logic wr_req, //写请求有效信号。高电平有效 + output logic[2:0] wr_type, //写请求类型:3'b000: 字节;3'b001: 半字;3'b010: 字;3'b100:Cache行 + output logic [31:0] wr_addr, //写请求起始地址 + output logic[3:0] wr_wstrb, //写操作的字节掩码。仅在写请求类型为:3'b000: 字节;3'b001: 半字;3'b010:字的情况下才有意义 + output logic [127:0] wr_data, //写数据 + input logic wr_rdy //写请求能否被接受的握手信号。具体见p2234. 
+ + + //还需对类SRAM-AXI转接桥模块进行调整,随后确定实现 +); + + //主状态机包括五个状态, + //IDLE:Cache模块当前没有任何操作 + //LOOKUP:Cache模块当前正在执行一个操作并且得到了它的查询结果 + //MISS:Cache模块当前处理的操作Cache缺失,且正在等待AXI总线的wr_rdy信号 + //REPLACE:待替换的Cache行已经从Cache中读出,且正在等待AXI总线的rd_rdy信号 + //REFILL:Cache缺失的访存请求已发出,准备/正在将缺失的Cache行数据写入Cache中 + enum int { + IDLE, + LOOKUP, + MISS, + REPLACE, + REFILL, + WRITE + } + state, next_state, wr_state, wr_next_state; + //Write Buffer状态机包括两个状态 + //IDLE: Write Buffer状态机当前没有待写的数据 + //WRITE: 将待写的数据写入Cache中。在主状态机处于LOOKUP状态且发现Store操作命中Cache时,触发Write Buffer状态机进入Write状态 + //同时Write Buffer会寄存Store要写入的Index、路号、offset、写使能(写32位数据里的那些字节)和写数据。 + + + parameter V = 149; + parameter D = 148; + parameter TagMSB = 147; + parameter TagLSB = 128; + parameter BlockMSB = 127; + parameter BlockLSB = 0; + + logic [511:0][149:0] cache_data; + logic hit1; + logic hit2; + logic hit; + logic way; //若hit,则way无意义,若miss,则way表示分配的那一路 + logic write_op; //hit write 执行标志,高电平有效 + logic miss_way_r; //缺失路的写使能 + + //虚地址共32位,[31:12]为Tag,[11:4]为Cache组索引index, [3:0]:offset,Cache行内偏移 + logic [ 7:0] cpu_req_index; + logic [ 19:0] cpu_req_tag; + logic [ 3:0] cpu_req_offset; + + //wire cpu_req_uncache; + logic cpu_req_valid; + logic cpu_req_op; + logic [ 3:0] cpu_req_wstrb; + logic [ 31:0] cpu_req_wdata; + + logic cpu_rd_rdy; + logic cpu_wr_rdy; + logic cpu_ret_valid; + logic cpu_ret_last; + logic [ 31:0] cpu_ret_data; + + ////虚地址共32位,[31:12]为Tag,[11:4]为Cache组索引index, [3:0]:offset,Cache行内偏移 + //logic [7:0]cpu_req_index; + //logic [19:0]cpu_req_tag; + //logic [3:0]cpu_req_offset; + + ////logic cpu_req_uncache; + //logic cpu_req_valid; + //logic cpu_req_op; + //logic[3:0] cpu_req_wstrb; + //logic[31:0] cpu_req_wdata; + + //logic cpu_rd_rdy; + //logic cpu_wr_rdy; + //logic cpu_ret_valid; + //logic[1:0] cpu_ret_last; + //logic[31:0] cpu_ret_data; + + //hit write 冲突 高位有效 + logic hit_conflict = 0; + + assign cpu_req_valid = valid; + assign cpu_req_op = op; + assign cpu_req_uncache = uncache; + assign cpu_req_offset = offset; + assign 
cpu_req_index = index; + assign cpu_req_tag = tag; + assign cpu_req_wstrb = wstrb; + assign cpu_req_wdata = wdata; + assign cpu_rd_rdy = rd_rdy; + assign cpu_wr_rdy = wr_rdy; + assign cpu_ret_valid = ret_valid; + assign cpu_ret_last = ret_last; + assign cpu_ret_data = ret_data; + + + //读写访问Cache的执行过程 + //初始化cache + initial begin + for (integer i = 0; i < 512; i = i + 1) cache_data[i] = 0; + end + + + always_ff @(posedge clk) begin : state_ff + if (rst) begin + state <= IDLE; + wr_state <= IDLE; + end else begin + state <= next_state; + wr_state <= wr_next_state; + end + end + + //state change + always_comb begin + case (state) + IDLE: begin + if (!cpu_req_valid || (cpu_req_valid && hit_conflict)) next_state = IDLE; + else next_state = LOOKUP; + end + LOOKUP: begin + if ((hit && !cpu_req_valid) || (hit && (cpu_req_valid && hit_conflict))) begin + // Read hit or Write hit + next_state = IDLE; + end else if (hit && cpu_req_valid) begin + // Hit and have request + next_state = LOOKUP; + end else if (!hit) begin + // Anything miss enters miss state + next_state = MISS; + end + end + MISS: begin + if (cpu_wr_rdy == 0) next_state = MISS; + else if (cpu_wr_rdy == 1) next_state = REPLACE; + end + REPLACE: begin + if (cpu_rd_rdy == 0) next_state = REPLACE; + else next_state = REFILL; + end + REFILL: begin + if (cpu_ret_valid == 1 && cpu_ret_last == 1) next_state = IDLE; + else next_state = REFILL; + end + default: next_state = IDLE; + endcase + end + + logic wr_buffer; + //Write buffer state change + always_comb begin + case (wr_state) + IDLE: + if (hit && cpu_req_op && cpu_req_valid) begin + wr_next_state = WRITE; + end else begin + wr_next_state = IDLE; + end + WRITE: + if ((hit) && (cpu_req_op)) //若hit + wr_next_state = WRITE; + else wr_next_state = IDLE; + + default: wr_next_state = IDLE; + endcase + end + + + //Tag compare + //hit1 + always @(*) begin + if (state == LOOKUP) + if(cache_data[2*cpu_req_index][V]==1'b1&&cache_data[2*cpu_req_index][TagMSB:TagLSB] == 
cpu_req_tag)begin + hit1 = 1'b1; + if (cpu_req_op == 1) begin + if (index == cpu_req_index && tag == cpu_req_tag) begin + hit_conflict = 1; + end + end + end else hit1 = 1'b0; + else hit1 = 1'b0; + end + //hit2 + always @(*) begin + if (state == LOOKUP) + if(cache_data[2*cpu_req_index+1][V]==1'b1&&cache_data[2*cpu_req_index+1][TagMSB:TagLSB] == cpu_req_tag)begin + hit2 = 1'b1; + if (cpu_req_op == 1) begin + if (index == cpu_req_index && tag == cpu_req_tag) begin + hit_conflict = 1; + end + end + end else hit2 = 1'b0; + else hit2 = 1'b0; + end + + + //LOOKUP模块: Cache命中后的读写操作---Data Select + always @(posedge clk) begin + if (state == LOOKUP && hit) + if( op==1'b0) //read hit + begin + addr_ok <= 1'b1; + if (hit1) begin + rdata = cache_data[2*cpu_req_index][8*cpu_req_offset+:32]; + end else begin + rdata = cache_data[2*cpu_req_index+1][8*cpu_req_offset+:32]; + end + end + + else if(wr_state == WRITE && hit) //write hit + begin + addr_ok <= 1'b1; + data_ok <= 1'b1; + if (hit1) begin + cache_data[2*cpu_req_index][8*cpu_req_offset+:32] = wdata; + cache_data[2*cpu_req_index][D] = 1'b1; + end else begin + cache_data[2*cpu_req_index+1][8*cpu_req_offset+:32] = wdata; + cache_data[2*cpu_req_index+1][D] = 1'b1; + end + if (cpu_req_op == 0) begin + if (cpu_req_offset[3:2] == offset[3:2]) begin + hit_conflict = 1; + end + end + end + end + + //way LFSB --Miss Buffer + always @(*) begin + if (state == MISS) begin //未命中 + case ({ + cache_data[2*cpu_req_index][V], cache_data[2*cpu_req_index+1][V] + }) + 2'b01: way = 1'b0; //第0路可用 + 2'b10: way = 1'b1; //第1路可用 + 2'b00: way = 1'b0; //第0、1路均可用 + 2'b11: way = 1'b0; //第0、1路均不可用,默认替换第0路 + default: way = 1'b0; + endcase + miss_way_r = 1; + end + end + + logic [1:0] rt_offset; + //对AXI接口的写操作 + always @(*) begin + if (state == MISS) begin // 存储要写的数据还有地址等信息 + // if(cpu_req_op == 1)begin + // if(cache_data[2*cpu_req_index + way][D])begin + + // end + + // end + rd_addr = {cpu_req_tag[19:0], cpu_req_index[7:0], cpu_req_offset}; + rd_type = 
3'b000; + // addr_ok = 1'b1; + // data_ok <= 1'b1; + end else if (state == REPLACE) begin + //将被替换行的Cache数据写入主存中 + if (wr_rdy) begin + if (cache_data[2*cpu_req_index+way][V:D] == 2'b11) begin + wr_req = 1'b1; + wr_addr = { + cache_data[2*cpu_req_index+way][TagMSB:TagLSB], cpu_req_index, 4'b0000 + }; + wr_wstrb = wstrb; + wr_data = cache_data[2*cpu_req_index+way][BlockMSB:BlockLSB]; + end + end else begin + wr_req = 1'b0; + end + rd_req = 1'b1; + end else begin + wr_req = 1'b0; + rd_req = 1'b0; + end + end + //Miss Buffer + always @(*) begin + if (state == REFILL) begin + if (cpu_req_op == 0) begin + cache_data[2*cpu_req_index+way][149:128] = {2'b10, cpu_req_tag}; + cache_data[2*cpu_req_index+way][rt_offset*32+:32] = ret_data; + if (ret_last) begin + // rt_offset = 0; + rd_req = 1'b0; + rdata = cache_data[2*cpu_req_index+way][cpu_req_index*8+:32]; + end + end + if (cpu_req_op == 1) begin + cache_data[2*cpu_req_index+way][149:128] = {2'b11, cpu_req_tag}; + cache_data[2*cpu_req_index+way][rt_offset*8+:32] = ret_data; + if (ret_last) begin + // rt_offset = 0; + cache_data[2*cpu_req_index+way][cpu_req_index*8+:32] = cpu_req_wdata; + end + end + // rt_offset = rt_offset + 1; + end + end + +endmodule + + + diff --git a/src/vsrc/dummy_dcache.sv b/src/vsrc/dummy_dcache.sv new file mode 100644 index 0000000..ddc5e43 --- /dev/null +++ b/src/vsrc/dummy_dcache.sv @@ -0,0 +1,174 @@ +module dummy_dcache ( + input logic clk, + input logic rst, + + //cache与CPU流水线的交互接 + input logic valid, //表明请求有效 + input logic op, // 1:write 0: read + input logic uncache, //标志uncache指令,高位有效 + input logic [7:0] index, // 地址的index域(addr[11:4]) + input logic [19:0] tag, //从TLB查到的pfn形成的tag + input logic [3:0] offset, //地址的offset域addr[3:0] + input logic [3:0] wstrb, //写字节使能信号 + input logic [31:0] wdata, //写数据 + output logic addr_ok, //该次请求的地址传输OK,读:地址被接收;写:地址和数据被接收 + output logic data_ok, //该次请求的数据传输Ok,读:数据返回;写:数据写入完成 + output logic [31:0] rdata, //读Cache的结果 + + //cache与AXI总线的交互接口 + output logic rd_req, 
//读请求有效信号。高电平有效 + output logic [2:0] rd_type, //读请求类型:3'b000: 字节;3'b001: 半字;3'b010: 字;3'b100:Cache行 + output logic [31:0] rd_addr, //读请求起始地址 + input logic rd_rdy, //读请求能否被接收的握手信号。高电平有效 + input logic ret_valid, //返回数据有效。高电平有效。 + input logic ret_last, //返回数据是一次读请求对应的最后一个返回数据 + input logic [127:0] ret_data, //读返回数据 + output logic wr_req, //写请求有效信号。高电平有效 + output logic [2:0] wr_type, //写请求类型:3'b000: 字节;3'b001: 半字;3'b010: 字;3'b100:Cache行 + output logic [31:0] wr_addr, //写请求起始地址 + output logic [15:0] wr_wstrb, //写操作的字节掩码。16bits for AXI128 + output logic [127:0] wr_data, //写数据 + input logic wr_rdy //写请求能否被接受的握手信号。具体见p2234. + + + //还需对类SRAM-AXI转接桥模块进行调整,随后确定实现 +); + + enum int { + IDLE, + READ_REQ, + READ_WAIT, + WRITE_REQ + } + state, next_state; + + always_ff @(posedge clk) begin + if (rst) state <= 0; + else state <= next_state; + end + + // State transition + always_comb begin + case (state) + IDLE: begin + if (valid) begin + if (op) next_state = WRITE_REQ; + else next_state = READ_REQ; + end else next_state = IDLE; + end + READ_REQ: begin + if (rd_rdy) next_state = READ_WAIT; // If AXI ready, send request + else next_state = READ_REQ; + end + READ_WAIT: begin + if (ret_valid) next_state = IDLE; // If return valid, back to IDLE + else next_state = READ_WAIT; + end + WRITE_REQ: begin + if (wr_rdy) + next_state = IDLE; // If AXI is ready, then write req is accept this cycle, back to IDLE + else next_state = WRITE_REQ; + end + default: begin + next_state = IDLE; + end + endcase + end + + logic [31:0] cpu_addr; + assign cpu_addr = {tag, index, offset}; + + logic rd_req_r; + logic [31:0] rd_addr_r; + + // Handshake with AXI + always_ff @(posedge clk) begin + case (state) + READ_REQ: begin + if (rd_rdy) begin + rd_req_r <= 1; + rd_addr_r <= {cpu_addr[31:4], 4'b0}; // Keep addr aligned + end + end + endcase + end + + //delay wr_rdy one cycle + logic reg_wr_rdy; + always_ff @(posedge clk)begin + if(rst) reg_wr_rdy<=0; + else reg_wr_rdy<=wr_rdy; + end + + assign rd_type = 
3'b010; // word + assign wr_type = 3'b010; // word + always_comb begin + // Default signal + rd_addr = 0; + rd_req = 0; + wr_addr = 0; + wr_data = 0; + wr_req = 0; + wr_wstrb = 0; + + case (state) + READ_REQ: begin + if (rd_rdy) begin + rd_req = 1; + rd_addr = {cpu_addr[31:4], 4'b0}; // Keep addr aligned + end + end + READ_WAIT: begin + rd_req = rd_req_r; + rd_addr = rd_addr_r; + end + WRITE_REQ: begin + if (reg_wr_rdy) begin + wr_req = 1; + wr_addr = {cpu_addr[31:4], 4'b0}; // Keep addr aligned + case (cpu_addr[3:2]) + 2'b00: begin + wr_data = {{96{1'b0}}, wdata}; + wr_wstrb = {12'b0, wstrb}; + end + 2'b01: begin + wr_data = {{64{1'b0}}, wdata, {32{1'b0}}}; + wr_wstrb = {8'b0, wstrb, 4'b0}; + end + 2'b10: begin + wr_data = {32'b0, wdata, {64{1'b0}}}; + wr_wstrb = {4'b0, wstrb, 8'b0}; + end + 2'b11: begin + wr_data = {wdata, {96{1'b0}}}; + wr_wstrb = {wstrb, 12'b0}; + end + endcase + end + end + endcase + end + + // Handshake with CPU + always_comb begin + addr_ok = 0; + data_ok = 0; + rdata = 0; + case (state) + READ_WAIT: begin + if (ret_valid) begin + addr_ok = 1; + data_ok = 1; + rdata = ret_data[cpu_addr[3:2]*32+:32]; + end + end + WRITE_REQ: begin + if (reg_wr_rdy) begin + addr_ok = 1; + data_ok = 1; + end + end + endcase + end + +endmodule diff --git a/src/vsrc/dummy_icache.sv b/src/vsrc/dummy_icache.sv deleted file mode 100644 index 407cbab..0000000 --- a/src/vsrc/dummy_icache.sv +++ /dev/null @@ -1,165 +0,0 @@ -`include "defines.sv" - -/* dummy_icache -* hold output until AXI returns value -*/ -module dummy_icache #( - parameter ADDR_WIDTH = 32, - parameter DATA_WIDTH = 32 -) ( - input logic clk, - input logic rst, - - // <-> IF - // All signals are 1 cycle valid - input flush, - // all 0 means invalid - input logic [ADDR_WIDTH-1:0] raddr_1_i, - input logic [ADDR_WIDTH-1:0] raddr_2_i, - // Require IF stage not to send more instr addr - // stallreq is pull up the next clk when queue is full - // and pull down then next clk when queue can accept addr - 
output logic stallreq_o, - // rvalid is 1 when output is valid - output logic rvalid_1_o, - output logic rvalid_2_o, - // Must return the addr as well - output logic [ADDR_WIDTH-1:0] raddr_1_o, - output logic [ADDR_WIDTH-1:0] raddr_2_o, - output logic [DATA_WIDTH-1:0] rdata_1_o, - output logic [DATA_WIDTH-1:0] rdata_2_o, - - // <-> AXI Controller - output logic [ADDR_WIDTH-1:0] axi_addr_o, - - // Assume busy is pull down the same cycle when data is ready - input logic [DATA_WIDTH-1:0] axi_data_i, - input logic axi_busy_i -); - - // Reset signal - logic rst_n; - assign rst_n = ~rst; - - logic [ADDR_WIDTH-1:0] raddrs[2]; // Accept two addr - - // States - enum int unsigned { - ACCEPT_ADDR = 0, - IN_TRANSACTION_1 = 1, - IN_TRANSACTION_2 = 2 - } - state, next_state; - - always_ff @(posedge clk or negedge rst_n) begin : state_ff - if (!rst_n || flush) begin - state <= ACCEPT_ADDR; - end else begin - state <= next_state; - end - end - - always_comb begin : transition_comb - case (state) - ACCEPT_ADDR: begin - if ((raddr_1_i != 0 || raddr_2_i != 0) & ~axi_busy_i) begin - next_state = IN_TRANSACTION_1; - end else begin - next_state = ACCEPT_ADDR; - end - end - IN_TRANSACTION_1: begin - if (axi_busy_i == 0) begin - next_state = IN_TRANSACTION_2; - end else begin - next_state = IN_TRANSACTION_1; - end - end - IN_TRANSACTION_2: begin - if (axi_busy_i == 0) begin - next_state = ACCEPT_ADDR; - end else begin - next_state = IN_TRANSACTION_2; - end - end - default: begin - next_state = ACCEPT_ADDR; - end - endcase - end - - always_ff @(posedge clk or negedge rst_n) begin : raddrs_ff - if (!rst_n) begin - raddrs[0] <= 0; - raddrs[1] <= 0; - end else begin - case (state) - ACCEPT_ADDR: begin - raddrs[0] <= raddr_1_i; - raddrs[1] <= raddr_2_i; - end - IN_TRANSACTION_1, IN_TRANSACTION_2: begin - // Do nothing - end - endcase - end - end - - assign stallreq_o = ~(state == ACCEPT_ADDR) | axi_busy_i; - - always_ff @(posedge clk or negedge rst_n) begin : axi_ff - if (!rst_n) begin - 
axi_addr_o <= 0; - end else begin - case (state) - ACCEPT_ADDR: begin - if (raddr_1_i != 0 && axi_busy_i == 0) axi_addr_o <= raddr_1_i; - end - IN_TRANSACTION_1: begin - if (raddrs[1] != 0 && axi_busy_i == 0) axi_addr_o <= raddrs[1]; - end - IN_TRANSACTION_2: begin - if (next_state == ACCEPT_ADDR) begin - axi_addr_o <= 0; - end - end - default: begin - axi_addr_o <= 0; - end - endcase - end - end - - // Output logic - always_ff @(posedge clk or negedge rst_n) begin : output_ff - if (!rst_n || flush) begin - rvalid_1_o <= 0; - rvalid_2_o <= 0; - raddr_1_o <= 0; - raddr_2_o <= 0; - rdata_1_o <= 0; - rdata_2_o <= 0; - end else begin - rvalid_1_o <= 0; - rvalid_2_o <= 0; - raddr_1_o <= 0; - raddr_2_o <= 0; - rdata_1_o <= 0; - rdata_2_o <= 0; - case (state) - ACCEPT_ADDR: begin - end - IN_TRANSACTION_1: begin - rvalid_1_o <= ~axi_busy_i; - raddr_1_o <= axi_busy_i ? 0 : raddrs[0]; - rdata_1_o <= axi_busy_i ? 0 : axi_data_i; - end - IN_TRANSACTION_2: begin - rvalid_2_o <= ~axi_busy_i; - raddr_2_o <= axi_busy_i ? 0 : raddrs[1]; - rdata_2_o <= axi_busy_i ? 
0 : axi_data_i; - end - endcase - end - end -endmodule diff --git a/src/vsrc/frontend/frontend.sv b/src/vsrc/frontend/frontend.sv index 297389d..61ed5f5 100644 --- a/src/vsrc/frontend/frontend.sv +++ b/src/vsrc/frontend/frontend.sv @@ -1,25 +1,32 @@ +`include "defines.sv" `include "instr_info.sv" -`include "pipeline_defines.sv" +`include "frontend/frontend_defines.sv" + +`include "frontend/ftq.sv" +`include "frontend/ifu.sv" + module frontend #( - parameter FETCH_WIDTH = 2, - parameter ADDR_WIDTH = 32, - parameter DATA_WIDTH = 32 + parameter FETCH_WIDTH = 4, + parameter ADDR_WIDTH = 32, + parameter DATA_WIDTH = 32, + parameter CACHELINE_WIDTH = 128 ) ( input logic clk, input logic rst, // <-> ICache - output logic [ADDR_WIDTH-1:0] icache_read_addr_o[FETCH_WIDTH], - input logic icache_stallreq_i, // ICache cannot accept more addr input - input logic icache_read_valid_i[FETCH_WIDTH], - input logic [ADDR_WIDTH-1:0] icache_read_addr_i[FETCH_WIDTH], - input logic [DATA_WIDTH-1:0] icache_read_data_i[FETCH_WIDTH], + // ICache is fixed dual port + output logic [1:0] icache_read_req_o, + output logic [1:0][ADDR_WIDTH-1:0] icache_read_addr_o, + input logic [1:0] icache_read_valid_i, + input logic [1:0][CACHELINE_WIDTH-1:0] icache_read_data_i, // <-> Backend input branch_update_info_t branch_update_info_i, input logic [ADDR_WIDTH-1:0] backend_next_pc_i, input logic backend_flush_i, + input logic backend_commit_i, // <-> Instruction buffer input logic instr_buffer_stallreq_i, @@ -28,10 +35,10 @@ module frontend #( // <- CSR input logic csr_pg, input logic csr_da, - input logic [31:0]csr_dmw0, - input logic [31:0]csr_dmw1, - input logic [1:0]csr_plv, - input logic [1:0]csr_datf, + input logic [31:0] csr_dmw0, + input logic [31:0] csr_dmw1, + input logic [1:0] csr_plv, + input logic [1:0] csr_datf, input logic disable_cache, // <-> TLB @@ -43,7 +50,7 @@ module frontend #( input logic inst_tlb_v, input logic inst_tlb_d, input logic [1:0] inst_tlb_mat, - input logic [1:0] 
inst_tlb_plv + input logic [1:0] inst_tlb_plv ); @@ -52,16 +59,16 @@ module frontend #( assign rst_n = ~rst; //addr trans TODO:修改dmw的赋值(还不确定双发射情况下pc的赋值方式) - assign inst_addr = pc; - assign inst_addr_trans_en = csr_pg && !csr_da && !dmw0_en && !dmw1_en; - assign dmw0_en = ((csr_dmw0[`PLV0] && csr_plv == 2'd0) || (csr_dmw0[`PLV3] && csr_plv == 2'd3)) && (pc[31:29] == csr_dmw0[`VSEG]); - assign dmw1_en = ((csr_dmw1[`PLV0] && csr_plv == 2'd0) || (csr_dmw1[`PLV3] && csr_plv == 2'd3)) && (pc[31:29] == csr_dmw1[`VSEG]); + // assign inst_addr = pc; + // assign inst_addr_trans_en = csr_pg && !csr_da && !dmw0_en && !dmw1_en; + // assign dmw0_en = ((csr_dmw0[`PLV0] && csr_plv == 2'd0) || (csr_dmw0[`PLV3] && csr_plv == 2'd3)) && (pc[31:29] == csr_dmw0[`VSEG]); + // assign dmw1_en = ((csr_dmw1[`PLV0] && csr_plv == 2'd0) || (csr_dmw1[`PLV3] && csr_plv == 2'd3)) && (pc[31:29] == csr_dmw1[`VSEG]); //excp - logic excp_tlbr,excp_pif,excp_ppi,excp_adef; + logic excp_tlbr, excp_pif, excp_ppi, excp_adef; assign excp_tlbr = !inst_tlb_found && inst_addr_trans_en; - assign excp_pif = !inst_tlb_v && inst_addr_trans_en; - assign excp_ppi = (csr_plv > inst_tlb_plv) && inst_addr_trans_en; + assign excp_pif = !inst_tlb_v && inst_addr_trans_en; + assign excp_ppi = (csr_plv > inst_tlb_plv) && inst_addr_trans_en; assign excp_adef = (pc[0] || pc[1]) | (pc[31] && (csr_plv == 2'd3) && inst_addr_trans_en); assign instr_buffer_o[0].excp = excp_tlbr | excp_pif | excp_ppi | excp_adef; @@ -80,72 +87,78 @@ module frontend #( end end + logic ftq_full; + always_comb begin : next_pc_comb if (backend_flush_i) begin next_pc = backend_next_pc_i; - end else if (instr_buffer_stallreq_i) begin - next_pc = pc; - end else if (icache_stallreq_i) begin + end else if (ftq_full) begin next_pc = pc; end else begin - next_pc = pc + 8; - end - end - - // ICache read_addr_o - always_comb begin : icache_read_addr_o_comb - for (integer i = 0; i < FETCH_WIDTH; i++) begin - icache_read_addr_o[i] = pc + i * 4; + next_pc = pc + 
FETCH_WIDTH * 4; end end - typedef struct packed { - bit valid; - bit [ADDR_WIDTH-1:0] pc; - bit [DATA_WIDTH-1:0] instr; - } icache_resp_t; - icache_resp_t icache_resp_buffer[FETCH_WIDTH]; - always_ff @(posedge clk or negedge rst_n) begin : icache_resp_buffer_ff - if (!rst_n || icache_resp_ready) begin - for (integer i = 0; i < FETCH_WIDTH; i++) begin - icache_resp_buffer[i] <= 0; - end + // BPU + bpu_ftq_t bpu_ftq_block; + always_comb begin + if (~ftq_full) begin + bpu_ftq_block.start_pc = pc; + bpu_ftq_block.valid = 1; + bpu_ftq_block.length = 4; + bpu_ftq_block.is_cross_cacheline = (pc[3:2] != 2'b00); end else begin - for (integer i = 0; i < FETCH_WIDTH; i++) begin - if (icache_read_valid_i[i]) begin - icache_resp_buffer[i].valid <= 1; - icache_resp_buffer[i].pc <= icache_read_addr_i[i]; - icache_resp_buffer[i].instr <= icache_read_data_i[i]; - end - end - end - end - logic icache_resp_ready; // 1 if all the instr in icache_resp_buffer is valid - always_comb begin : icache_resp_ready_comb - icache_resp_ready = 1; - for (integer i = 0; i < FETCH_WIDTH; i++) begin - icache_resp_ready = icache_resp_ready & icache_resp_buffer[i].valid; + bpu_ftq_block = 0; end end - always_ff @(posedge clk or negedge rst_n) begin : instr_buffer_o_ff - if (!rst_n || backend_flush_i) begin - for (integer i = 0; i < FETCH_WIDTH; i++) begin - instr_buffer_o[i] <= 0; - end - end else begin - // Keep 0 for most of the time - for (integer i = 0; i < FETCH_WIDTH; i++) begin - instr_buffer_o[i] <= 0; - end - if (icache_resp_ready && !instr_buffer_stallreq_i) begin - for (integer i = 0; i < FETCH_WIDTH; i++) begin - instr_buffer_o[i].valid <= 1; - instr_buffer_o[i].pc <= icache_resp_buffer[i].pc; - instr_buffer_o[i].instr <= icache_resp_buffer[i].instr; - end - end - end - end + ftq_ifu_t ftq_ifu_block; + logic ifu_ftq_accept; + + ftq u_ftq ( + .clk(clk), + .rst(rst), + + // Flush + .backend_flush_i(backend_flush_i), + + // <-> Frontend + .instr_buffer_stallreq_i(instr_buffer_stallreq_i), + + 
// <-> BPU + .bpu_i (bpu_ftq_block), + .bpu_queue_full_o(ftq_full), + + // <-> Backend + .backend_commit_i(backend_commit_i), + + // <-> IFU + .ifu_o (ftq_ifu_block), + .ifu_accept_i(ifu_ftq_accept) + ); + + + instr_buffer_info_t ifu_instr_output[FETCH_WIDTH]; + assign instr_buffer_o = instr_buffer_stallreq_i ? '{FETCH_WIDTH{0}} : ifu_instr_output; + ifu u_ifu ( + .clk(clk), + .rst(rst), + + // Flush + .flush_i(backend_flush_i), + + .ftq_i (ftq_ifu_block), + .ftq_accept_o (ifu_ftq_accept), + .icache_rreq_o (icache_read_req_o), + .icache_raddr_o (icache_read_addr_o), + .icache_rvalid_i(icache_read_valid_i), + .icache_rdata_i (icache_read_data_i), + .stallreq_i (instr_buffer_stallreq_i), + + // <-> Frontend + .instr_buffer_o(ifu_instr_output) + ); + + endmodule diff --git a/src/vsrc/frontend/frontend_defines.sv b/src/vsrc/frontend/frontend_defines.sv new file mode 100644 index 0000000..e752019 --- /dev/null +++ b/src/vsrc/frontend/frontend_defines.sv @@ -0,0 +1,31 @@ +`ifndef FRONTEND_DEFINES_SV +`define FRONTEND_DEFINES_SV +`include "defines.sv" + +`define FETCH_WIDTH 4 + +typedef struct packed { + logic valid; + logic [`InstAddrBus] start_pc; + logic is_cross_cacheline; + logic [$clog2(`FETCH_WIDTH+1)-1:0] length; + + // TODO: add BPU meta +} bpu_ftq_t; + +typedef struct packed { + logic valid; + logic [`InstAddrBus] start_pc; + logic is_cross_cacheline; + logic [$clog2(`FETCH_WIDTH+1)-1:0] length; +} ftq_block_t; + +// FTQ <-> IFU +typedef struct packed { + logic valid; + logic [`InstAddrBus] start_pc; + logic is_cross_cacheline; + logic [$clog2(`FETCH_WIDTH+1)-1:0] length; +} ftq_ifu_t; + +`endif diff --git a/src/vsrc/frontend/ftq.sv b/src/vsrc/frontend/ftq.sv new file mode 100644 index 0000000..2311627 --- /dev/null +++ b/src/vsrc/frontend/ftq.sv @@ -0,0 +1,97 @@ +`include "defines.sv" +`include "frontend/frontend_defines.sv" + +module ftq #( + parameter FETCH_WIDTH = 4, + parameter QUEUE_SIZE = 4 +) ( + input logic clk, + input logic rst, + + // <-> Frontend + 
input logic backend_flush_i, + input logic instr_buffer_stallreq_i, + + // <-> BPU + input bpu_ftq_t bpu_i, + output logic bpu_queue_full_o, + + // <-> Backend + input logic backend_commit_i, + + // <-> IFU + output ftq_ifu_t ifu_o, + input logic ifu_accept_i // Must return in the same cycle +); + + // QUEUE data structure + ftq_block_t [QUEUE_SIZE-1:0] FTQ, next_FTQ; + always_ff @(posedge clk) begin + if (rst) begin + FTQ <= 0; + end else begin + FTQ <= next_FTQ; + end + end + + // DEBUG signal + logic [`InstAddrBus] debug_queue_pc[QUEUE_SIZE]; + always_comb begin + for (integer i = 0; i < QUEUE_SIZE; i++) begin + debug_queue_pc[i] = FTQ[i].start_pc; + end + end + + // PTR + logic [$clog2(QUEUE_SIZE)-1:0] bpu_ptr, ifu_ptr, comm_ptr; + always_ff @(posedge clk) begin : ptr_ff + if (rst) begin + bpu_ptr <= 0; + ifu_ptr <= 0; + comm_ptr <= 0; + end else begin + // Backend committed, means that current comm_ptr block is done + if (backend_commit_i) comm_ptr <= comm_ptr + 1; + + // If block is accepted by IFU, ifu_ptr++ + // IB full should result in IFU not accepting FTQ input + if (ifu_accept_i) ifu_ptr <= ifu_ptr + 1; + + // BPU ptr + if (bpu_i.valid) bpu_ptr <= bpu_ptr + 1; + + // If backend redirect triggered, back to comm_ptr + 1 + // Since FTQ is cleared out, so not pending block + if (backend_flush_i) begin + ifu_ptr <= comm_ptr + 1; + bpu_ptr <= comm_ptr + 1; + end + end + end + + // next_FTQ + always_comb begin : next_FTQ_comb + // Default no change + next_FTQ = FTQ; + // clear out if committed + if (backend_commit_i) next_FTQ[comm_ptr] = 0; + // Accept BPU input + if (bpu_i.valid) next_FTQ[bpu_ptr] = bpu_i; + // If backend redirect triggered, clear FTQ + if (backend_flush_i) next_FTQ = 0; + end + + // Output + // -> IFU + assign ifu_o.valid = FTQ[ifu_ptr].valid; + assign ifu_o.is_cross_cacheline = FTQ[ifu_ptr].is_cross_cacheline; + assign ifu_o.start_pc = FTQ[ifu_ptr].start_pc; + assign ifu_o.length = FTQ[ifu_ptr].length; + + // -> BPU + logic 
[$clog2(QUEUE_SIZE)-1:0] bpu_ptr_plus1; // Limit the bit width + assign bpu_ptr_plus1 = bpu_ptr + 1; + assign bpu_queue_full_o = (bpu_ptr_plus1 == comm_ptr); + + +endmodule diff --git a/src/vsrc/frontend/ifu.sv b/src/vsrc/frontend/ifu.sv new file mode 100644 index 0000000..bbae041 --- /dev/null +++ b/src/vsrc/frontend/ifu.sv @@ -0,0 +1,169 @@ +`include "frontend/frontend_defines.sv" +`include "instr_info.sv" + + +module ifu #( + parameter FETCH_WIDTH = 4, + parameter ADDR_WIDTH = 32, + parameter DATA_WIDTH = 32, + parameter CACHELINE_WIDTH = 128 // FETCH_WIDTH and CACHELINE_WIDTH must match +) ( + input logic clk, + input logic rst, + + // Flush + input flush_i, + + // <-> Fetch Target Queue + input ftq_ifu_t ftq_i, + output logic ftq_accept_o, // In current cycle + + + // <-> Frontend <-> ICache + output logic [1:0] icache_rreq_o, + output logic [1:0][ADDR_WIDTH-1:0] icache_raddr_o, + input logic [1:0] icache_rvalid_i, + input logic [1:0][CACHELINE_WIDTH-1:0] icache_rdata_i, + + + // <-> Frontend <-> Instruction Buffer + input logic stallreq_i, + output instr_buffer_info_t instr_buffer_o[FETCH_WIDTH] +); + ///////////////////////////////////////////////////////////////////////////////// + // P0, send read req to ICache + ///////////////////////////////////////////////////////////////////////////////// + logic p0_send_rreq; + // Condition when to send rreq to ICache, see doc for detail + assign p0_send_rreq = ftq_i.valid & ~is_flushing & ~stallreq_i & ~p1_stallreq; + assign ftq_accept_o = p0_send_rreq; // FTQ handshake, same cycle as ftq_i + // Send read req to ICache + always_comb begin + if (p0_send_rreq) begin + // Send rreq to ICache if FTQ input is valid and not in flushing state + icache_rreq_o[0] = 1; + icache_rreq_o[1] = ftq_i.is_cross_cacheline ? 1 : 0; + icache_raddr_o[0] = {ftq_i.start_pc[ADDR_WIDTH-1:4], 4'b0}; + icache_raddr_o[1] = ftq_i.is_cross_cacheline ? 
{ftq_i.start_pc[ADDR_WIDTH-1:4], 4'b0} + 16 : 0; // TODO: remove magic number + end else begin + icache_rreq_o = 0; + icache_raddr_o = 0; + end + end + + ///////////////////////////////////////////////////////////////////////////////// + // P1 + ///////////////////////////////////////////////////////////////////////////////// + // Flush state + logic is_flushing_r, is_flushing; + assign is_flushing = is_flushing_r | flush_i; + always_ff @(posedge clk) begin : is_flushing_ff + if (rst) begin + is_flushing_r <= 0; + end else if (flush_i & p1_read_transaction.valid & ~p1_read_done) begin + // Enter a flusing state if flush_i and read transaction on-the-fly + is_flushing_r <= 1; + end else if (p1_read_done) begin + // Reset when read transaction is done + is_flushing_r <= 0; + end + end + + // P1 data structure + typedef struct packed { + logic valid; + logic [`InstAddrBus] start_pc; + logic is_cross_cacheline; + logic [$clog2(`FETCH_WIDTH+1)-1:0] length; + logic [1:0] icache_rvalid_r; + logic [1:0][CACHELINE_WIDTH-1:0] icache_rdata_r; + } read_transaction_t; + read_transaction_t p1_read_transaction; + + logic p1_read_done; // Read done is same cycle as ICache return valid + assign p1_read_done = p1_read_transaction.is_cross_cacheline ? 
+ (icache_rvalid_i[0] | p1_read_transaction.icache_rvalid_r[0]) & (icache_rvalid_i[1]| p1_read_transaction.icache_rvalid_r[1]) : + (icache_rvalid_i[0] | p1_read_transaction.icache_rvalid_r[0]); + logic p1_stallreq; // Currently in transaction and not done yet + assign p1_stallreq = p1_read_transaction.valid & ~p1_read_done; + always_ff @(posedge clk) begin : p1_ff + if (rst) begin + p1_read_transaction <= 0; + end else if (p0_send_rreq) begin + // If P0 sent rreq to ICache, move info from P0 to P1 + p1_read_transaction.valid <= 1; + p1_read_transaction.start_pc <= ftq_i.start_pc; + p1_read_transaction.is_cross_cacheline <= ftq_i.is_cross_cacheline; + p1_read_transaction.length <= ftq_i.length; + p1_read_transaction.icache_rvalid_r <= 0; + p1_read_transaction.icache_rdata_r <= 0; + end else if (p1_read_done & ~stallreq_i) begin + // Reset if done and not stalling + p1_read_transaction <= 0; + end else begin + // Store rvalid in P1 data structure + // This is required since ICache do not guarantee rvalid of the two ports is returned in the same cycle + if (icache_rvalid_i[0]) begin + p1_read_transaction.icache_rvalid_r[0] <= 1; + p1_read_transaction.icache_rdata_r[0] <= icache_rdata_i[0]; + end + if (icache_rvalid_i[1]) begin + p1_read_transaction.icache_rvalid_r[1] <= 1; + p1_read_transaction.icache_rdata_r[1] <= icache_rdata_i[1]; + end + end + end + + logic [FETCH_WIDTH*2-1:0][DATA_WIDTH-1:0] cacheline_combined; // Same cycle as ICache return, used in P2 + assign cacheline_combined = { + icache_rvalid_i[1] ? icache_rdata_i[1] : p1_read_transaction.icache_rdata_r[1], + icache_rvalid_i[0] ? 
icache_rdata_i[0] : p1_read_transaction.icache_rdata_r[0] + }; + + // P1 debug, for observability + logic [ADDR_WIDTH-1:0] debug_p1_pc = p1_read_transaction.start_pc; // DEBUG + logic [ADDR_WIDTH-1:0] debug_p0_pc = ftq_i.start_pc; // DEBUG + logic [1:0] debug_p1_rvalid_r = p1_read_transaction.icache_rvalid_r; + + + ///////////////////////////////////////////////////////////////////////////////// + // P2, send instr info to IB + ///////////////////////////////////////////////////////////////////////////////// + always_ff @(posedge clk) begin : p2_ff + if (rst) begin + for (integer i = 0; i < FETCH_WIDTH; i++) begin + instr_buffer_o[i] <= 0; + end + end else if (stallreq_i) begin + // Hold output + end else if (p1_read_done & ~is_flushing) begin + // If p1 read done, pass data to IB + // However, if p1 read done comes from flushing, do not pass down to IB + for (integer i = 0; i < FETCH_WIDTH; i++) begin + // Default + instr_buffer_o[i].is_last_in_block <= 0; + + if (i < p1_read_transaction.length) begin + if (i == p1_read_transaction.length - 1) begin + instr_buffer_o[i].valid <= 1; + instr_buffer_o[i].is_last_in_block <= 1; // Mark the instruction as last in block, used when commit + instr_buffer_o[i].pc <= p1_read_transaction.start_pc + i * 4; // Instr is 4 bytes long + instr_buffer_o[i].instr <= cacheline_combined[p1_read_transaction.start_pc[3:2]+i]; + end else begin + instr_buffer_o[i].valid <= 1; + instr_buffer_o[i].pc <= p1_read_transaction.start_pc + i * 4; // Instr is 4 bytes long + instr_buffer_o[i].instr <= cacheline_combined[p1_read_transaction.start_pc[3:2]+i]; + end + end else begin + instr_buffer_o[i] <= 0; + end + end + end else begin + // Otherwise keep 0 + for (integer i = 0; i < FETCH_WIDTH; i++) begin + instr_buffer_o[i] <= 0; + end + end + end + +endmodule diff --git a/src/vsrc/icache.sv b/src/vsrc/icache.sv index f72e406..714b096 100644 --- a/src/vsrc/icache.sv +++ b/src/vsrc/icache.sv @@ -1,12 +1,12 @@ `include "utils/bram.sv" +`include 
"utils/lfsr.sv" module icache #( parameter NSET = 256, parameter NWAY = 2, parameter CACHELINE_WIDTH = 128, - parameter ADDR_WIDTH = 32, - parameter DATA_WIDTH = 32 + parameter ADDR_WIDTH = 32 ) ( input logic clk, input logic rst, @@ -15,21 +15,26 @@ module icache #( input logic rreq_1_i, input logic [ADDR_WIDTH-1:0] raddr_1_i, output logic rvalid_1_o, - output logic [DATA_WIDTH-1:0] rdata_1_o, + output logic [CACHELINE_WIDTH-1:0] rdata_1_o, // Read port 2 input logic rreq_2_i, input logic [ADDR_WIDTH-1:0] raddr_2_i, output logic rvalid_2_o, - output logic [DATA_WIDTH-1:0] rdata_2_o, + output logic [CACHELINE_WIDTH-1:0] rdata_2_o, // <-> AXI Controller output logic [ADDR_WIDTH-1:0] axi_addr_o, output logic axi_rreq_o, - input logic axi_busy_i, // High effective - input logic [DATA_WIDTH-1:0] axi_data_i + input logic axi_rdy_i, + input logic axi_rvalid_i, + input logic [1:0] axi_rlast_i, + input logic [CACHELINE_WIDTH-1:0] axi_data_i ); + ///////////////////////////////////////////////// + // PO, query BRAM + //////////////////////////////////////////////// logic [NWAY-1:0][1:0][CACHELINE_WIDTH-1:0] data_bram_rdata; logic [NWAY-1:0][1:0][CACHELINE_WIDTH-1:0] data_bram_wdata; @@ -46,6 +51,22 @@ module icache #( generate for (genvar i = 0; i < NWAY; i++) begin : tag_bram + +`ifdef BRAM_IP + bram_icache_tag_ram u_bram ( + .clka (clk), + .clkb (clk), + .wea (tag_bram_we[i][0]), + .web (tag_bram_we[i][1]), + .dina (tag_bram_wdata[i][0]), + .addra(tag_bram_addr[i][0]), + .douta(tag_bram_rdata[i][0]), + .dinb (tag_bram_wdata[i][1]), + .addrb(tag_bram_addr[i][1]), + .doutb(tag_bram_rdata[i][1]) + ); +`else + bram #( .DATA_WIDTH (TAG_BRAM_WIDTH), .DATA_DEPTH_EXP2(10) @@ -60,10 +81,25 @@ module icache #( .addrb(tag_bram_addr[i][1]), .doutb(tag_bram_rdata[i][1]) ); +`endif end endgenerate generate for (genvar i = 0; i < NWAY; i++) begin : data_bram +`ifdef BRAM_IP + bram_icache_data_ram u_bram ( + .clka (clk), + .clkb (clk), + .wea (data_bram_we[i][0]), + .web 
(data_bram_we[i][1]), + .dina (data_bram_wdata[i][0]), + .addra(data_bram_addr[i][0]), + .douta(data_bram_rdata[i][0]), + .dinb (data_bram_wdata[i][1]), + .addrb(data_bram_addr[i][1]), + .doutb(data_bram_rdata[i][1]) + ); +`else bram #( .DATA_WIDTH (128), .DATA_DEPTH_EXP2(10) @@ -78,22 +114,29 @@ module icache #( .addrb(data_bram_addr[i][1]), .doutb(data_bram_rdata[i][1]) ); +`endif end endgenerate - // Cache addr - always_comb begin : cache_addr_gen + // BRAM index gen + always_comb begin : bram_addr_gen for (integer i = 0; i < NWAY; i++) begin - if (rreq_1_i) begin + if (miss_1 | (state == REFILL_1_WAIT & ~rvalid_1)) begin + tag_bram_addr[i][0] = p1_raddr_1[11:4]; + data_bram_addr[i][0] = p1_raddr_1[11:4]; + end else if (rreq_1_i) begin tag_bram_addr[i][0] = raddr_1_i[11:4]; data_bram_addr[i][0] = raddr_1_i[11:4]; - end else begin // TODO: write + end else begin tag_bram_addr[i][0] = 0; data_bram_addr[i][0] = 0; end end for (integer i = 0; i < NWAY; i++) begin - if (rreq_2_i) begin + if (miss_2 | (state == REFILL_2_WAIT & ~rvalid_2)) begin + tag_bram_addr[i][1] = p1_raddr_2[11:4]; + data_bram_addr[i][1] = p1_raddr_2[11:4]; + end else if (rreq_2_i) begin tag_bram_addr[i][1] = raddr_2_i[11:4]; data_bram_addr[i][1] = raddr_2_i[11:4]; end else begin @@ -103,48 +146,184 @@ module icache #( end end + + + //////////////////////////////////////////////////// + // P1, output gen + /////////////////////////////////////////////////// + + // Input reg + logic p1_rreq_1, p1_rreq_2; + logic [ADDR_WIDTH-1:0] p1_raddr_1, p1_raddr_2; + always_ff @(posedge clk) begin + if (rvalid_1_o | ~p1_rreq_1) begin + p1_rreq_1 <= rreq_1_i; + p1_raddr_1 <= raddr_1_i; + end + if (rvalid_2_o | ~p1_rreq_2) begin + p1_rreq_2 <= rreq_2_i; + p1_raddr_2 <= raddr_2_i; + end + end + + logic [NWAY-1:0][1:0] tag_hit; always_comb begin for (integer i = 0; i < NWAY; i++) begin - tag_hit[i][0] = tag_bram_rdata[i][0][19:0] == raddr_1_i[ADDR_WIDTH-1:ADDR_WIDTH-20]; - tag_hit[i][1] = tag_bram_rdata[i][1][19:0] 
== raddr_2_i[ADDR_WIDTH-1:ADDR_WIDTH-20]; + tag_hit[i][0] = tag_bram_rdata[i][0][19:0] == p1_raddr_1[ADDR_WIDTH-1:ADDR_WIDTH-20] && tag_bram_rdata[i][0][20]; + tag_hit[i][1] = tag_bram_rdata[i][1][19:0] == p1_raddr_2[ADDR_WIDTH-1:ADDR_WIDTH-20] && tag_bram_rdata[i][1][20]; end end + logic rvalid_1, rvalid_2; + assign rvalid_1_o = rvalid_1 && p1_rreq_1; + assign rvalid_2_o = rvalid_2 && p1_rreq_2; // Generate read output - logic [1:0] offset_1, offset_2; - assign offset_1 = raddr_1_i[3:2]; - assign offset_2 = raddr_2_i[3:2]; - logic [NWAY-1:0][1:0][DATA_WIDTH-1:0] data_inside_cacheline; always_comb begin + rvalid_1 = 0; + rdata_1_o = 0; + rvalid_2 = 0; + rdata_2_o = 0; for (integer i = 0; i < NWAY; i++) begin - case (offset_1) - 2'b00: data_inside_cacheline[i][0] = data_bram_rdata[i][0][31:0]; - 2'b01: data_inside_cacheline[i][0] = data_bram_rdata[i][0][63:32]; - 2'b10: data_inside_cacheline[i][0] = data_bram_rdata[i][0][95:64]; - 2'b11: data_inside_cacheline[i][0] = data_bram_rdata[i][0][127:96]; - endcase - case (offset_2) - 2'b00: data_inside_cacheline[i][1] = data_bram_rdata[i][1][31:0]; - 2'b01: data_inside_cacheline[i][1] = data_bram_rdata[i][1][63:32]; - 2'b10: data_inside_cacheline[i][1] = data_bram_rdata[i][1][95:64]; - 2'b11: data_inside_cacheline[i][1] = data_bram_rdata[i][1][127:96]; + if (tag_hit[i][0]) begin + rvalid_1 = 1; + rdata_1_o = data_bram_rdata[i][0]; + end + if (tag_hit[i][1]) begin + rvalid_2 = 1; + rdata_2_o = data_bram_rdata[i][1]; + end + end + end + + + // Refill state machine + enum int { + IDLE, + REFILL_1_REQ, + REFILL_1_WAIT, + REFILL_2_REQ, + REFILL_2_WAIT + } + state, next_state; + always_ff @(posedge clk) begin + if (rst) begin + state <= IDLE; + end else begin + state <= next_state; + end + end + + + logic miss_1_pulse, miss_2_pulse, miss_1_r, miss_2_r, miss_1, miss_2; + assign miss_1_pulse = p1_rreq_1 & ~rvalid_1 & (state == IDLE); + assign miss_2_pulse = p1_rreq_2 & ~rvalid_2 & (state == IDLE); + assign miss_1 = miss_1_pulse | 
miss_1_r; + assign miss_2 = miss_2_pulse | miss_2_r; + always_ff @(posedge clk) begin + if (rst) begin + miss_1_r <= 0; + miss_2_r <= 0; + end else begin + case (state) + IDLE: begin + miss_1_r <= miss_1_pulse; + miss_2_r <= miss_2_pulse; + end + REFILL_1_WAIT: begin + if (axi_rvalid_i) miss_1_r <= 0; + end + REFILL_2_WAIT: begin + if (axi_rvalid_i) miss_2_r <= 0; + end + default: begin + end endcase end end + + + + always_comb begin : transition_comb + case (state) + IDLE: begin + if (miss_1) next_state = REFILL_1_REQ; + else if (miss_2) next_state = REFILL_2_REQ; + else next_state = IDLE; + end + REFILL_1_REQ: begin + if (axi_rdy_i) next_state = REFILL_1_WAIT; + else next_state = REFILL_1_REQ; + end + REFILL_2_REQ: begin + if (axi_rdy_i) next_state = REFILL_2_WAIT; + else next_state = REFILL_2_REQ; + end + REFILL_1_WAIT: begin + if (rvalid_1) begin + if (miss_2) next_state = REFILL_2_REQ; + else next_state = IDLE; + end else next_state = REFILL_1_WAIT; + end + REFILL_2_WAIT: begin + if (rvalid_2) begin + next_state = IDLE; + end else next_state = REFILL_2_WAIT; + end + default: begin + next_state = IDLE; + end + endcase + end + + // Read request to AXI Controller always_comb begin - rvalid_1_o = 0; - rdata_1_o = 0; - rvalid_2_o = 0; - rdata_2_o = 0; - for (integer i = 0; i < NWAY; i++) begin - if (tag_hit[i][0]) begin - rvalid_1_o = 1; - rdata_1_o = data_inside_cacheline[i][0]; + case (state) + REFILL_1_REQ, REFILL_1_WAIT: begin + axi_rreq_o = miss_1 ? 1 : 0; + axi_addr_o = miss_1 ? p1_raddr_1 : 0; end - if (tag_hit[i][1]) begin - rvalid_2_o = 1; - rdata_2_o = data_inside_cacheline[i][1]; + REFILL_2_REQ, REFILL_2_WAIT: begin + axi_rreq_o = miss_2 ? 1 : 0; + axi_addr_o = miss_2 ? 
p1_raddr_2 : 0; + end + default: begin + axi_rreq_o = 0; + axi_addr_o = 0; + end + endcase + end + + // Refill write BRAM + logic [2:0] random_r; + lfsr #( + .WIDTH(3) + ) u_lfsr ( + .clk (clk), + .rst (rst), + .en (1'b1), + .value(random_r) + ); + always_comb begin + for (integer i = 0; i < NWAY; i++) begin + tag_bram_we[i] = 0; + tag_bram_wdata[i] = 0; + data_bram_we[i] = 0; + data_bram_wdata[i] = 0; + if (i[0] == random_r[0]) begin + // write this way + if (state == REFILL_1_WAIT && axi_rvalid_i) begin + tag_bram_we[i][0] = 1; + tag_bram_wdata[i][0] = {1'b1, p1_raddr_1[31:12]}; + data_bram_we[i][0] = 1; + data_bram_wdata[i][0] = axi_data_i; + end + if (state == REFILL_2_WAIT && axi_rvalid_i) begin + tag_bram_we[i][1] = 1; + tag_bram_wdata[i][1] = {1'b1, p1_raddr_2[31:12]}; + data_bram_we[i][1] = 1; + data_bram_wdata[i][1] = axi_data_i; + end end end end diff --git a/src/vsrc/instr_buffer.sv b/src/vsrc/instr_buffer.sv index 7b3705b..38d08b6 100644 --- a/src/vsrc/instr_buffer.sv +++ b/src/vsrc/instr_buffer.sv @@ -25,12 +25,13 @@ module instr_buffer #( instr_buffer_info_t buffer_queue[BUFFER_SIZE], next_buffer_queue[BUFFER_SIZE]; - logic [$clog2(BUFFER_SIZE)-1:0] read_ptr, write_ptr, write_ptr_plus_2; + logic [$clog2(BUFFER_SIZE)-1:0] read_ptr, write_ptr; // Workaround, verilator seems to extend {write_ptr + 2} to more bits // we want a loopback counter, so declare a fixed width to get around - assign write_ptr_plus_2 = write_ptr + 2; - assign frontend_stallreq_o = (write_ptr_plus_2 == read_ptr); + logic [$clog2(BUFFER_SIZE)-1:0] buffer_clearance; + assign buffer_clearance = read_ptr - write_ptr; + assign frontend_stallreq_o = (buffer_clearance <= 4 && buffer_clearance != 0); // State transition always_ff @(posedge clk or negedge rst_n) begin : buffer_queue_ff diff --git a/src/vsrc/instr_info.sv b/src/vsrc/instr_info.sv index 6be8709..dece97e 100644 --- a/src/vsrc/instr_info.sv +++ b/src/vsrc/instr_info.sv @@ -5,6 +5,7 @@ typedef struct packed { bit valid; + bit 
is_last_in_block; // Mark the last instruction in basic block bit [`InstAddrBus] pc; bit [`InstBus] instr; diff --git a/src/vsrc/pipeline/1_decode/id.sv b/src/vsrc/pipeline/1_decode/id.sv index ddd1e54..ad5726a 100644 --- a/src/vsrc/pipeline/1_decode/id.sv +++ b/src/vsrc/pipeline/1_decode/id.sv @@ -42,6 +42,8 @@ module id ( assign pc_i = instr_buffer_i.valid ? instr_buffer_i.pc : `ZeroWord; logic [`InstBus] inst_i; assign inst_i = instr_buffer_i.valid ? instr_buffer_i.instr : `ZeroWord; + logic is_last_in_block; + assign is_last_in_block = instr_buffer_i.valid ? instr_buffer_i.is_last_in_block : 0; logic instr_break, instr_syscall, kernel_instr; assign kernel_instr = dispatch_o.aluop == `EXE_CSRRD_OP | dispatch_o.aluop == `EXE_CSRWR_OP | dispatch_o.aluop == `EXE_CSRXCHG_OP | @@ -216,6 +218,7 @@ module id ( assign dispatch_o.instr_info.valid = instr_valid; assign dispatch_o.instr_info.pc = pc_i; assign dispatch_o.instr_info.instr = inst_i; + assign dispatch_o.instr_info.is_last_in_block = is_last_in_block; // TODO: add explanation @@ -230,7 +233,9 @@ module id ( assign excp_ipe = kernel_instr && (csr_plv == 2'b11); assign excp = excp_ipe | instr_syscall | instr_break | instr_buffer_i.excp | excp_ine | has_int; - assign excp_num = {excp_ipe, excp_ine, instr_break, instr_syscall, instr_buffer_i.excp_num, has_int}; + assign excp_num = { + excp_ipe, excp_ine, instr_break, instr_syscall, instr_buffer_i.excp_num, has_int + }; assign dispatch_o.excp = excp; assign dispatch_o.excp_num = excp_num; diff --git a/src/vsrc/pipeline/4_mem/mem.sv b/src/vsrc/pipeline/4_mem/mem.sv index 878fae4..686118d 100644 --- a/src/vsrc/pipeline/4_mem/mem.sv +++ b/src/vsrc/pipeline/4_mem/mem.sv @@ -9,23 +9,22 @@ module mem ( output mem_wb_struct signal_o, - output mem_axi_struct signal_axi_o, + output mem_cache_struct signal_cache_o, // -> Ctrl output logic stallreq, - // <- AXI Controller - input logic axi_busy_i, + // <- cache + input logic addr_ok, + input logic data_ok, input logic 
[`RegBus] mem_data_i, + output data_fetch, + input logic LLbit_i, input logic wb_LLbit_we_i, input logic wb_LLbit_value_i, - //from csr - input csr_to_mem_struct csr_mem_signal, - input logic disable_cache, - // Data forward // -> Dispatch // -> EX @@ -49,14 +48,6 @@ module mem ( logic access_mem; logic mem_store_op; logic mem_load_op; - logic excp_adem; - logic pg_mode; - logic da_mode; - logic excp_tlbr; - logic excp_pil; - logic excp_pis; - logic excp_pme; - logic excp_ppi; logic [`InstAddrBus] debug_pc_i; assign debug_pc_i = signal_i.instr_info.pc; @@ -75,9 +66,11 @@ module mem ( assign excp_i = signal_i.excp; assign excp_num_i = signal_i.excp_num; + assign data_fetch = addr_ok | aluop_i == `EXE_TLBSRCH_OP; + assign access_mem = mem_load_op || mem_store_op; - assign stallreq = axi_busy_i & (mem_load_op | mem_store_op); + assign stallreq = !data_ok & (mem_load_op | mem_store_op); assign mem_load_op = aluop_i == `EXE_LD_B_OP || aluop_i == `EXE_LD_BU_OP || aluop_i == `EXE_LD_H_OP || aluop_i == `EXE_LD_HU_OP || aluop_i == `EXE_LD_W_OP || aluop_i == `EXE_LL_OP; @@ -105,28 +98,11 @@ module mem ( assign signal_o.load_addr = mem_load_op ? mem_addr : 0; assign signal_o.store_addr = mem_store_op ? 
mem_addr : 0; - //addr dmw trans - assign dmw0_en = ((csr_mem_signal.csr_dmw0[`PLV0] && csr_mem_signal.csr_plv == 2'd0) || (csr_mem_signal.csr_dmw0[`PLV3] && csr_mem_signal.csr_plv == 2'd3)) && (signal_i.wdata[31:29] == csr_mem_signal.csr_dmw0[`VSEG]); - assign dmw1_en = ((csr_mem_signal.csr_dmw1[`PLV0] && csr_mem_signal.csr_plv == 2'd0) || (csr_mem_signal.csr_dmw1[`PLV3] && csr_mem_signal.csr_plv == 2'd3)) && (signal_i.wdata[31:29] == csr_mem_signal.csr_dmw1[`VSEG]); - - assign pg_mode = !csr_mem_signal.csr_da && csr_mem_signal.csr_pg; - assign da_mode = csr_mem_signal.csr_da && !csr_mem_signal.csr_pg; - - assign data_addr_trans_en = pg_mode && !dmw0_en && !dmw1_en && !cacop_op_mode_di; - // Data forward assign mem_data_forward_o = {mem_load_op, signal_o.wreg, signal_o.waddr, signal_o.wdata}; - assign excp_tlbr = access_mem && !tlb_mem_signal.data_tlb_found && data_addr_trans_en; - assign excp_pil = mem_load_op && !tlb_mem_signal.data_tlb_v && data_addr_trans_en; //cache will generate pil exception?? 
- assign excp_pis = mem_store_op && !tlb_mem_signal.data_tlb_v && data_addr_trans_en; - assign excp_ppi = access_mem && tlb_mem_signal.data_tlb_v && (csr_mem_signal.csr_plv > tlb_mem_signal.data_tlb_plv) && data_addr_trans_en; - assign excp_pme = mem_store_op && tlb_mem_signal.data_tlb_v && (csr_mem_signal.csr_plv <= tlb_mem_signal.data_tlb_plv) && !tlb_mem_signal.data_tlb_d && data_addr_trans_en; - - assign signal_o.excp = excp_tlbr || excp_pil || excp_pis || excp_ppi || excp_pme || excp_adem || excp_i; - assign signal_o.excp_num = { - excp_pil, excp_pis, excp_ppi, excp_pme, excp_tlbr, excp_adem, excp_num_i - }; + assign signal_o.excp = excp_i; + assign signal_o.excp_num = excp_num_i; assign signal_o.refetch = signal_i.refetch; always @(*) begin @@ -147,11 +123,8 @@ module mem ( signal_o.aluop = 0; signal_o.tlb_found = 1'b0; signal_o.tlb_index = 5'b0; - signal_axi_o = 0; - LLbit_we_o = 1'b0; - LLbit_value_o = 1'b0; signal_o.inv_i = 0; - signal_axi_o = 0; + signal_cache_o = 0; signal_o.store_data = 0; end else begin LLbit_we_o = 1'b0; @@ -167,30 +140,30 @@ module mem ( signal_o.tlb_index = tlb_mem_signal.data_tlb_index; signal_o.csr_signal = signal_i.csr_signal; signal_o.inv_i = signal_i.inv_i; - signal_axi_o = 0; + signal_cache_o = 0; signal_o.store_data = 0; case (aluop_i) `EXE_LD_B_OP: begin - signal_axi_o.addr = mem_addr; + signal_cache_o.addr = mem_addr; signal_o.wreg = `WriteEnable; - signal_axi_o.ce = `ChipEnable; - signal_axi_o.sel = 4'b1111; + signal_cache_o.ce = `ChipEnable; + signal_cache_o.sel = 4'b1111; case (mem_addr[1:0]) 2'b11: begin signal_o.wdata = {{24{mem_data_i[31]}}, mem_data_i[31:24]}; - // signal_axi_o.sel = 4'b1000; + signal_cache_o.sel = 4'b1000; end 2'b10: begin signal_o.wdata = {{24{mem_data_i[23]}}, mem_data_i[23:16]}; - // signal_axi_o.sel = 4'b0100; + signal_cache_o.sel = 4'b0100; end 2'b01: begin signal_o.wdata = {{24{mem_data_i[15]}}, mem_data_i[15:8]}; - // signal_axi_o.sel = 4'b0010; + signal_cache_o.sel = 4'b0010; end 2'b00: 
begin signal_o.wdata = {{24{mem_data_i[7]}}, mem_data_i[7:0]}; - // signal_axi_o.sel = 4'b0001; + signal_cache_o.sel = 4'b0001; end default: begin signal_o.wdata = `ZeroWord; @@ -198,18 +171,18 @@ module mem ( endcase end `EXE_LD_H_OP: begin - signal_axi_o.addr = mem_addr; + signal_cache_o.addr = mem_addr; signal_o.wreg = `WriteEnable; - signal_axi_o.ce = `ChipEnable; + signal_cache_o.ce = `ChipEnable; case (mem_addr[1:0]) 2'b10: begin - signal_o.wdata = {{16{mem_data_i[31]}}, mem_data_i[31:16]}; - signal_axi_o.sel = 4'b1100; + signal_o.wdata = {{16{mem_data_i[31]}}, mem_data_i[31:16]}; + signal_cache_o.sel = 4'b1100; end 2'b00: begin - signal_o.wdata = {{16{mem_data_i[15]}}, mem_data_i[15:0]}; - signal_axi_o.sel = 4'b0011; + signal_o.wdata = {{16{mem_data_i[15]}}, mem_data_i[15:0]}; + signal_cache_o.sel = 4'b0011; end default: begin @@ -218,32 +191,32 @@ module mem ( endcase end `EXE_LD_W_OP: begin - signal_axi_o.addr = mem_addr; + signal_cache_o.addr = mem_addr; signal_o.wreg = `WriteEnable; - signal_axi_o.ce = `ChipEnable; - signal_axi_o.sel = 4'b1111; + signal_cache_o.ce = `ChipEnable; + signal_cache_o.sel = 4'b1111; signal_o.wdata = mem_data_i; end `EXE_LD_BU_OP: begin - signal_axi_o.addr = mem_addr; + signal_cache_o.addr = mem_addr; signal_o.wreg = `WriteEnable; - signal_axi_o.ce = `ChipEnable; + signal_cache_o.ce = `ChipEnable; case (mem_addr[1:0]) 2'b11: begin - signal_o.wdata = {{24{1'b0}}, mem_data_i[31:24]}; - signal_axi_o.sel = 4'b1000; + signal_o.wdata = {{24{1'b0}}, mem_data_i[31:24]}; + signal_cache_o.sel = 4'b1000; end 2'b10: begin - signal_o.wdata = {{24{1'b0}}, mem_data_i[23:16]}; - signal_axi_o.sel = 4'b0100; + signal_o.wdata = {{24{1'b0}}, mem_data_i[23:16]}; + signal_cache_o.sel = 4'b0100; end 2'b01: begin - signal_o.wdata = {{24{1'b0}}, mem_data_i[15:8]}; - signal_axi_o.sel = 4'b0010; + signal_o.wdata = {{24{1'b0}}, mem_data_i[15:8]}; + signal_cache_o.sel = 4'b0010; end 2'b00: begin - signal_o.wdata = {{24{1'b0}}, mem_data_i[7:0]}; - 
signal_axi_o.sel = 4'b0001; + signal_o.wdata = {{24{1'b0}}, mem_data_i[7:0]}; + signal_cache_o.sel = 4'b0001; end default: begin signal_o.wdata = `ZeroWord; @@ -251,17 +224,17 @@ module mem ( endcase end `EXE_LD_HU_OP: begin - signal_axi_o.addr = mem_addr; + signal_cache_o.addr = mem_addr; signal_o.wreg = `WriteEnable; - signal_axi_o.ce = `ChipEnable; + signal_cache_o.ce = `ChipEnable; case (mem_addr[1:0]) 2'b10: begin - signal_o.wdata = {{16{1'b0}}, mem_data_i[31:16]}; - signal_axi_o.sel = 4'b1100; + signal_o.wdata = {{16{1'b0}}, mem_data_i[31:16]}; + signal_cache_o.sel = 4'b1100; end 2'b00: begin - signal_o.wdata = {{16{1'b0}}, mem_data_i[15:0]}; - signal_axi_o.sel = 4'b0011; + signal_o.wdata = {{16{1'b0}}, mem_data_i[15:0]}; + signal_cache_o.sel = 4'b0011; end default: begin signal_o.wdata = `ZeroWord; @@ -269,76 +242,76 @@ module mem ( endcase end `EXE_ST_B_OP: begin - signal_axi_o.addr = mem_addr; + signal_cache_o.addr = mem_addr; signal_o.wreg = `WriteEnable; - signal_axi_o.we = `WriteEnable; - signal_axi_o.ce = `ChipEnable; - signal_axi_o.data = {reg2_i[7:0], reg2_i[7:0], reg2_i[7:0], reg2_i[7:0]}; + signal_cache_o.we = `WriteEnable; + signal_cache_o.ce = `ChipEnable; + signal_cache_o.data = {reg2_i[7:0], reg2_i[7:0], reg2_i[7:0], reg2_i[7:0]}; case (mem_addr[1:0]) 2'b11: begin - signal_axi_o.sel = 4'b1000; + signal_cache_o.sel = 4'b1000; signal_o.store_data = {reg2_i[7:0], 24'b0}; end 2'b10: begin - signal_axi_o.sel = 4'b0100; + signal_cache_o.sel = 4'b0100; signal_o.store_data = {8'b0, reg2_i[7:0], 16'b0}; end 2'b01: begin - signal_axi_o.sel = 4'b0010; + signal_cache_o.sel = 4'b0010; signal_o.store_data = {16'b0, reg2_i[7:0], 8'b0}; end 2'b00: begin - signal_axi_o.sel = 4'b0001; + signal_cache_o.sel = 4'b0001; signal_o.store_data = {24'b0, reg2_i[7:0]}; end endcase end `EXE_ST_H_OP: begin - signal_axi_o.addr = mem_addr; + signal_cache_o.addr = mem_addr; signal_o.wreg = `WriteEnable; - signal_axi_o.we = `WriteEnable; - signal_axi_o.ce = `ChipEnable; - 
signal_axi_o.data = {reg2_i[15:0], reg2_i[15:0]}; + signal_cache_o.we = `WriteEnable; + signal_cache_o.ce = `ChipEnable; + signal_cache_o.data = {reg2_i[15:0], reg2_i[15:0]}; case (mem_addr[1:0]) 2'b10: begin - signal_axi_o.sel = 4'b1100; + signal_cache_o.sel = 4'b1100; signal_o.store_data = {reg2_i[15:0], 16'b0}; end 2'b00: begin - signal_axi_o.sel = 4'b0011; + signal_cache_o.sel = 4'b0011; signal_o.store_data = {16'b0, reg2_i[15:0]}; end default: begin - signal_axi_o.sel = 4'b0000; + signal_cache_o.sel = 4'b0000; end endcase end `EXE_ST_W_OP: begin - signal_axi_o.addr = mem_addr; + signal_cache_o.addr = mem_addr; signal_o.wreg = `WriteEnable; - signal_axi_o.we = `WriteEnable; - signal_axi_o.ce = `ChipEnable; - signal_axi_o.data = reg2_i; - signal_axi_o.sel = 4'b1111; + signal_cache_o.we = `WriteEnable; + signal_cache_o.ce = `ChipEnable; + signal_cache_o.data = reg2_i; + signal_cache_o.sel = 4'b1111; signal_o.store_data = reg2_i; end `EXE_LL_OP: begin - signal_axi_o.addr = mem_addr; + signal_cache_o.addr = mem_addr; signal_o.wreg = `WriteDisable; - signal_axi_o.ce = `ChipEnable; - signal_axi_o.sel = 4'b1111; + signal_cache_o.ce = `ChipEnable; + signal_cache_o.sel = 4'b1111; signal_o.wdata = mem_data_i; LLbit_we_o = 1'b1; LLbit_value_o = 1'b1; end `EXE_SC_OP: begin if (LLbit == 1'b1) begin - signal_axi_o.addr = mem_addr; - signal_axi_o.we = `WriteEnable; + signal_cache_o.addr = mem_addr; + signal_cache_o.we = `WriteEnable; signal_o.wreg = `WriteEnable; - signal_axi_o.ce = `ChipEnable; - signal_axi_o.data = reg2_i; - signal_axi_o.sel = 4'b1111; + signal_cache_o.ce = `ChipEnable; + signal_cache_o.data = reg2_i; + signal_cache_o.sel = 4'b1111; LLbit_we_o = 1'b1; LLbit_value_o = 1'b0; signal_o.wdata = 32'b1; @@ -348,7 +321,7 @@ module mem ( end default: begin // Reset AXI signals, IMPORTANT! 
- signal_axi_o = 0; + signal_cache_o = 0; end endcase end diff --git a/src/vsrc/pipeline/4_mem/mem_wb.sv b/src/vsrc/pipeline/4_mem/mem_wb.sv index 1125eac..569641e 100644 --- a/src/vsrc/pipeline/4_mem/mem_wb.sv +++ b/src/vsrc/pipeline/4_mem/mem_wb.sv @@ -12,46 +12,73 @@ module mem_wb ( input logic flush, + //<- csr + input csr_to_mem_struct csr_mem_signal, + input logic disable_cache, + + //<- tlb + output logic data_addr_trans_en, + output logic dmw0_en, + output logic dmw1_en, + output logic cacop_op_mode_di, + + //-> tlb + input tlb_to_mem_struct tlb_mem_signal, + // load store relate difftest - output wb_ctrl wb_ctrl_signal + output wb_ctrl wb_ctrl_signal, + + // <-> Frontend + output logic is_last_in_block ); + logic excp, pg_mode, da_mode; + logic [15:0] excp_num; + logic access_mem, mem_store_op, mem_load_op; + logic excp_tlbr, excp_pil, excp_pis, excp_pme, excp_ppi, excp_adem; + + assign access_mem = mem_load_op || mem_store_op; + + assign mem_load_op = mem_signal_o.aluop == `EXE_LD_B_OP || mem_signal_o.aluop == `EXE_LD_BU_OP || mem_signal_o.aluop == `EXE_LD_H_OP || mem_signal_o.aluop == `EXE_LD_HU_OP || + mem_signal_o.aluop == `EXE_LD_W_OP || mem_signal_o.aluop == `EXE_LL_OP; + + assign mem_store_op = mem_signal_o.aluop == `EXE_ST_B_OP || mem_signal_o.aluop == `EXE_ST_H_OP || mem_signal_o.aluop == `EXE_ST_W_OP || mem_signal_o.aluop == `EXE_SC_OP; + + assign dmw0_en = ((csr_mem_signal.csr_dmw0[`PLV0] && csr_mem_signal.csr_plv == 2'd0) || (csr_mem_signal.csr_dmw0[`PLV3] && csr_mem_signal.csr_plv == 2'd3)) && (mem_signal_o.wdata[31:29] == csr_mem_signal.csr_dmw0[`VSEG]); + assign dmw1_en = ((csr_mem_signal.csr_dmw1[`PLV0] && csr_mem_signal.csr_plv == 2'd0) || (csr_mem_signal.csr_dmw1[`PLV3] && csr_mem_signal.csr_plv == 2'd3)) && (mem_signal_o.wdata[31:29] == csr_mem_signal.csr_dmw1[`VSEG]); + + assign pg_mode = !csr_mem_signal.csr_da && csr_mem_signal.csr_pg; + assign da_mode = csr_mem_signal.csr_da && !csr_mem_signal.csr_pg; + + assign data_addr_trans_en = 
pg_mode && !dmw0_en && !dmw1_en && !cacop_op_mode_di; + + assign excp_adem = 0; + assign excp_tlbr = access_mem && !tlb_mem_signal.data_tlb_found && data_addr_trans_en; + assign excp_pil = mem_load_op && !tlb_mem_signal.data_tlb_v && data_addr_trans_en; //cache will generate pil exception?? + assign excp_pis = mem_store_op && !tlb_mem_signal.data_tlb_v && data_addr_trans_en; + assign excp_ppi = access_mem && tlb_mem_signal.data_tlb_v && (csr_mem_signal.csr_plv > tlb_mem_signal.data_tlb_plv) && data_addr_trans_en; + assign excp_pme = mem_store_op && tlb_mem_signal.data_tlb_v && (csr_mem_signal.csr_plv <= tlb_mem_signal.data_tlb_plv) && !tlb_mem_signal.data_tlb_d && data_addr_trans_en; + + assign excp = excp_tlbr || excp_pil || excp_pis || excp_ppi || excp_pme || excp_adem || mem_signal_o.excp; + assign excp_num = { + excp_pil, excp_pis, excp_ppi, excp_pme, excp_tlbr, excp_adem, mem_signal_o.excp_num + }; + // For observability logic [`RegBus] debug_mem_wdata; assign debug_mem_wdata = mem_signal_o.wdata; always @(posedge clk) begin if (rst == `RstEnable) begin - wb_ctrl_signal.valid <= 1'b0; - wb_ctrl_signal.aluop <= 8'b0; - wb_ctrl_signal.wb_reg_o.waddr <= `NOPRegAddr; - wb_ctrl_signal.wb_reg_o.we <= `WriteDisable; - wb_ctrl_signal.wb_reg_o.wdata <= `ZeroWord; - wb_ctrl_signal.wb_reg_o.pc <= `ZeroWord; - wb_ctrl_signal.llbit_o.we <= 1'b0; - wb_ctrl_signal.llbit_o.value <= 1'b0; - wb_ctrl_signal.excp <= 1'b0; - wb_ctrl_signal.excp_num <= 16'b0; - wb_ctrl_signal.fetch_flush <= 1'b0; - wb_ctrl_signal.data_tlb_found <= 1'b0; - wb_ctrl_signal.data_tlb_index <= 5'b0; - wb_ctrl_signal.csr_signal_o <= 47'b0; - wb_ctrl_signal.inv_i <= 0; - wb_ctrl_signal.diff_commit_o.instr <= `ZeroWord; - wb_ctrl_signal.diff_commit_o.pc <= `ZeroWord; - wb_ctrl_signal.diff_commit_o.valid <= `InstInvalid; - wb_ctrl_signal.diff_commit_o.inst_ld_en <= 8'b0; - wb_ctrl_signal.diff_commit_o.inst_st_en <= 8'b0; - wb_ctrl_signal.diff_commit_o.ld_paddr <= `ZeroWord; - 
wb_ctrl_signal.diff_commit_o.ld_vaddr <= `ZeroWord; - wb_ctrl_signal.diff_commit_o.st_paddr <= `ZeroWord; - wb_ctrl_signal.diff_commit_o.st_vaddr <= `ZeroWord; - wb_ctrl_signal.diff_commit_o.st_data <= `ZeroWord; + wb_ctrl_signal <= 0; + is_last_in_block <= 0; end else if (stall == `Stop) begin wb_ctrl_signal.diff_commit_o.instr <= `ZeroWord; wb_ctrl_signal.diff_commit_o.pc <= `ZeroWord; wb_ctrl_signal.diff_commit_o.valid <= `InstInvalid; + is_last_in_block <= 0; end else begin + is_last_in_block <= mem_signal_o.instr_info.is_last_in_block; wb_ctrl_signal.valid <= 1'b1; wb_ctrl_signal.aluop <= mem_signal_o.aluop; wb_ctrl_signal.wb_reg_o.waddr <= mem_signal_o.waddr; @@ -60,8 +87,8 @@ module mem_wb ( wb_ctrl_signal.wb_reg_o.pc <= mem_signal_o.instr_info.pc; wb_ctrl_signal.llbit_o.we <= mem_LLbit_we; wb_ctrl_signal.llbit_o.value <= mem_LLbit_value; - wb_ctrl_signal.excp <= mem_signal_o.excp; - wb_ctrl_signal.excp_num <= mem_signal_o.excp_num; + wb_ctrl_signal.excp <= excp; + wb_ctrl_signal.excp_num <= excp_num; wb_ctrl_signal.fetch_flush <= mem_signal_o.refetch; wb_ctrl_signal.data_tlb_found <= mem_signal_o.tlb_found; wb_ctrl_signal.data_tlb_index <= mem_signal_o.tlb_index; diff --git a/src/vsrc/pipeline_defines.sv b/src/vsrc/pipeline_defines.sv index f427d23..44566dc 100644 --- a/src/vsrc/pipeline_defines.sv +++ b/src/vsrc/pipeline_defines.sv @@ -103,7 +103,7 @@ typedef struct packed { logic [`RegBus] store_data; logic excp; - logic [15:0] excp_num; + logic [9:0] excp_num; logic refetch; logic tlb_found; @@ -118,7 +118,8 @@ typedef struct packed { logic [3:0] sel; logic [`DataAddrBus] addr; logic [`RegBus] data; -} mem_axi_struct; + logic uncache_en; +} mem_cache_struct; typedef struct packed { logic we; diff --git a/src/vsrc/tlb.sv b/src/vsrc/tlb.sv index d53fea2..1381140 100644 --- a/src/vsrc/tlb.sv +++ b/src/vsrc/tlb.sv @@ -2,8 +2,7 @@ `include "csr_defines.sv" `include "tlb_defines.sv" -module tlb -( +module tlb ( input logic clk, input logic [9:0] asid, //trans 
mode @@ -22,130 +21,134 @@ module tlb //invtlb input tlb_inv_in_struct inv_signal_i, //from csr - input logic [31:0] csr_dmw0 , + input logic [31:0] csr_dmw0, input logic [31:0] csr_dmw1, input logic csr_da, - input logic csr_pg + input logic csr_pg ); -logic [18:0] s0_vppn ; -logic s0_odd_page ; -logic [ 5:0] s0_ps ; -logic [19:0] s0_ppn ; - -logic [18:0] s1_vppn ; -logic s1_odd_page ; -logic [ 5:0] s1_ps ; -logic [19:0] s1_ppn ; - -logic we ; -logic [ 4:0] w_index ; -tlb_wr_port w_port; - - -logic [ 4:0] r_index ; -tlb_wr_port r_port; - -logic [31:0] inst_vaddr_buffer ; -logic [31:0] data_vaddr_buffer ; -logic [31:0] inst_paddr; -logic [31:0] data_paddr; - -logic pg_mode; -logic da_mode; - -always @(posedge clk) begin - inst_vaddr_buffer <= inst_i.vaddr; - data_vaddr_buffer <= data_i.vaddr; -end - -//trans search port sig -assign s0_vppn = inst_i.vaddr[31:13]; -assign s0_odd_page = inst_i.vaddr[12]; - -assign s1_vppn = data_i.vaddr[31:13]; -assign s1_odd_page = data_i.vaddr[12]; - -//trans write port sig -assign we = write_signal_i.tlbfill_en || write_signal_i.tlbwr_en; -assign w_index = ({5{write_signal_i.tlbfill_en}} & write_signal_i.rand_index) | ({5{write_signal_i.tlbwr_en}} & write_signal_i.tlbidx[`INDEX]); -assign w_port.vppn = write_signal_i.tlbehi[`VPPN]; -assign w_port.g = write_signal_i.tlbelo0[`TLB_G] && write_signal_i.tlbelo1[`TLB_G]; -assign w_port.ps = write_signal_i.tlbidx[`PS]; -assign w_port.e = (write_signal_i.ecode == 6'h3f) ? 
1'b1 : !write_signal_i.tlbidx[`NE]; -assign w_port.v0 = write_signal_i.tlbelo0[`TLB_V]; -assign w_port.d0 = write_signal_i.tlbelo0[`TLB_D]; -assign w_port.plv0 = write_signal_i.tlbelo0[`TLB_PLV]; -assign w_port.mat0 = write_signal_i.tlbelo0[`TLB_MAT]; -assign w_port.ppn0 = write_signal_i.tlbelo0[`TLB_PPN_EN]; -assign w_port.v1 = write_signal_i.tlbelo1[`TLB_V]; -assign w_port.d1 = write_signal_i.tlbelo1[`TLB_D]; -assign w_port.plv1 = write_signal_i.tlbelo1[`TLB_PLV]; -assign w_port.mat1 = write_signal_i.tlbelo1[`TLB_MAT]; -assign w_port.ppn1 = write_signal_i.tlbelo1[`TLB_PPN_EN]; - -//trans read port sig -assign r_index = write_signal_i.tlbidx[`INDEX]; -assign read_signal_o.tlbehi = {r_port.vppn, 13'b0}; -assign read_signal_o.tlbelo0 = {4'b0, r_port.ppn0, 1'b0, r_port.g, r_port.mat0, r_port.plv0, r_port.d0, r_port.v0}; -assign read_signal_o.tlbelo1 = {4'b0, r_port.ppn1, 1'b0, r_port.g, r_port.mat1, r_port.plv1, r_port.d1, r_port.v1}; -assign read_signal_o.tlbidx = {!r_port.e, 1'b0, r_port.ps, 24'b0}; //note do not write index -assign read_signal_o.asid = r_port.asid; - -tlb_entry tlb_entry( - .clk (clk), - // search port 0 - .s0_fetch (inst_i.fetch), - .s0_vppn (s0_vppn), - .s0_odd_page (s0_odd_page), - .s0_asid (asid), - .s0_found (inst_o.tlb_found ), - .s0_index (), - .s0_ps (s0_ps), - .s0_ppn (s0_ppn), - .s0_v (inst_o.tlb_v ), - .s0_d (inst_o.tlb_d ), - .s0_mat (inst_o.tlb_mat ), - .s0_plv (inst_o.tlb_plv ), - // search port 1 - .s1_fetch (data_i.fetch ), - .s1_vppn (s1_vppn ), - .s1_odd_page (s1_odd_page ), - .s1_asid (asid ), - .s1_found (data_o.found ), - .s1_index (data_o.tlb_index ), - .s1_ps (s1_ps ), - .s1_ppn (s1_ppn ), - .s1_v (data_o.tlb_v ), - .s1_d (data_o.tlb_d ), - .s1_mat (data_o.tlb_mat ), - .s1_plv (data_o.tlb_plv ), - // write port - .we(we), - .w_index(w_index), - .write_port(w_port), - //read port - .r_index(r_index), - .read_port(r_port), - //invalid port - .inv_i(inv_signal_i) -); - -assign pg_mode = !csr_da && csr_pg; -assign da_mode = 
csr_da && !csr_pg; - -assign inst_paddr = (pg_mode && inst_i.dmw0_en) ? {csr_dmw0[`PSEG], inst_vaddr_buffer[28:0]} : + logic [18:0] s0_vppn; + logic s0_odd_page; + logic [ 5:0] s0_ps; + logic [19:0] s0_ppn; + + logic [18:0] s1_vppn; + logic s1_odd_page; + logic [ 5:0] s1_ps; + logic [19:0] s1_ppn; + + logic we; + logic [ 4:0] w_index; + tlb_wr_port w_port; + + + logic [ 4:0] r_index; + tlb_wr_port r_port; + + logic [31:0] inst_vaddr_buffer; + logic [31:0] data_vaddr_buffer; + logic [31:0] inst_paddr; + logic [31:0] data_paddr; + + logic pg_mode; + logic da_mode; + + always @(posedge clk) begin + inst_vaddr_buffer <= inst_i.vaddr; + data_vaddr_buffer <= data_i.vaddr; + end + + //trans search port sig + assign s0_vppn = inst_i.vaddr[31:13]; + assign s0_odd_page = inst_i.vaddr[12]; + + assign s1_vppn = data_i.vaddr[31:13]; + assign s1_odd_page = data_i.vaddr[12]; + + //trans write port sig + assign we = write_signal_i.tlbfill_en || write_signal_i.tlbwr_en; + assign w_index = ({5{write_signal_i.tlbfill_en}} & write_signal_i.rand_index) | ({5{write_signal_i.tlbwr_en}} & write_signal_i.tlbidx[`INDEX]); + assign w_port.vppn = write_signal_i.tlbehi[`VPPN]; + assign w_port.g = write_signal_i.tlbelo0[`TLB_G] && write_signal_i.tlbelo1[`TLB_G]; + assign w_port.ps = write_signal_i.tlbidx[`PS]; + assign w_port.e = (write_signal_i.ecode == 6'h3f) ? 
1'b1 : !write_signal_i.tlbidx[`NE]; + assign w_port.v0 = write_signal_i.tlbelo0[`TLB_V]; + assign w_port.d0 = write_signal_i.tlbelo0[`TLB_D]; + assign w_port.plv0 = write_signal_i.tlbelo0[`TLB_PLV]; + assign w_port.mat0 = write_signal_i.tlbelo0[`TLB_MAT]; + assign w_port.ppn0 = write_signal_i.tlbelo0[`TLB_PPN_EN]; + assign w_port.v1 = write_signal_i.tlbelo1[`TLB_V]; + assign w_port.d1 = write_signal_i.tlbelo1[`TLB_D]; + assign w_port.plv1 = write_signal_i.tlbelo1[`TLB_PLV]; + assign w_port.mat1 = write_signal_i.tlbelo1[`TLB_MAT]; + assign w_port.ppn1 = write_signal_i.tlbelo1[`TLB_PPN_EN]; + + //trans read port sig + assign r_index = write_signal_i.tlbidx[`INDEX]; + assign read_signal_o.tlbehi = {r_port.vppn, 13'b0}; + assign read_signal_o.tlbelo0 = { + 4'b0, r_port.ppn0, 1'b0, r_port.g, r_port.mat0, r_port.plv0, r_port.d0, r_port.v0 + }; + assign read_signal_o.tlbelo1 = { + 4'b0, r_port.ppn1, 1'b0, r_port.g, r_port.mat1, r_port.plv1, r_port.d1, r_port.v1 + }; + assign read_signal_o.tlbidx = {!r_port.e, 1'b0, r_port.ps, 24'b0}; //note do not write index + assign read_signal_o.asid = r_port.asid; + + tlb_entry tlb_entry ( + .clk (clk), + // search port 0 + .s0_fetch (inst_i.fetch), + .s0_vppn (s0_vppn), + .s0_odd_page(s0_odd_page), + .s0_asid (asid), + .s0_found (inst_o.tlb_found), + .s0_index (), + .s0_ps (s0_ps), + .s0_ppn (s0_ppn), + .s0_v (inst_o.tlb_v), + .s0_d (inst_o.tlb_d), + .s0_mat (inst_o.tlb_mat), + .s0_plv (inst_o.tlb_plv), + // search port 1 + .s1_fetch (data_i.fetch), + .s1_vppn (s1_vppn), + .s1_odd_page(s1_odd_page), + .s1_asid (asid), + .s1_found (data_o.found), + .s1_index (data_o.tlb_index), + .s1_ps (s1_ps), + .s1_ppn (s1_ppn), + .s1_v (data_o.tlb_v), + .s1_d (data_o.tlb_d), + .s1_mat (data_o.tlb_mat), + .s1_plv (data_o.tlb_plv), + // write port + .we (we), + .w_index (w_index), + .write_port (w_port), + //read port + .r_index (r_index), + .read_port (r_port), + //invalid port + .inv_i (inv_signal_i) + ); + + assign pg_mode = !csr_da && csr_pg; + 
assign da_mode = csr_da && !csr_pg; + + assign inst_paddr = (pg_mode && inst_i.dmw0_en) ? {csr_dmw0[`PSEG], inst_vaddr_buffer[28:0]} : (pg_mode && inst_i.dmw1_en) ? {csr_dmw1[`PSEG], inst_vaddr_buffer[28:0]} : inst_vaddr_buffer; -assign inst_o.offset = inst_i.vaddr[3:0]; -assign inst_o.index = inst_i.vaddr[11:4]; -assign inst_o.tag = inst_addr_trans_en ? ((s0_ps == 6'd12) ? s0_ppn : {s0_ppn[19:10], inst_paddr[21:12]}) : inst_paddr[31:12]; + assign inst_o.offset = inst_i.vaddr[3:0]; + assign inst_o.index = inst_i.vaddr[11:4]; + assign inst_o.tag = inst_addr_trans_en ? ((s0_ps == 6'd12) ? s0_ppn : {s0_ppn[19:10], inst_paddr[21:12]}) : inst_paddr[31:12]; -assign data_paddr = (pg_mode && data_i.dmw0_en && !data_i.cacop_op_mode_di) ? {csr_dmw0[`PSEG], data_vaddr_buffer[28:0]} : + assign data_paddr = (pg_mode && data_i.dmw0_en && !data_i.cacop_op_mode_di) ? {csr_dmw0[`PSEG], data_vaddr_buffer[28:0]} : (pg_mode && data_i.dmw1_en && !data_i.cacop_op_mode_di) ? {csr_dmw1[`PSEG], data_vaddr_buffer[28:0]} : data_vaddr_buffer; -assign data_o.offset = data_i.vaddr[3:0]; -assign data_o.index = data_i.vaddr[11:4]; -assign data_o.tag = data_addr_trans_en ? ((s1_ps == 6'd12) ? s1_ppn : {s1_ppn[19:10], data_paddr[21:12]}) : data_paddr[31:12]; + assign data_o.offset = data_i.vaddr[3:0]; + assign data_o.index = data_i.vaddr[11:4]; + assign data_o.tag = data_addr_trans_en ? ((s1_ps == 6'd12) ? 
s1_ppn : {s1_ppn[19:10], data_paddr[21:12]}) : data_paddr[31:12];
 endmodule
diff --git a/src/vsrc/tlb_entry.sv b/src/vsrc/tlb_entry.sv
index fee4fb9..5cf04be 100644
--- a/src/vsrc/tlb_entry.sv
+++ b/src/vsrc/tlb_entry.sv
@@ -1,69 +1,70 @@
 `include "tlb_defines.sv"
-module tlb_entry(
-    input        clk,
+// TLB storage: TLBNUM entries with two search ports (s0/s1), one write port,
+// one combinational read port and an invalidate port (inv_i).
+module tlb_entry (
+    input                             clk,
     // search port 0
-    input logic s0_fetch,
-    input logic [18:0] s0_vppn,
-    input logic s0_odd_page,
-    input logic[9:0]s0_asid,
-    output logic s0_found,
-    output logic[$clog2(TLBNUM)-1:0]s0_index,
-    output logic[5:0]s0_ps,
-    output logic[19:0]s0_ppn,
-    output logic s0_v,
-    output logic s0_d,
-    output logic[1:0]s0_mat,
-    output logic[1:0]s0_plv,
+    input  logic                      s0_fetch,
+    input  logic [ 18:0]              s0_vppn,
+    input  logic                      s0_odd_page,
+    input  logic [ 9:0]               s0_asid,
+    output logic                      s0_found,
+    output logic [$clog2(TLBNUM)-1:0] s0_index,
+    output logic [ 5:0]               s0_ps,
+    output logic [ 19:0]              s0_ppn,
+    output logic                      s0_v,
+    output logic                      s0_d,
+    output logic [ 1:0]               s0_mat,
+    output logic [ 1:0]               s0_plv,
     //search port 1
-    input logic s1_fetch,
-    input logic [18:0]s1_vppn,
-    input logic s1_odd_page,
-    input logic [9:0]s1_asid,
-    output logic s1_found,
-    output logic[$clog2(TLBNUM)-1:0] s1_index,
-    output logic[5:0] s1_ps,
-    output logic[19:0]s1_ppn,
-    output logic s1_v,
-    output logic s1_d,
-    output logic[1:0]s1_mat,
-    output logic[1:0]s1_plv,
+    input  logic                      s1_fetch,
+    input  logic [ 18:0]              s1_vppn,
+    input  logic                      s1_odd_page,
+    input  logic [ 9:0]               s1_asid,
+    output logic                      s1_found,
+    output logic [$clog2(TLBNUM)-1:0] s1_index,
+    output logic [ 5:0]               s1_ps,
+    output logic [ 19:0]              s1_ppn,
+    output logic                      s1_v,
+    output logic                      s1_d,
+    output logic [ 1:0]               s1_mat,
+    output logic [ 1:0]               s1_plv,
     // write port
-    input logic we,
-    input logic [$clog2(TLBNUM)-1:0] w_index,
-    input tlb_wr_port write_port,
+    input  logic                      we,
+    input  logic [$clog2(TLBNUM)-1:0] w_index,
+    input  tlb_wr_port                write_port,
     // read port
-    input [$clog2(TLBNUM)-1:0] r_index ,
-    output tlb_wr_port read_port,
+    input        [$clog2(TLBNUM)-1:0] r_index,
+    output tlb_wr_port                read_port,
     // invalid port
-    input tlb_inv_in_struct inv_i
+    input  tlb_inv_in_struct          inv_i
 );
 
-reg [18:0] tlb_vppn [TLBNUM-1:0];
-reg tlb_e [TLBNUM-1:0];
-reg [ 9:0] tlb_asid [TLBNUM-1:0];
-reg tlb_g [TLBNUM-1:0];
-reg [ 5:0] tlb_ps [TLBNUM-1:0];
-reg [19:0] tlb_ppn0 [TLBNUM-1:0];
-reg [ 1:0] tlb_plv0 [TLBNUM-1:0];
-reg [ 1:0] tlb_mat0 [TLBNUM-1:0];
-reg tlb_d0 [TLBNUM-1:0];
-reg tlb_v0 [TLBNUM-1:0];
-reg [19:0] tlb_ppn1 [TLBNUM-1:0];
-reg [ 1:0] tlb_plv1 [TLBNUM-1:0];
-reg [ 1:0] tlb_mat1 [TLBNUM-1:0];
-reg tlb_d1 [TLBNUM-1:0];
-reg tlb_v1 [TLBNUM-1:0];
+  reg [ 18:0] tlb_vppn [TLBNUM-1:0];
+  reg         tlb_e    [TLBNUM-1:0];
+  reg [ 9:0]  tlb_asid [TLBNUM-1:0];
+  reg         tlb_g    [TLBNUM-1:0];
+  reg [ 5:0]  tlb_ps   [TLBNUM-1:0];
+  reg [ 19:0] tlb_ppn0 [TLBNUM-1:0];
+  reg [ 1:0]  tlb_plv0 [TLBNUM-1:0];
+  reg [ 1:0]  tlb_mat0 [TLBNUM-1:0];
+  reg         tlb_d0   [TLBNUM-1:0];
+  reg         tlb_v0   [TLBNUM-1:0];
+  reg [ 19:0] tlb_ppn1 [TLBNUM-1:0];
+  reg [ 1:0]  tlb_plv1 [TLBNUM-1:0];
+  reg [ 1:0]  tlb_mat1 [TLBNUM-1:0];
+  reg         tlb_d1   [TLBNUM-1:0];
+  reg         tlb_v1   [TLBNUM-1:0];
 
-reg [TLBNUM-1:0] match0;
-reg [TLBNUM-1:0] match1;
+  reg [TLBNUM-1:0] match0;
+  reg [TLBNUM-1:0] match1;
 
-reg [TLBNUM-1:0] s0_odd_page_buffer;
-reg [TLBNUM-1:0] s1_odd_page_buffer;
+  reg [TLBNUM-1:0] s0_odd_page_buffer;
+  reg [TLBNUM-1:0] s1_odd_page_buffer;
 
-genvar i;
-generate
-    for (i = 0; i < TLBNUM; i = i + 1)
-        begin: match
+  genvar i;
+  generate
+    for (i = 0; i < TLBNUM; i = i + 1) begin : match
             always @(posedge clk) begin
                 if (s0_fetch) begin
                     s0_odd_page_buffer[i] <= (tlb_ps[i] == 6'd12) ? s0_odd_page : s0_vppn[9];
@@ -75,82 +76,90 @@ generate
                 end
             end
         end
-endgenerate
+  endgenerate
 
-assign s0_found = !(!match0);
-assign s1_found = !(!match1);
+  // Hit when any bit of the registered match vector is set (works for any TLBNUM).
+  assign s0_found = |match0;
+  assign s1_found = |match1;
 
-always_comb begin
-  for(integer j = 0; j < 32; j++)begin
-    {s0_index, s0_ps, s0_ppn, s0_v, s0_d, s0_mat, s0_plv} = {37{match0[j] & s0_odd_page_buffer[j] }} & {5'd0, tlb_ps[j], tlb_ppn1[j], tlb_v1[j], tlb_d1[j], tlb_mat1[j], tlb_plv1[j]} |
-                                                            {37{match0[j] & ~s0_odd_page_buffer[j] }} & {5'd0, tlb_ps[j], tlb_ppn0[j], tlb_v0[j], tlb_d0[j], tlb_mat0[j], tlb_plv0[j]};
-    {s1_index, s1_ps, s1_ppn, s1_v, s1_d, s1_mat, s1_plv} = {37{match1[j] & s1_odd_page_buffer[j] }} & {5'd0, tlb_ps[j], tlb_ppn1[j], tlb_v1[j], tlb_d1[j], tlb_mat1[j], tlb_plv1[j]} |
-                                                            {37{match1[j] & ~s1_odd_page_buffer[j] }} & {5'd0, tlb_ps[j], tlb_ppn0[j], tlb_v0[j], tlb_d0[j], tlb_mat0[j], tlb_plv0[j]};
+  // One packed search result is {index, ps[5:0], ppn[19:0], v, d, mat[1:0], plv[1:0]}.
+  localparam int SEARCH_IDX_W = $clog2(TLBNUM);
+  localparam int SEARCH_RES_W = SEARCH_IDX_W + 32;
+
+  // AND-OR mux over all entries. The result must be cleared first and OR-accumulated:
+  // a plain assignment inside the loop lets the last iteration overwrite every earlier
+  // hit, and the index field has to carry j rather than a constant zero.
+  always_comb begin
+    {s0_index, s0_ps, s0_ppn, s0_v, s0_d, s0_mat, s0_plv} = {SEARCH_RES_W{1'b0}};
+    {s1_index, s1_ps, s1_ppn, s1_v, s1_d, s1_mat, s1_plv} = {SEARCH_RES_W{1'b0}};
+    for (integer j = 0; j < TLBNUM; j++) begin
+      {s0_index, s0_ps, s0_ppn, s0_v, s0_d, s0_mat, s0_plv} = {s0_index, s0_ps, s0_ppn, s0_v, s0_d, s0_mat, s0_plv} |
+          {SEARCH_RES_W{match0[j] & s0_odd_page_buffer[j]}} & {j[SEARCH_IDX_W-1:0], tlb_ps[j], tlb_ppn1[j], tlb_v1[j], tlb_d1[j], tlb_mat1[j], tlb_plv1[j]} |
+          {SEARCH_RES_W{match0[j] & ~s0_odd_page_buffer[j]}} & {j[SEARCH_IDX_W-1:0], tlb_ps[j], tlb_ppn0[j], tlb_v0[j], tlb_d0[j], tlb_mat0[j], tlb_plv0[j]};
+      {s1_index, s1_ps, s1_ppn, s1_v, s1_d, s1_mat, s1_plv} = {s1_index, s1_ps, s1_ppn, s1_v, s1_d, s1_mat, s1_plv} |
+          {SEARCH_RES_W{match1[j] & s1_odd_page_buffer[j]}} & {j[SEARCH_IDX_W-1:0], tlb_ps[j], tlb_ppn1[j], tlb_v1[j], tlb_d1[j], tlb_mat1[j], tlb_plv1[j]} |
+          {SEARCH_RES_W{match1[j] & ~s1_odd_page_buffer[j]}} & {j[SEARCH_IDX_W-1:0], tlb_ps[j], tlb_ppn0[j], tlb_v0[j], tlb_d0[j], tlb_mat0[j], tlb_plv0[j]};
+    end
   end
-end
 
-always @(posedge clk) begin
-  if (we) begin
-    tlb_vppn [w_index] <= write_port.vppn;
-    tlb_asid [w_index] <= write_port.asid;
-    tlb_g    [w_index] <= write_port.g;
-    tlb_ps   [w_index] <= write_port.ps;
-    tlb_ppn0 [w_index] <= write_port.ppn0;
-    tlb_plv0 [w_index] <= write_port.plv0;
-    tlb_mat0 [w_index] <= write_port.mat0;
-    tlb_d0   [w_index] <= write_port.d0;
-    tlb_v0   [w_index] <= write_port.v0;
-    tlb_ppn1 [w_index] <= write_port.ppn1;
-    tlb_plv1 [w_index] <= write_port.plv1;
-    tlb_mat1 [w_index] <= write_port.mat1;
-    tlb_d1   [w_index] <= write_port.d1;
-    tlb_v1   [w_index] <= write_port.v1;
+  always @(posedge clk) begin  // write port: every field except the E bit (written in invalid_tlb_entry)
+    if (we) begin
+      tlb_vppn[w_index] <= write_port.vppn;
+      tlb_asid[w_index] <= write_port.asid;
+      tlb_g[w_index] <= write_port.g;
+      tlb_ps[w_index] <= write_port.ps;
+      tlb_ppn0[w_index] <= write_port.ppn0;
+      tlb_plv0[w_index] <= write_port.plv0;
+      tlb_mat0[w_index] <= write_port.mat0;
+      tlb_d0[w_index] <= write_port.d0;
+      tlb_v0[w_index] <= write_port.v0;
+      tlb_ppn1[w_index] <= write_port.ppn1;
+      tlb_plv1[w_index] <= write_port.plv1;
+      tlb_mat1[w_index] <= write_port.mat1;
+      tlb_d1[w_index] <= write_port.d1;
+      tlb_v1[w_index] <= write_port.v1;
+    end
   end
-end
 
-assign read_port.vppn = tlb_vppn [r_index];
-assign read_port.asid = tlb_asid [r_index];
-assign read_port.g    = tlb_g    [r_index];
-assign read_port.ps   = tlb_ps   [r_index];
-assign read_port.e    = tlb_e    [r_index];
-assign read_port.v0   = tlb_v0   [r_index];
-assign read_port.d0   = tlb_d0   [r_index];
-assign read_port.mat0 = tlb_mat0 [r_index];
-assign read_port.plv0 = tlb_plv0 [r_index];
-assign read_port.ppn0 = tlb_ppn0 [r_index];
-assign read_port.v1   = tlb_v1   [r_index];
-assign read_port.d1   = tlb_d1   [r_index];
-assign read_port.mat1 = tlb_mat1 [r_index];
-assign read_port.plv1 = tlb_plv1 [r_index];
-assign read_port.ppn1 = tlb_ppn1 [r_index];
+  assign read_port.vppn = tlb_vppn[r_index];  // combinational read of entry r_index
+  assign read_port.asid = tlb_asid[r_index];
+  assign read_port.g = tlb_g[r_index];
+  assign read_port.ps = tlb_ps[r_index];
+  assign read_port.e = tlb_e[r_index];
+  assign read_port.v0 = tlb_v0[r_index];
+  assign read_port.d0 = tlb_d0[r_index];
+  assign read_port.mat0 = tlb_mat0[r_index];
+  assign read_port.plv0 = tlb_plv0[r_index];
+  assign read_port.ppn0 = tlb_ppn0[r_index];
+  assign read_port.v1 = tlb_v1[r_index];
+  assign read_port.d1 = tlb_d1[r_index];
+  assign read_port.mat1 = tlb_mat1[r_index];
+  assign read_port.plv1 = tlb_plv1[r_index];
+  assign read_port.ppn1 = tlb_ppn1[r_index];
 
-//tlb entry invalid
-generate
-    for (i = 0; i < TLBNUM; i = i + 1)
-        begin: invalid_tlb_entry
+  //tlb entry invalid
+  generate
+    for (i = 0; i < TLBNUM;
i = i + 1) begin : invalid_tlb_entry  // one always block per entry, each driving tlb_e[i]
             always @(posedge clk) begin
                 if (we && (w_index == i))
                     tlb_e[i] <= write_port.e;
                 else if (inv_i.en) begin
-                    if (inv_i.op == 5'd0 || inv_i.op == 5'd1)
-                        tlb_e[i] <= 1'b0;
-                    else if (inv_i.op == 5'd2 && tlb_g[i])
-                        tlb_e[i] <= 1'b0;
-                    else if (inv_i.op == 5'd3 && !tlb_g[i])
-                        tlb_e[i] <= 1'b0;
+          if (inv_i.op == 5'd0 || inv_i.op == 5'd1) tlb_e[i] <= 1'b0;  // op 0/1: clear unconditionally
+          else if (inv_i.op == 5'd2 && tlb_g[i]) tlb_e[i] <= 1'b0;  // op 2: clear entries with G set
+          else if (inv_i.op == 5'd3 && !tlb_g[i]) tlb_e[i] <= 1'b0;  // op 3: clear entries with G clear
                     else if (inv_i.op == 5'd4 && !tlb_g[i] && (tlb_asid[i] == inv_i.asid))
                         tlb_e[i] <= 1'b0;
                     else if (inv_i.op == 5'd5 && !tlb_g[i] && (tlb_asid[i] == inv_i.asid) &&
                            ((tlb_ps[i] == 6'd12) ? (tlb_vppn[i] == inv_i.vpn) : (tlb_vppn[i][18:10] == inv_i.vpn[18:10])))
-                        tlb_e[i] <= 1'b0;
+            tlb_e[i] <= 1'b0;  // op 5: G clear, ASID match and VPN match (only bits [18:10] compared unless ps == 12)
                     else if (inv_i.op == 5'd6 && (tlb_g[i] || (tlb_asid[i] == inv_i.asid)) &&
                            ((tlb_ps[i] == 6'd12) ? (tlb_vppn[i] == inv_i.vpn) : (tlb_vppn[i][18:10] == inv_i.vpn[18:10])))
-                        tlb_e[i] <= 1'b0;
+            tlb_e[i] <= 1'b0;  // op 6: (G set or ASID match) and VPN match, same VPN compare as op 5
                 end
             end
-        end
-endgenerate
+    end
+  endgenerate
 
 endmodule
diff --git a/src/vsrc/utils/lfsr.sv b/src/vsrc/utils/lfsr.sv
new file mode 100644
index 0000000..f48a583
--- /dev/null
+++ b/src/vsrc/utils/lfsr.sv
@@ -0,0 +1,91 @@
+/*
+ * Copyright © 2021 Eric Matthews
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Initial code developed under the supervision of Dr. Lesley Shannon,
+ * Reconfigurable Computing Lab, Simon Fraser University.
+ *
+ * Author(s):
+ * Eric Matthews
+ */
+
+//3-16 bit LFSRs with additional feedback to support full 2^N range (WIDTH == 2 uses its own branch below)
+module lfsr #(
+    parameter int unsigned WIDTH = 3,  // register width; also the index into LFSR_TAPS (entries 0..16)
+    parameter NEEDS_RESET = 1  // ANDed bitwise with rst, so only bit 0 decides whether rst is honored
+) (
+    input logic clk,
+    input logic rst,
+    input logic en,  // value shifts only while en is high
+    output logic [WIDTH-1:0] value
+);
+
+  typedef struct packed {
+    int unsigned NUM;  // number of taps that are actually read
+    bit [3:0][31:0] INDICIES;  // tap bit positions; only INDICIES[NUM-1:0] are read
+  } tap_t;
+
+  //XNOR taps for LFSR from 3-16 bits wide (source: Xilinx xapp052)
+  localparam tap_t LFSR_TAPS[17] = '{
+      //Dummy entries for widths 0-2
+      '{
+          NUM : 1,
+          INDICIES : '{0, 0, 0, 0}
+      },
+      '{NUM : 1, INDICIES : '{0, 0, 0, 0}},
+      '{NUM : 1, INDICIES : '{0, 0, 0, 0}},
+      //Number of taps and indicies[3:0] for LFSRs width 3 to 16
+      '{
+          NUM : 2,
+          INDICIES : '{0, 0, 1, 2}
+      },  //3
+      '{NUM : 2, INDICIES : '{0, 0, 2, 3}},  //4
+      '{NUM : 2, INDICIES : '{0, 0, 2, 4}},
+      '{NUM : 2, INDICIES : '{0, 0, 4, 5}},
+      '{NUM : 2, INDICIES : '{0, 0, 5, 6}},
+      '{NUM : 4, INDICIES : '{3, 4, 5, 7}},  //8
+      '{NUM : 2, INDICIES : '{0, 0, 4, 8}},
+      '{NUM : 2, INDICIES : '{0, 0, 6, 9}},
+      '{NUM : 2, INDICIES : '{0, 0, 8, 10}},
+      '{NUM : 4, INDICIES : '{0, 3, 5, 11}},  //12
+      '{NUM : 4, INDICIES : '{0, 2, 3, 12}},
+      '{NUM : 4, INDICIES : '{0, 2, 4, 13}},
+      '{NUM : 2, INDICIES : '{0, 0, 13, 14}},  //15
+      '{NUM : 4, INDICIES : '{3, 12, 14, 15}}  //16
+  };
+
+  localparam tap_t TAPS = LFSR_TAPS[WIDTH];  // tap set for this instance, selected by WIDTH
+
+  logic [TAPS.NUM-1:0] feedback_input;  // tapped bits of value; driven only in the width >= 3 branch
+  logic feedback;  // bit shifted into value[0]
+  ////////////////////////////////////////////////////
+  //Implementation
+  generate
+    if (WIDTH == 2) begin : gen_width_two
+      assign feedback = ~value[WIDTH-1];  // 2-bit case: feed back the inverted MSB
+    end else begin : gen_width_three_plus
+      for (genvar i = 0; i < TAPS.NUM; i++) begin : gen_taps
+        assign feedback_input[i] = value[int'(TAPS.INDICIES[i])];
+      end
+      //XNOR of taps and range extension to include all ones
+      assign feedback = (~^feedback_input) ^ |value[WIDTH-2:0];
+    end
+  endgenerate
+
+  initial value = 0;  // start from all zeros at time zero
+  always_ff @(posedge clk) begin
+    if (NEEDS_RESET & rst) value <= '0;  // synchronous clear; takes priority over en
+    else if (en) value
<= {value[WIDTH-2:0], feedback};  // shift left by one; feedback enters at bit 0
+  end
+
+endmodule
diff --git a/src/vsrc/utils/priority_encoder.sv b/src/vsrc/utils/priority_encoder.sv
new file mode 100644
index 0000000..1835106
--- /dev/null
+++ b/src/vsrc/utils/priority_encoder.sv
@@ -0,0 +1,61 @@
+/*
+ * Copyright © 2021 Eric Matthews, Lesley Shannon
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Initial code developed under the supervision of Dr. Lesley Shannon,
+ * Reconfigurable Computing Lab, Simon Fraser University.
+ *
+ * Author(s):
+ * Eric Matthews
+ */
+
+////////////////////////////////////////////////////
+//Highest Priority for: Index Zero
+//Look-up Table based
+//Max width of 12
+////////////////////////////////////////////////////
+module priority_encoder #(
+    parameter WIDTH = 4  // number of request bits; the width check below raises $error above 12
+) (
+    input logic [WIDTH-1:0] priority_vector,  // bit 0 has the highest priority
+    output logic [(WIDTH == 1) ? 0 : ($clog2(WIDTH)-1) : 0] encoded_result  // index of the lowest set bit
+);
+  ////////////////////////////////////////////////////
+  //Width Check
+  if (WIDTH > 12) $error("Max priority encoder width exceeded!");  // the table has 2**WIDTH entries
+
+  //Tool workaround
+  localparam MIN_WIDTH = (WIDTH == 1) ? 2 : WIDTH;  // WIDTH == 1 is sized as 2 so LOG2_WIDTH is at least 1
+  localparam LOG2_WIDTH = $clog2(MIN_WIDTH);  // bits per table entry
+  //Table generation for priority encoder
+  function [2**MIN_WIDTH-1:0][LOG2_WIDTH-1 : 0] table_gen();  // one entry per possible input value
+    for (int i = 0; i < 2 ** MIN_WIDTH; i++) begin  //Loop through all memory addresses
+      table_gen[i] = LOG2_WIDTH'(MIN_WIDTH - 1);  //Initialize to lowest priority
+      for (
+          int j = (int'(MIN_WIDTH) - 2); j >= 0; j--
+      ) begin  //Check each bit in increasing priority
+        if (i[j])  //If bit is set update table value with that bit's index
+          table_gen[i] = LOG2_WIDTH'(j);  // later (lower) j overwrites, so the lowest set bit wins
+      end
+    end
+  endfunction
+
+  //Initialize Table
+  localparam logic [2**MIN_WIDTH-1:0][LOG2_WIDTH-1 : 0] ENCODER_ROM = table_gen();  // built at elaboration
+
+  ////////////////////////////////////////////////////
+  //Implementation
+  assign encoded_result = (WIDTH == 1) ? 0 : ENCODER_ROM[priority_vector];  // an all-zero input reads entry 0, which holds MIN_WIDTH-1
+
+endmodule