From 253e0236e68bae15f928af26d7c9cba8d8151182 Mon Sep 17 00:00:00 2001 From: Fei Gao Date: Tue, 23 Feb 2021 18:07:06 -0500 Subject: [PATCH 1/4] Fix the noc1 msg_data consumption in L2P1 When an op goes into mshr in L2P1, we didn't store the msg_data in mshr, thus the data left in l2_pipe1_buf_in may be consumed by later ops. This fix stalls the pipe when msg_data is waiting to be consumed in the buf_in. --- .../chip/tile/l2/rtl/l2_pipe1_ctrl.v.pyv | 56 ++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) diff --git a/piton/design/chip/tile/l2/rtl/l2_pipe1_ctrl.v.pyv b/piton/design/chip/tile/l2/rtl/l2_pipe1_ctrl.v.pyv index eb5692e33..d90ece19b 100644 --- a/piton/design/chip/tile/l2/rtl/l2_pipe1_ctrl.v.pyv +++ b/piton/design/chip/tile/l2/rtl/l2_pipe1_ctrl.v.pyv @@ -345,6 +345,8 @@ reg [`MSG_TYPE_WIDTH-1:0] msg_type_S2_f; reg msg_from_mshr_S2_f; reg [`MSG_TYPE_WIDTH-1:0] msg_type_S4_f; +reg msg_data_pending, msg_data_pending_f; + //============================ // Stage 1 //============================ @@ -932,15 +934,40 @@ begin end reg stall_msg_S1; +reg stall_msg_data_S1; always @ * begin stall_msg_S1 = msg_data_rd_S1 && ~msg_data_valid_S1; end +wire msg_carrying_data_S1 = (msg_data_16B_amo_S2_f && ((msg_type_S2_f == `MSG_TYPE_SWAP_P1_REQ) || (msg_type_S2_f == `MSG_TYPE_SWAPWB_P1_REQ))) || + (msg_type_S2_f == `MSG_TYPE_CAS_P2Y_REQ ) || + (msg_type_S2_f == `MSG_TYPE_SWAP_P2_REQ ) || + (msg_type_S2_f == `MSG_TYPE_SWAPWB_P2_REQ ) || + (msg_type_S2_f == `MSG_TYPE_AMO_ADD_P2_REQ ) || + (msg_type_S2_f == `MSG_TYPE_AMO_AND_P2_REQ ) || + (msg_type_S2_f == `MSG_TYPE_AMO_OR_P2_REQ ) || + (msg_type_S2_f == `MSG_TYPE_AMO_XOR_P2_REQ ) || + (msg_type_S2_f == `MSG_TYPE_AMO_MAX_P2_REQ ) || + (msg_type_S2_f == `MSG_TYPE_AMO_MAXU_P2_REQ) || + (msg_type_S2_f == `MSG_TYPE_AMO_MIN_P2_REQ ) || + (msg_type_S2_f == `MSG_TYPE_AMO_MINU_P2_REQ) || + (msg_type_S2_f == `MSG_TYPE_NC_STORE_REQ) || + (msg_type_S2_f == `MSG_TYPE_CAS_P1_REQ) || + (msg_type_S2_f == `MSG_TYPE_CAS_P2N_REQ) || (msg_type_S2_f == `MSG_TYPE_INTERRUPT_FWD); +always @(*) begin + // This is a pretty conservative stall, but we actually won't have many cases with msg_data_pending. + // For requests that carries msg_data (nc_store, atomic), they might get into mshr. In this case, + // msg_data_val would be high for a long time, until that request come back from mshr and raise + // msg_data_ready. During this period, if another request carrying data arrives, it will read the + // wrong data. To prevent this, we simply stop accepting new request until msg_data_pending is resolved. + stall_msg_data_S1 = (msg_data_pending || msg_data_pending_f) && msg_carrying_data_S1 && !msg_from_mshr_S1; +end + always @ * begin - stall_S1 = valid_S1 && (stall_pre_S1 || stall_hazard_S1 || stall_mshr_S1 || stall_msg_S1); + stall_S1 = valid_S1 && (stall_pre_S1 || stall_hazard_S1 || stall_mshr_S1 || stall_msg_S1 || stall_msg_data_S1); end @@ -1029,6 +1056,7 @@ reg mshr_smc_miss_S2_f; reg [`L2_MSHR_INDEX_WIDTH-1:0] mshr_pending_index_S2_f; reg special_addr_type_S2_f; reg msg_data_rd_S2_f; +reg msg_carrying_data_S2_f; always @ (posedge clk) begin @@ -1047,6 +1075,7 @@ begin special_addr_type_S2_f <= 0; msg_data_rd_S2_f <= 0; amo_alu_op_S2_f <= `L2_AMO_ALU_OP_WIDTH'b0; + msg_carrying_data_S2_f <= 1'b0; end else if (!stall_S2) begin @@ -1067,6 +1096,7 @@ begin special_addr_type_S2_f <= special_addr_type_S1; msg_data_rd_S2_f <= msg_data_rd_S1; amo_alu_op_S2_f <= amo_alu_op_S1; + msg_carrying_data_S2_f <= msg_carrying_data_S1; end end @@ -2182,6 +2212,30 @@ begin msg_data_ready_S2 = valid_S2 && !stall_S2 && (cs_S2[`CS_STATE_DATA_RDY_P1S2] || msg_data_rd_S2_f); end +always @(*) begin + msg_data_pending = 1'b0; + if (msg_carrying_data_S2_f && !msg_data_ready_S2) begin + if (cs_S2[`CS_MSHR_WR_EN_P1S2]) begin + // CAUTION: We assume that after a request reads msg_data, it would not get into mshr again. + msg_data_pending = 1'b1; + end + end +end + +always @(posedge clk) begin + if (~rst_n) begin + msg_data_pending_f <= 1'b0; + end + else if (msg_carrying_data_S2_f && !msg_data_ready_S2) begin + if (cs_S2[`CS_MSHR_WR_EN_P1S2]) begin + msg_data_pending_f <= 1'b1; + end + end + else if (msg_data_ready_S2 && msg_data_valid_S2) begin + msg_data_pending_f <= 1'b0; + end +end + always @ * begin From 7f6fa457326f08dd844f609b0534e8880d6ae2a6 Mon Sep 17 00:00:00 2001 From: Fei Gao Date: Wed, 24 Feb 2021 10:39:05 -0500 Subject: [PATCH 2/4] wrong stage signal used --- .../chip/tile/l2/rtl/l2_pipe1_ctrl.v.pyv | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/piton/design/chip/tile/l2/rtl/l2_pipe1_ctrl.v.pyv b/piton/design/chip/tile/l2/rtl/l2_pipe1_ctrl.v.pyv index d90ece19b..264575a66 100644 --- a/piton/design/chip/tile/l2/rtl/l2_pipe1_ctrl.v.pyv +++ b/piton/design/chip/tile/l2/rtl/l2_pipe1_ctrl.v.pyv @@ -941,21 +941,21 @@ begin stall_msg_S1 = msg_data_rd_S1 && ~msg_data_valid_S1; end -wire msg_carrying_data_S1 = (msg_data_16B_amo_S2_f && ((msg_type_S2_f == `MSG_TYPE_SWAP_P1_REQ) || (msg_type_S2_f == `MSG_TYPE_SWAPWB_P1_REQ))) || - (msg_type_S2_f == `MSG_TYPE_CAS_P2Y_REQ ) || - (msg_type_S2_f == `MSG_TYPE_SWAP_P2_REQ ) || - (msg_type_S2_f == `MSG_TYPE_SWAPWB_P2_REQ ) || - (msg_type_S2_f == `MSG_TYPE_AMO_ADD_P2_REQ ) || - (msg_type_S2_f == `MSG_TYPE_AMO_AND_P2_REQ ) || - (msg_type_S2_f == `MSG_TYPE_AMO_OR_P2_REQ ) || - (msg_type_S2_f == `MSG_TYPE_AMO_XOR_P2_REQ ) || - (msg_type_S2_f == `MSG_TYPE_AMO_MAX_P2_REQ ) || - (msg_type_S2_f == `MSG_TYPE_AMO_MAXU_P2_REQ) || - (msg_type_S2_f == `MSG_TYPE_AMO_MIN_P2_REQ ) || - (msg_type_S2_f == `MSG_TYPE_AMO_MINU_P2_REQ) || - (msg_type_S2_f == `MSG_TYPE_NC_STORE_REQ) || - (msg_type_S2_f == `MSG_TYPE_CAS_P1_REQ) || - (msg_type_S2_f == `MSG_TYPE_CAS_P2N_REQ) || (msg_type_S2_f == `MSG_TYPE_INTERRUPT_FWD); +wire msg_carrying_data_S1 = (msg_data_16B_amo_S1 && ((msg_type_trans_S1 == `MSG_TYPE_SWAP_P1_REQ) || (msg_type_trans_S1 == `MSG_TYPE_SWAPWB_P1_REQ))) || + (msg_type_trans_S1 == `MSG_TYPE_CAS_P2Y_REQ ) || + (msg_type_trans_S1 == `MSG_TYPE_SWAP_P2_REQ ) || + (msg_type_trans_S1 == `MSG_TYPE_SWAPWB_P2_REQ ) || + (msg_type_trans_S1 == `MSG_TYPE_AMO_ADD_P2_REQ ) || + (msg_type_trans_S1 == `MSG_TYPE_AMO_AND_P2_REQ ) || + (msg_type_trans_S1 == `MSG_TYPE_AMO_OR_P2_REQ ) || + (msg_type_trans_S1 == `MSG_TYPE_AMO_XOR_P2_REQ ) || + (msg_type_trans_S1 == `MSG_TYPE_AMO_MAX_P2_REQ ) || + (msg_type_trans_S1 == `MSG_TYPE_AMO_MAXU_P2_REQ) || + (msg_type_trans_S1 == `MSG_TYPE_AMO_MIN_P2_REQ ) || + (msg_type_trans_S1 == `MSG_TYPE_AMO_MINU_P2_REQ) || + (msg_type_trans_S1 == `MSG_TYPE_NC_STORE_REQ) || + (msg_type_trans_S1 == `MSG_TYPE_CAS_P1_REQ) || + (msg_type_trans_S1 == `MSG_TYPE_CAS_P2N_REQ) || (msg_type_trans_S1 == `MSG_TYPE_INTERRUPT_FWD); always @(*) begin // This is a pretty conservative stall, but we actually won't have many cases with msg_data_pending. // For requests that carries msg_data (nc_store, atomic), they might get into mshr. In this case, From 9aca28cc3f3c6a261e273e47aa257bd78ae1f711 Mon Sep 17 00:00:00 2001 From: Fei Gao Date: Wed, 24 Feb 2021 11:16:25 -0500 Subject: [PATCH 3/4] Add valid check Conflicts: piton/design/chip/tile/l2/rtl/l2_pipe1_ctrl.v.pyv --- piton/design/chip/tile/l2/rtl/l2_pipe1_ctrl.v.pyv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/piton/design/chip/tile/l2/rtl/l2_pipe1_ctrl.v.pyv b/piton/design/chip/tile/l2/rtl/l2_pipe1_ctrl.v.pyv index 264575a66..543de9337 100644 --- a/piton/design/chip/tile/l2/rtl/l2_pipe1_ctrl.v.pyv +++ b/piton/design/chip/tile/l2/rtl/l2_pipe1_ctrl.v.pyv @@ -2214,7 +2214,7 @@ end always @(*) begin msg_data_pending = 1'b0; - if (msg_carrying_data_S2_f && !msg_data_ready_S2) begin + if (valid_S2 && msg_carrying_data_S2_f && !msg_data_ready_S2) begin if (cs_S2[`CS_MSHR_WR_EN_P1S2]) begin // CAUTION: We assume that after a request reads msg_data, it would not get into mshr again. msg_data_pending = 1'b1; @@ -2226,7 +2226,7 @@ always @(posedge clk) begin if (~rst_n) begin msg_data_pending_f <= 1'b0; end - else if (msg_carrying_data_S2_f && !msg_data_ready_S2) begin + else if (valid_S2 && msg_carrying_data_S2_f && !msg_data_ready_S2) begin if (cs_S2[`CS_MSHR_WR_EN_P1S2]) begin msg_data_pending_f <= 1'b1; end From 83d5a1def410c0a6d742d9078b4638a9b55da789 Mon Sep 17 00:00:00 2001 From: Fei Gao Date: Wed, 24 Mar 2021 18:20:48 -0400 Subject: [PATCH 4/4] Cleanup code and add comments --- .../chip/tile/l2/rtl/l2_pipe1_ctrl.v.pyv | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/piton/design/chip/tile/l2/rtl/l2_pipe1_ctrl.v.pyv b/piton/design/chip/tile/l2/rtl/l2_pipe1_ctrl.v.pyv index 543de9337..67567b454 100644 --- a/piton/design/chip/tile/l2/rtl/l2_pipe1_ctrl.v.pyv +++ b/piton/design/chip/tile/l2/rtl/l2_pipe1_ctrl.v.pyv @@ -941,10 +941,8 @@ begin stall_msg_S1 = msg_data_rd_S1 && ~msg_data_valid_S1; end -wire msg_carrying_data_S1 = (msg_data_16B_amo_S1 && ((msg_type_trans_S1 == `MSG_TYPE_SWAP_P1_REQ) || (msg_type_trans_S1 == `MSG_TYPE_SWAPWB_P1_REQ))) || - (msg_type_trans_S1 == `MSG_TYPE_CAS_P2Y_REQ ) || +wire msg_carrying_data_S1 = (msg_type_trans_S1 == `MSG_TYPE_CAS_P2Y_REQ ) || (msg_type_trans_S1 == `MSG_TYPE_SWAP_P2_REQ ) || - (msg_type_trans_S1 == `MSG_TYPE_SWAPWB_P2_REQ ) || (msg_type_trans_S1 == `MSG_TYPE_AMO_ADD_P2_REQ ) || (msg_type_trans_S1 == `MSG_TYPE_AMO_AND_P2_REQ ) || (msg_type_trans_S1 == `MSG_TYPE_AMO_OR_P2_REQ ) || @@ -957,11 +955,13 @@ wire msg_carrying_data_S1 = (msg_data_16B_amo_S1 && ((msg_type_trans_S1 == `MSG_ (msg_type_trans_S1 == `MSG_TYPE_CAS_P1_REQ) || (msg_type_trans_S1 == `MSG_TYPE_CAS_P2N_REQ) || (msg_type_trans_S1 == `MSG_TYPE_INTERRUPT_FWD); always @(*) begin - // This is a pretty conservative stall, but we actually won't have many cases with msg_data_pending. - // For requests that carries msg_data (nc_store, atomic), they might get into mshr. In this case, - // msg_data_val would be high for a long time, until that request come back from mshr and raise - // msg_data_ready. During this period, if another request carrying data arrives, it will read the - // wrong data. To prevent this, we simply stop accepting new request until msg_data_pending is resolved. + // We only need to worry about the case where a nc_store gets into mshr. + // In this case, msg_data is not consumed immediately and msg_data_val + // would be high for a long time, until that request come back from mshr + // and raise msg_data_ready. During this period, if another request carrying + // data (e.g. another nc_store, atomics, or interrupt_fwd) arrives, it will + // read the wrong data. To prevent this, we simply stop accepting new request + // until msg_data_pending is resolved. stall_msg_data_S1 = (msg_data_pending || msg_data_pending_f) && msg_carrying_data_S1 && !msg_from_mshr_S1; end @@ -2212,6 +2212,12 @@ begin msg_data_ready_S2 = valid_S2 && !stall_S2 && (cs_S2[`CS_STATE_DATA_RDY_P1S2] || msg_data_rd_S2_f); end +// Actually it's conservative to use msg_carrying_data_S2_f as +// the condition to raise the msg_data_pending flag. For atomic operations, +// it's already guaranteed that no other noc reqs can be consumes by L2 +// until it reaches phase 2. But being conservative makes no harm to +// the performance, it's just a double check. + always @(*) begin msg_data_pending = 1'b0; if (valid_S2 && msg_carrying_data_S2_f && !msg_data_ready_S2) begin