diff --git a/piton/design/chip/tile/l2/rtl/l2_pipe1_ctrl.v.pyv b/piton/design/chip/tile/l2/rtl/l2_pipe1_ctrl.v.pyv
index eb5692e33..67567b454 100644
--- a/piton/design/chip/tile/l2/rtl/l2_pipe1_ctrl.v.pyv
+++ b/piton/design/chip/tile/l2/rtl/l2_pipe1_ctrl.v.pyv
@@ -345,6 +345,8 @@ reg [`MSG_TYPE_WIDTH-1:0] msg_type_S2_f;
 reg msg_from_mshr_S2_f;
 reg [`MSG_TYPE_WIDTH-1:0] msg_type_S4_f;
 
+reg msg_data_pending, msg_data_pending_f;
+
 //============================
 // Stage 1
 //============================
@@ -932,15 +934,40 @@ begin
 end
 
 reg stall_msg_S1;
+reg stall_msg_data_S1;
 always @ *
 begin
     stall_msg_S1 = msg_data_rd_S1 && ~msg_data_valid_S1;
 end
 
+wire msg_carrying_data_S1 = (msg_type_trans_S1 == `MSG_TYPE_CAS_P2Y_REQ ) ||
+                            (msg_type_trans_S1 == `MSG_TYPE_SWAP_P2_REQ ) ||
+                            (msg_type_trans_S1 == `MSG_TYPE_AMO_ADD_P2_REQ ) ||
+                            (msg_type_trans_S1 == `MSG_TYPE_AMO_AND_P2_REQ ) ||
+                            (msg_type_trans_S1 == `MSG_TYPE_AMO_OR_P2_REQ ) ||
+                            (msg_type_trans_S1 == `MSG_TYPE_AMO_XOR_P2_REQ ) ||
+                            (msg_type_trans_S1 == `MSG_TYPE_AMO_MAX_P2_REQ ) ||
+                            (msg_type_trans_S1 == `MSG_TYPE_AMO_MAXU_P2_REQ) ||
+                            (msg_type_trans_S1 == `MSG_TYPE_AMO_MIN_P2_REQ ) ||
+                            (msg_type_trans_S1 == `MSG_TYPE_AMO_MINU_P2_REQ) ||
+                            (msg_type_trans_S1 == `MSG_TYPE_NC_STORE_REQ) ||
+                            (msg_type_trans_S1 == `MSG_TYPE_CAS_P1_REQ) ||
+                            (msg_type_trans_S1 == `MSG_TYPE_CAS_P2N_REQ) || (msg_type_trans_S1 == `MSG_TYPE_INTERRUPT_FWD);
+
+always @(*) begin
+    // We only need to worry about the case where an nc_store gets into the MSHR.
+    // In that case, msg_data is not consumed immediately and msg_data_val
+    // stays high for a long time, until that request comes back from the MSHR
+    // and raises msg_data_ready. During this window, if another request carrying
+    // data (e.g. another nc_store, an atomic, or an interrupt_fwd) arrives, it
+    // would read the wrong data. To prevent this, we simply stop accepting new
+    // data-carrying requests until msg_data_pending is resolved.
+    stall_msg_data_S1 = (msg_data_pending || msg_data_pending_f) && msg_carrying_data_S1 && !msg_from_mshr_S1;
+end
+
 always @ *
 begin
-    stall_S1 = valid_S1 && (stall_pre_S1 || stall_hazard_S1 || stall_mshr_S1 || stall_msg_S1);
+    stall_S1 = valid_S1 && (stall_pre_S1 || stall_hazard_S1 || stall_mshr_S1 || stall_msg_S1 || stall_msg_data_S1);
 end
 
@@ -1029,6 +1056,7 @@ reg mshr_smc_miss_S2_f;
 reg [`L2_MSHR_INDEX_WIDTH-1:0] mshr_pending_index_S2_f;
 reg special_addr_type_S2_f;
 reg msg_data_rd_S2_f;
+reg msg_carrying_data_S2_f;
 
 always @ (posedge clk)
 begin
@@ -1047,6 +1075,7 @@ begin
         special_addr_type_S2_f <= 0;
         msg_data_rd_S2_f <= 0;
         amo_alu_op_S2_f <= `L2_AMO_ALU_OP_WIDTH'b0;
+        msg_carrying_data_S2_f <= 1'b0;
     end
     else if (!stall_S2)
     begin
@@ -1067,6 +1096,7 @@ begin
         special_addr_type_S2_f <= special_addr_type_S1;
         msg_data_rd_S2_f <= msg_data_rd_S1;
         amo_alu_op_S2_f <= amo_alu_op_S1;
+        msg_carrying_data_S2_f <= msg_carrying_data_S1;
     end
 end
 
@@ -2182,6 +2212,36 @@ begin
     msg_data_ready_S2 = valid_S2 && !stall_S2 && (cs_S2[`CS_STATE_DATA_RDY_P1S2] || msg_data_rd_S2_f);
 end
 
+// Note that using msg_carrying_data_S2_f as the condition to raise the
+// msg_data_pending flag is conservative. For atomic operations, it is already
+// guaranteed that no other NoC request can be consumed by the L2 until the
+// operation reaches phase 2. But being conservative does no harm to
+// performance; it is just a double check.
+
+always @(*) begin
+    msg_data_pending = 1'b0;
+    if (valid_S2 && msg_carrying_data_S2_f && !msg_data_ready_S2) begin
+        if (cs_S2[`CS_MSHR_WR_EN_P1S2]) begin
+            // CAUTION: we assume that once a request has read msg_data, it does not go into the MSHR again.
+            msg_data_pending = 1'b1;
+        end
+    end
+end
+
+always @(posedge clk) begin
+    if (~rst_n) begin
+        msg_data_pending_f <= 1'b0;
+    end
+    else if (valid_S2 && msg_carrying_data_S2_f && !msg_data_ready_S2) begin
+        if (cs_S2[`CS_MSHR_WR_EN_P1S2]) begin
+            msg_data_pending_f <= 1'b1;
+        end
+    end
+    else if (msg_data_ready_S2 && msg_data_valid_S2) begin
+        msg_data_pending_f <= 1'b0;
+    end
+end
+
 always @ *
 begin
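
Reviewer-side sketch (not part of the patch): one way to state the invariant the new stall is meant to enforce is a SystemVerilog assertion. It assumes only the signal names visible in the diff (clk, rst_n, valid_S1, msg_carrying_data_S1, msg_from_mshr_S1, msg_data_pending_f, stall_msg_data_S1); the property name and its placement are hypothetical.

// Sketch only: while a data-carrying request is parked in the MSHR
// (msg_data_pending_f set), any new data-carrying request that did not
// come back from the MSHR must be stalled in S1.
property p_stall_data_reqs_while_pending;
    @(posedge clk) disable iff (!rst_n)
    (valid_S1 && msg_carrying_data_S1 && !msg_from_mshr_S1 && msg_data_pending_f)
        |-> stall_msg_data_S1;
endproperty
// Hypothetical binding inside l2_pipe1_ctrl:
// assert property (p_stall_data_reqs_while_pending);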