From 236a50ad5cc888f09d46933e384b2d022998b1ec Mon Sep 17 00:00:00 2001 From: Jason Davies Date: Wed, 26 Feb 2025 17:55:10 +0000 Subject: [PATCH] SFPIADD doesn't take two cycles. --- tt_llk_blackhole/common/inc/sfpu/ckernel_sfpu_add_int32.h | 2 -- tt_llk_blackhole/common/inc/sfpu/ckernel_sfpu_sub_int32.h | 2 -- tt_llk_wormhole_b0/common/inc/sfpu/ckernel_sfpu_add_int32.h | 2 -- tt_llk_wormhole_b0/common/inc/sfpu/ckernel_sfpu_sub_int32.h | 2 -- 4 files changed, 8 deletions(-) diff --git a/tt_llk_blackhole/common/inc/sfpu/ckernel_sfpu_add_int32.h b/tt_llk_blackhole/common/inc/sfpu/ckernel_sfpu_add_int32.h index 2f01b92a..6016c1d7 100644 --- a/tt_llk_blackhole/common/inc/sfpu/ckernel_sfpu_add_int32.h +++ b/tt_llk_blackhole/common/inc/sfpu/ckernel_sfpu_add_int32.h @@ -48,8 +48,6 @@ inline void _add_int32_(const uint dst_offset) { } TTI_SFPIADD(0 /*imm*/, 1 /*lreg_c*/, 0 /*lreg_dest*/, 4 /*imod*/); - // MAD has a 2-cycle pipeline latency so we need one cycle latency until next instr can consume the result - TTI_NOP; // LREG_0 -> dest as int32 if constexpr (SIGN_MAGNITUDE_FORMAT) { diff --git a/tt_llk_blackhole/common/inc/sfpu/ckernel_sfpu_sub_int32.h b/tt_llk_blackhole/common/inc/sfpu/ckernel_sfpu_sub_int32.h index c7396746..c9a510ff 100644 --- a/tt_llk_blackhole/common/inc/sfpu/ckernel_sfpu_sub_int32.h +++ b/tt_llk_blackhole/common/inc/sfpu/ckernel_sfpu_sub_int32.h @@ -49,8 +49,6 @@ inline void _sub_int32_(const uint dst_offset) { // Set instruction modifier to 6 to get B's 2's complement TTI_SFPIADD(0 /*imm*/, 1 /*lreg_c*/, 0 /*lreg_dest*/, 6 /*imod*/); - // MAD has a 2-cycle pipeline latency so we need one cycle latency until next instr can consume the result - TTI_NOP; // LREG_0 -> dest as int32 if constexpr (SIGN_MAGNITUDE_FORMAT) { diff --git a/tt_llk_wormhole_b0/common/inc/sfpu/ckernel_sfpu_add_int32.h b/tt_llk_wormhole_b0/common/inc/sfpu/ckernel_sfpu_add_int32.h index 195df798..410d5fcd 100644 --- a/tt_llk_wormhole_b0/common/inc/sfpu/ckernel_sfpu_add_int32.h +++ b/tt_llk_wormhole_b0/common/inc/sfpu/ckernel_sfpu_add_int32.h @@ -33,8 +33,6 @@ inline void _add_int32_(const uint dst_offset) { // operand B - int32 TT_SFPLOAD(1, sfpload_instr_mod, 3, dst_offset * 64); TTI_SFPIADD(0, 1, 0, 4); - // MAD has a 2-cycle pipeline latency so we need one cycle latency until next instr can consume the result - TTI_NOP; // LREG_0 -> dest as int32 TTI_SFPSTORE(0, sfpload_instr_mod, 3, 0); dst_reg++; diff --git a/tt_llk_wormhole_b0/common/inc/sfpu/ckernel_sfpu_sub_int32.h b/tt_llk_wormhole_b0/common/inc/sfpu/ckernel_sfpu_sub_int32.h index 89210114..b1262a72 100644 --- a/tt_llk_wormhole_b0/common/inc/sfpu/ckernel_sfpu_sub_int32.h +++ b/tt_llk_wormhole_b0/common/inc/sfpu/ckernel_sfpu_sub_int32.h @@ -34,8 +34,6 @@ inline void _sub_int32_(const uint dst_offset) { TT_SFPLOAD(0 /*lreg*/, sfpload_instr_mod, 3 /*addr_mode*/, dst_offset * 64 /*dest*/); // Use 6 as imod to convert operand B to 2's complement TTI_SFPIADD(0 /*imm*/, 1 /*lreg*/, 0 /*ldest*/, 6 /*imod*/); - // MAD has a 2-cycle pipeline latency so we need one cycle latency until next instr can consume the result - TTI_NOP; // LREG_0 -> dest as int32 TTI_SFPSTORE(0 /*lreg_ind*/, sfpload_instr_mod, 3 /*addr_mode*/, 0 /*dest*/); dst_reg++;