tactcomplabs
diff --git a/‎common/include/RevCommon.h
+2 b/‎common/include/RevCommon.h
+2
diff --git a/‎include/RevInstHelpers.h
+87-2 b/‎include/RevInstHelpers.h
+87-2
diff --git a/‎include/RevRegFile.h
+69-27 b/‎include/RevRegFile.h
+69-27
diff --git a/‎include/insns/RV32D.h
+20-56 b/‎include/insns/RV32D.h
+20-56
@@ -34,6 +34,8 @@
 
 namespace SST::RevCPU {
 
+using float16 = _Float16;
+
 /// Zero-extend value of bits size
 template<typename T>
 constexpr auto ZeroExt( T val, size_t bits ) {
 
@@ -65,8 +65,8 @@ inline constexpr double fpmin<double, uint64_t> = 0x0p+0;
 /// General template for converting between Floating Point and Integer.
 /// FP values outside the range of the target integer type are clipped
 /// at the integer type's numerical limits, whether signed or unsigned.
-template<typename FP, typename INT>
-bool CvtFpToInt( RevFeature* F, RevRegFile* R, RevMem* M, const RevInst& Inst ) {
+template<typename INT, typename FP>
+bool fcvtif( RevFeature* F, RevRegFile* R, RevMem* M, const RevInst& Inst ) {
   // Read the FP register. Round to integer according to current rounding mode.
   FP fp = std::rint( R->GetFP<FP>( Inst.rs1 ) );
 
@@ -115,6 +115,10 @@ uint32_t fclass( T val ) {
       uint32_t i32;
       memcpy( &i32, &val, sizeof( i32 ) );
       return ( i32 & uint32_t{ 1 } << 22 ) != 0 ? QuietNaN : SignalingNaN;
+    } else if constexpr( std::is_same_v<T, float16> ) {
+      uint16_t i16;
+      memcpy( &i16, &val, sizeof( i16 ) );
+      return ( i16 & uint16_t{ 1 } << 9 ) != 0 ? QuietNaN : SignalingNaN;
     } else {
       uint64_t i64;
       memcpy( &i64, &val, sizeof( i64 ) );
@@ -443,6 +447,87 @@ bool fnmadd( RevFeature* F, RevRegFile* R, RevMem* M, const RevInst& Inst ) {
   return true;
 }
 
+// Square root
+template<typename T>
+static bool fsqrt( RevFeature* F, RevRegFile* R, RevMem* M, const RevInst& Inst ) {
+  R->SetFP( Inst.rd, std::sqrt( R->GetFP<T>( Inst.rs1 ) ) );
+  R->AdvancePC( Inst );
+  return true;
+}
+
+// Transfer sign bit
+template<typename T>
+static bool fsgnj( RevFeature* F, RevRegFile* R, RevMem* M, const RevInst& Inst ) {
+  R->SetFP( Inst.rd, std::copysign( R->GetFP<T>( Inst.rs1 ), R->GetFP<T>( Inst.rs2 ) ) );
+  R->AdvancePC( Inst );
+  return true;
+}
+
+// Negated transfer sign bit
+template<typename T>
+static bool fsgnjn( RevFeature* F, RevRegFile* R, RevMem* M, const RevInst& Inst ) {
+  R->SetFP( Inst.rd, std::copysign( R->GetFP<T>( Inst.rs1 ), negate( R->GetFP<T>( Inst.rs2 ) ) ) );
+  R->AdvancePC( Inst );
+  return true;
+}
+
+// Xor transfer sign bit
+template<typename T>
+static bool fsgnjx( RevFeature* F, RevRegFile* R, RevMem* M, const RevInst& Inst ) {
+  T rs1 = R->GetFP<T>( Inst.rs1 ), rs2 = R->GetFP<T>( Inst.rs2 );
+  R->SetFP( Inst.rd, std::copysign( rs1, std::signbit( rs1 ) ? negate( rs2 ) : rs2 ) );
+  R->AdvancePC( Inst );
+  return true;
+}
+
+// Move floating-point register to integer register
+template<typename T>
+static bool fmvif( RevFeature* F, RevRegFile* R, RevMem* M, const RevInst& Inst ) {
+  std::make_signed_t<uint_type_t<T>> i;
+  T                                  fp = R->GetFP<T, true>( Inst.rs1 );  // The FP value
+  static_assert( sizeof( i ) == sizeof( fp ) );
+  memcpy( &i, &fp, sizeof( i ) );  // Reinterpreted as int
+  R->SetX( Inst.rd, i );           // Copied to the destination register
+  R->AdvancePC( Inst );
+  return true;
+}
+
+// Move integer register to floating-point register
+template<typename T>
+static bool fmvfi( RevFeature* F, RevRegFile* R, RevMem* M, const RevInst& Inst ) {
+  T    fp;
+  auto i = R->GetX<uint_type_t<T>>( Inst.rs1 );  // The X register
+  static_assert( sizeof( i ) == sizeof( fp ) );
+  memcpy( &fp, &i, sizeof( fp ) );  // Reinterpreted as FP
+  R->SetFP( Inst.rd, fp );          // Copied to the destination register
+  R->AdvancePC( Inst );
+  return true;
+}
+
+// Floating-point classify
+template<typename T>
+static bool fclassify( RevFeature* F, RevRegFile* R, RevMem* M, const RevInst& Inst ) {
+  R->SetX( Inst.rd, fclass( R->GetFP<T>( Inst.rs1 ) ) );
+  R->AdvancePC( Inst );
+  return true;
+}
+
+// Convert integer to floating point
+template<typename FP, typename INT>
+static bool fcvtfi( RevFeature* F, RevRegFile* R, RevMem* M, const RevInst& Inst ) {
+  R->SetFP( Inst.rd, static_cast<FP>( R->GetX<INT>( Inst.rs1 ) ) );
+  R->AdvancePC( Inst );
+  return true;
+}
+
+// Convert floating point to floating point
+template<typename FP2, typename FP1>
+static bool fcvtff( RevFeature* F, RevRegFile* R, RevMem* M, const RevInst& Inst ) {
+  R->SetFP( Inst.rd, static_cast<FP2>( R->GetFP<FP1>( Inst.rs1 ) ) );
+  R->AdvancePC( Inst );
+  return true;
+}
+
 }  // namespace SST::RevCPU
 
 #endif
@@ -31,13 +31,66 @@ namespace SST::RevCPU {
 
 struct RevInst;
 
-/// BoxNaN: Store a boxed float inside a double
-inline void BoxNaN( double* dest, const float* value ) {
-  uint32_t i32;
-  memcpy( &i32, value, sizeof( i32 ) );                   // The FP32 value
-  uint64_t i64 = uint64_t{ i32 } | ~uint64_t{ 0 } << 32;  // Boxed NaN value
-  memcpy( dest, &i64, sizeof( i64 ) );                    // Store in FP64 register
-  static_assert( sizeof( i32 ) == sizeof( float ) && sizeof( i64 ) == sizeof( double ) );
+// Mappings from floating point to same-sized integer types
+template<typename T>
+struct uint_type {};
+
+template<>
+struct uint_type<double> {
+  using type = uint64_t;
+  static_assert( sizeof( type ) == sizeof( double ) );
+};
+
+template<>
+struct uint_type<float> {
+  using type = uint32_t;
+  static_assert( sizeof( type ) == sizeof( float ) );
+};
+
+template<>
+struct uint_type<float16> {
+  using type = uint16_t;
+  static_assert( sizeof( type ) == sizeof( float16 ) );
+};
+
+template<typename T>
+using uint_type_t = typename uint_type<T>::type;
+
+/// BoxNaN: Store a boxed floating point value inside a possibly larger one
+template<typename T, typename U, typename = std::enable_if_t<sizeof( T ) >= sizeof( U )>>
+inline void BoxNaN( T* dest, const U* value ) {
+  if constexpr( sizeof( T ) == sizeof( U ) ) {
+    *dest = *value;
+  } else {
+    uint_type_t<U> i;
+    memcpy( &i, value, sizeof( i ) );                                                    // The value
+    uint_type_t<T> box = uint_type_t<T>{ i } | ~uint_type_t<T>{ 0 } << sizeof( U ) * 8;  // Boxed NaN value
+    memcpy( dest, &box, sizeof( box ) );                                                 // Store in larger register
+    static_assert( sizeof( i ) == sizeof( U ) && sizeof( box ) == sizeof( T ) );
+  }
+}
+
+/// UnBoxNaN: Unbox a value of type T from a register of a possibly larger type U.
+// FMV_FS marks FMV/FS move/store instructions, which just transfer the low bits;
+// otherwise, if any bit above the low sizeof(T)*8 bits is clear, quiet NaN is returned.
+template<typename T, bool FMV_FS = false, typename U, typename = std::enable_if_t<sizeof( T ) <= sizeof( U )>>
+inline T UnBoxNaN( const U* val ) {
+  if constexpr( sizeof( T ) == sizeof( U ) ) {
+    return *val;  // Same size: nothing is boxed, return the register value as-is
+  } else {
+    uint_type_t<U> i;
+    memcpy( &i, val, sizeof( i ) );  // Raw bits of the larger register
+    static_assert( sizeof( i ) == sizeof( *val ) );  // Size of the pointee U, not of the pointer
+    T fp;
+    if( !FMV_FS && ~i >> sizeof( T ) * 8 ) {  // Some upper bit is 0: not a valid NaN box
+      fp = std::numeric_limits<T>::quiet_NaN();
+    } else {
+      auto ifp = static_cast<uint_type_t<T>>( i );  // Keep only the low sizeof(T) bytes
+      memcpy( &fp, &ifp, sizeof( fp ) );            // Reinterpreted as T
+      static_assert( sizeof( ifp ) == sizeof( fp ) );
+    }
+    return fp;
+  }
 }
 
 /// RISC-V Register Mneumonics
@@ -303,34 +356,23 @@ class RevRegFile {
   template<typename T, bool FMV_FS = false, typename U>
   T GetFP( U rs ) const {
     if constexpr( std::is_same_v<T, double> ) {
-      return DPF[size_t( rs )];  // The FP64 register's value
+      return DPF[size_t( rs )];
+    } else if( HasD ) {
+      return UnBoxNaN<T, FMV_FS>( &DPF[size_t( rs )] );
     } else {
-      float fp32;
-      if( !HasD ) {
-        fp32 = SPF[size_t( rs )];  // The FP32 register's value
-      } else {
-        uint64_t i64;
-        memcpy( &i64, &DPF[size_t( rs )], sizeof( i64 ) );  // The FP64 register's value
-        if( !FMV_FS && ~i64 >> 32 ) {                       // Check for boxed NaN unless FMV/FS
-          fp32 = NAN;                                       // Return NaN if it's not boxed
-        } else {
-          auto i32 = static_cast<uint32_t>( i64 );  // For endian independence on host
-          memcpy( &fp32, &i32, sizeof( fp32 ) );    // The bottom half of FP64
-        }
-      }
-      return fp32;  // Reinterpreted as FP32
+      return UnBoxNaN<T, FMV_FS>( &SPF[size_t( rs )] );
     }
   }
 
   /// SetFP: Set a specific FP register to a floating-point value
   template<typename T, typename U>
   void SetFP( U rd, T value ) {
     if constexpr( std::is_same_v<T, double> ) {
-      DPF[size_t( rd )] = value;  // Store in FP64 register
+      DPF[size_t( rd )] = value;
     } else if( HasD ) {
-      BoxNaN( &DPF[size_t( rd )], &value );  // Store NaN-boxed float in FP64 register
+      BoxNaN( &DPF[size_t( rd )], &value );
     } else {
-      SPF[size_t( rd )] = value;  // Store in FP32 register
+      BoxNaN( &SPF[size_t( rd )], &value );
     }
   }
 
@@ -425,8 +467,8 @@ class RevRegFile {
   FCSR& GetFCSR() { return fcsr; }
 
   // Friend functions and classes to access internal register state
-  template<typename FP, typename INT>
-  friend bool CvtFpToInt( RevFeature* F, RevRegFile* R, RevMem* M, const RevInst& Inst );
+  template<typename INT, typename FP>
+  friend bool fcvtif( RevFeature* F, RevRegFile* R, RevMem* M, const RevInst& Inst );
 
   template<typename T>
   friend bool load( RevFeature* F, RevRegFile* R, RevMem* M, const RevInst& Inst );
 
@@ -44,69 +44,33 @@ class RV32D : public RevExt {
   static constexpr auto& fled    = fcondop<double, std::less_equal>;
 
   // FP to Integer Conversion instructions
-  static constexpr auto& fcvtwd  = CvtFpToInt<double, int32_t>;
-  static constexpr auto& fcvtwud = CvtFpToInt<double, uint32_t>;
+  static constexpr auto& fcvtwd  = fcvtif<int32_t, double>;
+  static constexpr auto& fcvtwud = fcvtif<uint32_t, double>;
 
-  static bool fsqrtd( RevFeature* F, RevRegFile* R, RevMem* M, const RevInst& Inst ) {
-    R->SetFP( Inst.rd, std::sqrt( R->GetFP<double>( Inst.rs1 ) ) );
-    R->AdvancePC( Inst );
-    return true;
-  }
+  // Square root
+  static constexpr auto& fsqrtd  = fsqrt<double>;
 
-  static bool fsgnjd( RevFeature* F, RevRegFile* R, RevMem* M, const RevInst& Inst ) {
-    R->SetFP( Inst.rd, std::copysign( R->GetFP<double>( Inst.rs1 ), R->GetFP<double>( Inst.rs2 ) ) );
-    R->AdvancePC( Inst );
-    return true;
-  }
+  // Sign transfer
+  static constexpr auto& fsgnjd  = fsgnj<double>;
+  static constexpr auto& fsgnjnd = fsgnjn<double>;
+  static constexpr auto& fsgnjxd = fsgnjx<double>;
 
-  static bool fsgnjnd( RevFeature* F, RevRegFile* R, RevMem* M, const RevInst& Inst ) {
-    R->SetFP( Inst.rd, std::copysign( R->GetFP<double>( Inst.rs1 ), -R->GetFP<double>( Inst.rs2 ) ) );
-    R->AdvancePC( Inst );
-    return true;
-  }
+  // Conversions between single and double precision FP
+  static constexpr auto& fcvtsd  = fcvtff<float, double>;
+  static constexpr auto& fcvtds  = fcvtff<double, float>;
 
-  static bool fsgnjxd( RevFeature* F, RevRegFile* R, RevMem* M, const RevInst& Inst ) {
-    double rs1 = R->GetFP<double>( Inst.rs1 ), rs2 = R->GetFP<double>( Inst.rs2 );
-    R->SetFP( Inst.rd, std::copysign( rs1, std::signbit( rs1 ) ? -rs2 : rs2 ) );
-    R->AdvancePC( Inst );
-    return true;
-  }
+  // FP Classify
+  static constexpr auto& fclassd = fclassify<double>;
 
-  static bool fcvtsd( RevFeature* F, RevRegFile* R, RevMem* M, const RevInst& Inst ) {
-    R->SetFP( Inst.rd, static_cast<float>( R->GetFP<double>( Inst.rs1 ) ) );
-    R->AdvancePC( Inst );
-    return true;
-  }
-
-  static bool fcvtds( RevFeature* F, RevRegFile* R, RevMem* M, const RevInst& Inst ) {
-    R->SetFP( Inst.rd, static_cast<double>( R->GetFP<float>( Inst.rs1 ) ) );
-    R->AdvancePC( Inst );
-    return true;
-  }
-
-  static bool fclassd( RevFeature* F, RevRegFile* R, RevMem* M, const RevInst& Inst ) {
-    R->SetX( Inst.rd, fclass( R->GetFP<double>( Inst.rs1 ) ) );
-    R->AdvancePC( Inst );
-    return true;
-  }
-
-  static bool fcvtdw( RevFeature* F, RevRegFile* R, RevMem* M, const RevInst& Inst ) {
-    R->SetFP( Inst.rd, static_cast<double>( R->GetX<int32_t>( Inst.rs1 ) ) );
-    R->AdvancePC( Inst );
-    return true;
-  }
-
-  static bool fcvtdwu( RevFeature* F, RevRegFile* R, RevMem* M, const RevInst& Inst ) {
-    R->SetFP( Inst.rd, static_cast<double>( R->GetX<uint32_t>( Inst.rs1 ) ) );
-    R->AdvancePC( Inst );
-    return true;
-  }
+  // Conversion from integer to double
+  static constexpr auto& fcvtdw  = fcvtfi<double, int32_t>;
+  static constexpr auto& fcvtdwu = fcvtfi<double, uint32_t>;
 
   // Compressed instructions
-  static constexpr auto& cfldsp = fld;
-  static constexpr auto& cfsdsp = fsd;
-  static constexpr auto& cfld   = fld;
-  static constexpr auto& cfsd   = fsd;
+  static constexpr auto& cfldsp  = fld;
+  static constexpr auto& cfsdsp  = fsd;
+  static constexpr auto& cfld    = fld;
+  static constexpr auto& cfsd    = fsd;
 
   // ----------------------------------------------------------------------
   //