@@ -193,40 +193,40 @@ static constexpr FloatSemantics semHF8 = { 8, -6, 4, 8, true };
193
193
194
194
inline uint32_t Round (
195
195
uint32_t man,
196
- uint32_t manShift ,
196
+ uint32_t numLostBits ,
197
197
bool isNegative,
198
198
uint32_t roundingMode)
199
199
{
200
- uint32_t lostBitsMask = BITMASK (manShift );
200
+ uint32_t lostBitsMask = BITMASK (numLostBits );
201
201
uint32_t lostBits = man & lostBitsMask;
202
- uint32_t lostBitsHalfMinusOne = BITMASK (manShift - 1 );
203
- uint32_t tieToEvenBias = (man & BIT (manShift )) >> manShift ;
202
+ uint32_t lostBitsHalfMinusOne = BITMASK (numLostBits - 1 );
203
+ uint32_t tieToEvenBias = (man & BIT (numLostBits )) >> numLostBits ;
204
204
205
205
switch (roundingMode)
206
206
{
207
207
case ROUND_TO_NEAREST_EVEN:
208
208
man += lostBitsHalfMinusOne + tieToEvenBias;
209
- man >>= manShift ;
209
+ man >>= numLostBits ;
210
210
break ;
211
211
case ROUND_TO_NEGATIVE:
212
- man >>= manShift ;
212
+ man >>= numLostBits ;
213
213
if (lostBits != 0 && isNegative)
214
214
{
215
215
man += 1 ;
216
216
}
217
217
break ;
218
218
case ROUND_TO_POSITIVE:
219
- man >>= manShift ;
219
+ man >>= numLostBits ;
220
220
if (lostBits != 0 && !isNegative)
221
221
{
222
222
man += 1 ;
223
223
}
224
224
break ;
225
225
case ROUND_TO_ZERO:
226
- man >>= manShift ;
226
+ man >>= numLostBits ;
227
227
break ;
228
228
default :
229
- man >>= manShift ;
229
+ man >>= numLostBits ;
230
230
IGC_ASSERT_MESSAGE (0 , " Unsupported rounding mode" );
231
231
break ;
232
232
}
@@ -251,6 +251,10 @@ inline uint32_t ConvertFloat(
251
251
bool isPositive = !isNegative;
252
252
bool isDenorm = expBits == 0 && manBits > 0 ;
253
253
bool isZero = expBits == 0 && manBits == 0 ;
254
+ bool isInf = srcSem.hasNoInf ? false : (expBits == BITMASK (srcNumExpBits) && manBits == 0 );
255
+ bool isNan = srcSem.hasNoInf ?
256
+ ((BITMASK (srcSem.sizeInBits - 1 ) & intVal) == BITMASK (srcSem.sizeInBits - 1 )) :
257
+ (expBits == BITMASK (srcNumExpBits) && manBits != 0 );
254
258
255
259
int32_t srcExpBias = 1 - srcSem.minExponent ;
256
260
int32_t dstExpBias = 1 - dstSem.minExponent ;
@@ -271,6 +275,20 @@ inline uint32_t ConvertFloat(
271
275
maxVal = signVal | (BITMASK (dstSem.sizeInBits - 1 ) & ~1 );
272
276
}
273
277
278
+ // Handle special cases
279
+ if (isZero)
280
+ {
281
+ return signVal;
282
+ }
283
+ if (isInf)
284
+ {
285
+ return infVal;
286
+ }
287
+ if (isNan)
288
+ {
289
+ return nanVal;
290
+ }
291
+
274
292
// Normalize the mantissa
275
293
while ((man & BIT (srcNumManBits)) == 0 )
276
294
{
0 commit comments