|
/**
 * Sequence of 0s and 1s.
 * @typedef {number[]} Bits
 */

/**
 * Widths (in bits) of the three fields of a floating point layout.
 *
 * @typedef {{
 *   signBitsCount: number,
 *   exponentBitsCount: number,
 *   fractionBitsCount: number,
 * }} PrecisionConfig
 */

/**
 * The set of layouts supported by this module, keyed by precision name.
 *
 * @typedef {{
 *   half: PrecisionConfig,
 *   single: PrecisionConfig,
 *   double: PrecisionConfig
 * }} PrecisionConfigs
 */

/**
 * Field widths for the half, single and double precision layouts.
 * Bits are ordered sign first, then exponent, then fraction
 * (illustrated below for the half precision 1 + 5 + 10 layout):
 *
 * ┌───────────────── sign bit
 * │   ┌───────────── exponent bits
 * │   │       ┌───── fraction bits
 * │   │       │
 * X XXXXX XXXXXXXXXX
 *
 * The table and each entry are frozen so that this shared lookup data
 * cannot be modified at runtime.
 *
 * @type {PrecisionConfigs}
 */
const precisionConfigs = Object.freeze({
  // @see: https://en.wikipedia.org/wiki/Half-precision_floating-point_format
  half: Object.freeze({
    signBitsCount: 1,
    exponentBitsCount: 5,
    fractionBitsCount: 10,
  }),
  // @see: https://en.wikipedia.org/wiki/Single-precision_floating-point_format
  single: Object.freeze({
    signBitsCount: 1,
    exponentBitsCount: 8,
    fractionBitsCount: 23,
  }),
  // @see: https://en.wikipedia.org/wiki/Double-precision_floating-point_format
  double: Object.freeze({
    signBitsCount: 1,
    exponentBitsCount: 11,
    fractionBitsCount: 52,
  }),
});
| 51 | + |
/**
 * Converts the binary representation of a floating point number to a JavaScript number.
 *
 * The bits are read most significant first as `sign | exponent | fraction`, and are
 * decoded following the IEEE 754 rules:
 *
 * - stored exponent all zeros: zero or subnormal, `sign * 2^(1 - bias) * fraction`
 * - stored exponent all ones: `±Infinity` when the fraction is zero, `NaN` otherwise
 * - anything else: normalized, `sign * 2^(storedExponent - bias) * (1 + fraction)`
 *
 * Every bit after the sign and exponent fields is treated as a fraction bit.
 *
 * @param {Bits} bits - sequence of bits that represents the floating point number.
 * @param {PrecisionConfig} precisionConfig - half/single/double precision config.
 * @return {number} - floating point number decoded from its binary representation.
 */
function bitsToFloat(bits, precisionConfig) {
  const { signBitsCount, exponentBitsCount } = precisionConfig;

  // Figuring out the sign: (-1)^0 = 1, (-1)^1 = -1.
  const sign = (-1) ** bits[0];

  // Reading the stored (biased) exponent as an unsigned integer.
  const exponentBias = 2 ** (exponentBitsCount - 1) - 1;
  const maxStoredExponent = 2 ** exponentBitsCount - 1;
  const exponentBits = bits.slice(signBitsCount, signBitsCount + exponentBitsCount);
  const storedExponent = exponentBits.reduce(
    (exponentSoFar, currentBit, bitIndex) => {
      const bitPowerOfTwo = 2 ** (exponentBitsCount - bitIndex - 1);
      return exponentSoFar + currentBit * bitPowerOfTwo;
    },
    0,
  );

  // Calculating the fraction value: the i-th fraction bit weighs 2^-(i + 1).
  const fractionBits = bits.slice(signBitsCount + exponentBitsCount);
  const fraction = fractionBits.reduce(
    (fractionSoFar, currentBit, bitIndex) => {
      const bitPowerOfTwo = 2 ** -(bitIndex + 1);
      return fractionSoFar + currentBit * bitPowerOfTwo;
    },
    0,
  );

  // All-ones exponent is reserved for infinities (zero fraction) and NaNs.
  if (storedExponent === maxStoredExponent) {
    return fraction === 0 ? sign * Infinity : NaN;
  }

  // All-zeros exponent encodes signed zero and subnormals: there is no implicit
  // leading 1 and the exponent is fixed at (1 - bias).
  if (storedExponent === 0) {
    return sign * (2 ** (1 - exponentBias)) * fraction;
  }

  // Normalized number with the implicit leading 1.
  return sign * (2 ** (storedExponent - exponentBias)) * (1 + fraction);
}
| 90 | + |
/**
 * Decodes a 16-bit (half precision) binary floating point representation.
 *
 * Delegates to `bitsToFloat` using the `half` entry of `precisionConfigs`.
 *
 * @param {Bits} bits - sequence of bits that represents the floating point number.
 * @return {number} - floating point number decoded from its binary representation.
 */
export function bitsToFloat16(bits) {
  const { half: halfPrecision } = precisionConfigs;
  return bitsToFloat(bits, halfPrecision);
}
| 100 | + |
/**
 * Decodes a 32-bit (single precision) binary floating point representation.
 *
 * Delegates to `bitsToFloat` using the `single` entry of `precisionConfigs`.
 *
 * @param {Bits} bits - sequence of bits that represents the floating point number.
 * @return {number} - floating point number decoded from its binary representation.
 */
export function bitsToFloat32(bits) {
  const { single: singlePrecision } = precisionConfigs;
  return bitsToFloat(bits, singlePrecision);
}
| 110 | + |
/**
 * Decodes a 64-bit (double precision) binary floating point representation.
 *
 * Delegates to `bitsToFloat` using the `double` entry of `precisionConfigs`.
 *
 * @param {Bits} bits - sequence of bits that represents the floating point number.
 * @return {number} - floating point number decoded from its binary representation.
 */
export function bitsToFloat64(bits) {
  const { double: doublePrecision } = precisionConfigs;
  return bitsToFloat(bits, doublePrecision);
}
0 commit comments