Skip to content

Commit 74b4d79

Browse files
committed
finalize
1 parent 3ac6efd commit 74b4d79

7 files changed

+404
-53
lines changed

src/tokenizer.ts

+87-51
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,11 @@ import {
3131
isIdentifierPart,
3232
isDecimal,
3333
isOctal,
34+
isHex,
35+
isHexPart,
3436
isHighSurrogate,
35-
isLowSurrogate
37+
isLowSurrogate,
38+
parseHexFloat
3639
} from "./util";
3740

3841
/** Named token types. */
@@ -1258,18 +1261,34 @@ export class Tokenizer extends DiagnosticEmitter {
12581261
var text = this.source.text;
12591262
var pos = this.pos;
12601263
var end = this.end;
1261-
if (pos + 1 < end && text.charCodeAt(pos) == CharCode._0) {
1262-
switch (text.charCodeAt(pos + 2) | 32) {
1263-
case CharCode.x:
1264+
var hex = false;
1265+
if (pos + 2 < end && text.charCodeAt(pos) == CharCode._0) {
1266+
switch (text.charCodeAt(pos + 1) | 32) {
1267+
case CharCode.x: {
1268+
// Don't early return for CharCode.x
1269+
// It is possibly a hexadecimal float.
1270+
hex = true;
1271+
pos += 2;
1272+
break;
1273+
}
12641274
case CharCode.b:
12651275
case CharCode.o: return true;
1276+
case CharCode.DOT: return false;
12661277
}
12671278
}
12681279
while (pos < end) {
12691280
let c = text.charCodeAt(pos);
1270-
if (c == CharCode.DOT || (c | 32) == CharCode.e) return false;
1271-
if (c != CharCode._ && (c < CharCode._0 || c > CharCode._9)) break;
1281+
if (c == CharCode.DOT) return false;
12721282
// does not validate separator placement (this is done in readXYInteger)
1283+
if (c != CharCode._) {
1284+
if (hex) {
1285+
if ((c | 32) == CharCode.p) return false;
1286+
if (!isHex(c)) break;
1287+
} else {
1288+
if ((c | 32) == CharCode.e) return false;
1289+
if (!isDecimal(c)) break;
1290+
}
1291+
}
12731292
pos++;
12741293
}
12751294
return true;
@@ -1313,30 +1332,24 @@ export class Tokenizer extends DiagnosticEmitter {
13131332
var end = this.end;
13141333
var start = pos;
13151334
var sepEnd = start;
1316-
var value = i64_new(0);
1335+
var value = i64_zero;
13171336
var i64_4 = i64_new(4);
13181337
var nextValue = value;
13191338
var overflowOccurred = false;
13201339

13211340
while (pos < end) {
13221341
let c = text.charCodeAt(pos);
1323-
if (c >= CharCode._0 && c <= CharCode._9) {
1324-
// value = (value << 4) + c - CharCode._0;
1342+
if (isDecimal(c)) {
1343+
// (value << 4) + c - CharCode._0
13251344
nextValue = i64_add(
13261345
i64_shl(value, i64_4),
13271346
i64_new(c - CharCode._0)
13281347
);
1329-
} else if (c >= CharCode.A && c <= CharCode.F) {
1330-
// value = (value << 4) + 10 + c - CharCode.A;
1331-
nextValue = i64_add(
1332-
i64_shl(value, i64_4),
1333-
i64_new(10 + c - CharCode.A)
1334-
);
1335-
} else if (c >= CharCode.a && c <= CharCode.f) {
1336-
// value = (value << 4) + 10 + c - CharCode.a;
1348+
} else if (isHexPart(c)) {
1349+
// (value << 4) + (c | 32) + (10 - CharCode.a)
13371350
nextValue = i64_add(
13381351
i64_shl(value, i64_4),
1339-
i64_new(10 + c - CharCode.a)
1352+
i64_new((c | 32) + (10 - CharCode.a))
13401353
);
13411354
} else if (c == CharCode._) {
13421355
if (sepEnd == pos) {
@@ -1386,15 +1399,15 @@ export class Tokenizer extends DiagnosticEmitter {
13861399
var end = this.end;
13871400
var start = pos;
13881401
var sepEnd = start;
1389-
var value = i64_new(0);
1402+
var value = i64_zero;
13901403
var i64_10 = i64_new(10);
13911404
var nextValue = value;
13921405
var overflowOccurred = false;
13931406

13941407
while (pos < end) {
13951408
let c = text.charCodeAt(pos);
1396-
if (c >= CharCode._0 && c <= CharCode._9) {
1397-
// value = value * 10 + c - CharCode._0;
1409+
if (isDecimal(c)) {
1410+
// value = value * 10 + c - CharCode._0
13981411
nextValue = i64_add(
13991412
i64_mul(value, i64_10),
14001413
i64_new(c - CharCode._0)
@@ -1451,14 +1464,14 @@ export class Tokenizer extends DiagnosticEmitter {
14511464
var end = this.end;
14521465
var start = pos;
14531466
var sepEnd = start;
1454-
var value = i64_new(0);
1467+
var value = i64_zero;
14551468
var i64_3 = i64_new(3);
14561469
var nextValue = value;
14571470
var overflowOccurred = false;
14581471

14591472
while (pos < end) {
14601473
let c = text.charCodeAt(pos);
1461-
if (c >= CharCode._0 && c <= CharCode._7) {
1474+
if (isOctal(c)) {
14621475
// value = (value << 3) + c - CharCode._0;
14631476
nextValue = i64_add(
14641477
i64_shl(value, i64_3),
@@ -1511,21 +1524,20 @@ export class Tokenizer extends DiagnosticEmitter {
15111524
var end = this.end;
15121525
var start = pos;
15131526
var sepEnd = start;
1514-
var value = i64_new(0);
1515-
var i64_1 = i64_new(1);
1527+
var value = i64_zero;
15161528
var nextValue = value;
15171529
var overflowOccurred = false;
15181530

15191531
while (pos < end) {
15201532
let c = text.charCodeAt(pos);
15211533
if (c == CharCode._0) {
15221534
// value = (value << 1);
1523-
nextValue = i64_shl(value, i64_1);
1535+
nextValue = i64_shl(value, i64_one);
15241536
} else if (c == CharCode._1) {
15251537
// value = (value << 1) + 1;
15261538
nextValue = i64_add(
1527-
i64_shl(value, i64_1),
1528-
i64_1
1539+
i64_shl(value, i64_one),
1540+
i64_one
15291541
);
15301542
} else if (c == CharCode._) {
15311543
if (sepEnd == pos) {
@@ -1569,27 +1581,23 @@ export class Tokenizer extends DiagnosticEmitter {
15691581
}
15701582

15711583
/**
 * Reads a float literal starting at the current position.
 *
 * Dispatches to `readHexFloat` when the literal starts with `0x` / `0X` and at
 * least one more character follows the prefix, otherwise to `readDecimalFloat`.
 */
readFloat(): f64 {
  var source = this.source.text;
  var cursor = this.pos;
  var hasHexPrefix = (
    cursor + 2 < this.end &&
    source.charCodeAt(cursor) == CharCode._0 &&
    // `| 32` folds `X` onto `x`
    (source.charCodeAt(cursor + 1) | 32) == CharCode.x
  );
  return hasHexPrefix
    ? this.readHexFloat()
    : this.readDecimalFloat();
}
15841592

15851593
readDecimalFloat(): f64 {
15861594
var text = this.source.text;
15871595
var end = this.end;
15881596
var start = this.pos;
1589-
var sepCount = this.readDecimalFloatPartial(false);
1597+
var sepCount = this.readFloatPartial(false, false);
15901598
if (this.pos < end && text.charCodeAt(this.pos) == CharCode.DOT) {
15911599
++this.pos;
1592-
sepCount += this.readDecimalFloatPartial();
1600+
sepCount += this.readFloatPartial(true, false);
15931601
}
15941602
if (this.pos < end) {
15951603
let c = text.charCodeAt(this.pos);
@@ -1601,7 +1609,7 @@ export class Tokenizer extends DiagnosticEmitter {
16011609
) {
16021610
++this.pos;
16031611
}
1604-
sepCount += this.readDecimalFloatPartial();
1612+
sepCount += this.readFloatPartial(true, false);
16051613
}
16061614
}
16071615
let result = text.substring(start, this.pos);
@@ -1610,7 +1618,7 @@ export class Tokenizer extends DiagnosticEmitter {
16101618
}
16111619

16121620
/** Reads past one section of a decimal or hexadecimal float literal. Returns the number of separators encountered. */
1613-
private readDecimalFloatPartial(allowLeadingZeroSep: bool = true): u32 {
1621+
private readFloatPartial(allowLeadingZeroSep: bool, isHexadecimal: bool): u32 {
16141622
var text = this.source.text;
16151623
var pos = this.pos;
16161624
var start = pos;
@@ -1620,7 +1628,6 @@ export class Tokenizer extends DiagnosticEmitter {
16201628

16211629
while (pos < end) {
16221630
let c = text.charCodeAt(pos);
1623-
16241631
if (c == CharCode._) {
16251632
if (sepEnd == pos) {
16261633
this.error(
@@ -1637,8 +1644,12 @@ export class Tokenizer extends DiagnosticEmitter {
16371644
}
16381645
sepEnd = pos + 1;
16391646
++sepCount;
1640-
} else if (!isDecimal(c)) {
1641-
break;
1647+
} else {
1648+
if (isHexadecimal) {
1649+
if (!isHex(c)) break;
1650+
} else {
1651+
if (!isDecimal(c)) break;
1652+
}
16421653
}
16431654
++pos;
16441655
}
@@ -1655,7 +1666,34 @@ export class Tokenizer extends DiagnosticEmitter {
16551666
}
16561667

16571668
/**
 * Reads a hexadecimal float literal of the form `0x<hex>[.<hex>][p[+-]<dec>]`
 * starting at the current position and returns its value.
 *
 * Expects `this.pos` to point at the leading `0` of the `0x` / `0X` prefix.
 * The mantissa sections are read as hexadecimal digits, the binary exponent
 * after `p` / `P` as decimal digits. Numeric separators are counted while
 * reading and stripped before the text is handed to `parseHexFloat`.
 */
readHexFloat(): f64 {
  var text = this.source.text;
  var end = this.end;
  var start = this.pos;

  this.pos += 2; // skip 0x
  var sepCount = this.readFloatPartial(false, true);
  if (this.pos < end && text.charCodeAt(this.pos) == CharCode.DOT) {
    ++this.pos;
    sepCount += this.readFloatPartial(true, true);
  }
  if (this.pos < end && (text.charCodeAt(this.pos) | 32) == CharCode.p) {
    ++this.pos; // skip p / P
    // Consume an exponent sign only if a decimal digit follows it. The
    // exponent is decimal, so a hex letter after the sign must not count.
    if (this.pos + 1 < end) {
      let c = text.charCodeAt(this.pos);
      if (
        (c == CharCode.MINUS || c == CharCode.PLUS) &&
        isDecimal(text.charCodeAt(this.pos + 1))
      ) {
        ++this.pos;
      }
    }
    sepCount += this.readFloatPartial(true, false);
  }
  let result = text.substring(start, this.pos);
  if (sepCount) result = result.replaceAll("_", "");
  return parseHexFloat(result);
}
16601698

16611699
readHexadecimalEscape(remain: i32 = 2, startIfTaggedTemplate: i32 = -1): string {
@@ -1665,12 +1703,10 @@ export class Tokenizer extends DiagnosticEmitter {
16651703
var end = this.end;
16661704
while (pos < end) {
16671705
let c = text.charCodeAt(pos++);
1668-
if (c >= CharCode._0 && c <= CharCode._9) {
1706+
if (isDecimal(c)) {
16691707
value = (value << 4) + c - CharCode._0;
1670-
} else if (c >= CharCode.A && c <= CharCode.F) {
1671-
value = (value << 4) + c + (10 - CharCode.A);
1672-
} else if (c >= CharCode.a && c <= CharCode.f) {
1673-
value = (value << 4) + c + (10 - CharCode.a);
1708+
} else if (isHexPart(c)) {
1709+
value = (value << 4) + (c | 32) + (10 - CharCode.a);
16741710
} else if (~startIfTaggedTemplate) {
16751711
this.pos = --pos;
16761712
return text.substring(startIfTaggedTemplate, pos);

src/util/math.ts

+35
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,38 @@ export function accuratePow64(x: f64, y: f64): f64 {
2424
}
2525
return Math.pow(x, y);
2626
}
27+
28+
// see: https://git.musl-libc.org/cgit/musl/tree/src/math/scalbn.c
/**
 * Equivalent of `x * (2 ** n)`.
 *
 * Exponents outside of `[-1022, 1023]` are applied in up to two pre-scaling
 * steps; whatever remains after that is clamped to the range before the final
 * multiplication.
 */
export function scalbn(x: f64, n: i32): f64 {
  const
    TWO_POW_1023 = 8.98846567431158e+307,    // 0x1p1023
    TWO_POW_M969 = 2.00416836000897278e-292; // 0x1p-1022 * 0x1p53

  var scaled = x;
  var exponent = n;
  if (exponent > 1023) {
    for (let step = 0; step < 2 && exponent > 1023; ++step) {
      scaled *= TWO_POW_1023;
      exponent -= 1023;
    }
    if (exponent > 1023) exponent = 1023;
  } else if (exponent < -1022) {
    // make sure final n < -53 to avoid double
    // rounding in the subnormal range
    for (let step = 0; step < 2 && exponent < -1022; ++step) {
      scaled *= TWO_POW_M969;
      exponent += 1022 - 53;
    }
    if (exponent < -1022) exponent = -1022;
  }
  // The multiplier is built from its bit pattern: the biased exponent
  // `0x3FF + exponent` is shifted above the mantissa bits.
  if (!ASC_TARGET) { // ASC_TARGET == JS
    // presumably the second argument of i64_new is the high 32 bits - confirm
    return scaled * i64_as_f64(i64_new(0, (0x3FF + exponent) << 20));
  } else {
    // @ts-ignore
    return scaled * reinterpret<f64>(u64(0x3FF + exponent) << 52);
  }
}

src/util/text.ts

+39-2
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
* @license Apache-2.0
44
*/
55

6+
import { scalbn } from "./math";
7+
68
/** An enum of named character codes. */
79
export const enum CharCode {
810

@@ -234,10 +236,15 @@ export function isOctal(c: i32): bool {
234236
return c >= CharCode._0 && c <= CharCode._7;
235237
}
236238

239+
/** Tests if the specified character code is a hexadecimal letter digit, i.e. `a`-`f` or `A`-`F`. */
export function isHexPart(c: i32): bool {
  // Setting bit 5 folds `A`-`F` onto `a`-`f`, so one range check covers both cases.
  let folded = c | 32;
  return !(folded < CharCode.a || folded > CharCode.f);
}
244+
237245
/** Tests if the specified character code is a hexadecimal digit: a decimal digit or one of the letters accepted by `isHexPart`. */
export function isHex(c: i32): bool {
  if (isDecimal(c)) return true;
  return isHexPart(c);
}
242249

243250
/** Tests if the specified character code is trivially alphanumeric. */
@@ -559,3 +566,33 @@ export function escapeString(str: string, quote: CharCode): string {
559566
if (i > off) sb.push(str.substring(off, i));
560567
return sb.join("");
561568
}
569+
570+
/**
 * Parses the text of a hexadecimal float literal, e.g. `0x1.8p3` or `-0xA.Cp-2`.
 *
 * The mantissa (everything before `p` / `P`, or the whole string if there is
 * no exponent) is read as hexadecimal, with an optional fraction after `.`.
 * The exponent after `p` / `P` is read as a decimal power of two.
 *
 * @param str Literal text without numeric separators.
 * @returns The mantissa scaled by two to the power of the exponent, or the
 *          plain mantissa if there is no exponent part.
 */
export function parseHexFloat(str: string): f64 {
  var sign = 1, pPos = -1, dotPos = -1;
  // Single pass to locate a leading minus sign, the exponent marker and the dot.
  for (let i = 0, k = str.length; i < k; ++i) {
    const c = str.charCodeAt(i);
    if (i == 0 && c == CharCode.MINUS) {
      sign = -1;
    } else if ((c | 32) == CharCode.p) {
      pPos = i;
    } else if (c == CharCode.DOT) {
      dotPos = i;
    }
  }
  var mant: f64;
  var mantissa = ~pPos ? str.substring(0, pPos) : str;
  if (~dotPos) {
    const integer = mantissa.substring(0, dotPos);
    const fraction = mantissa.substring(dotPos + 1);
    // A literal such as `0x.8` has no integer digits. `parseInt("0x", 16)`
    // would give NaN, so an empty integer part, or one ending with the `x`
    // of the prefix, counts as zero.
    const integerLength = integer.length;
    const hasIntegerDigits = integerLength > 0 &&
      (integer.charCodeAt(integerLength - 1) | 32) != CharCode.x;
    const intVal = hasIntegerDigits ? parseInt(integer, 16) : 0;
    // Each fraction digit is worth 4 bits, so scale down by 2 ** (4 * digits).
    const fracVal = fraction.length
      ? scalbn(parseInt(fraction, 16), -(fraction.length << 2))
      : 0;
    // `intVal` already carries the sign; the fraction was parsed unsigned.
    mant = intVal + sign * fracVal;
  } else {
    mant = parseInt(mantissa, 16);
  }
  return ~pPos
    ? scalbn(mant, parseInt(str.substring(pPos + 1), 10))
    : mant;
}

0 commit comments

Comments
 (0)