From 59f3973c8e9bcbbde68242784779d1be4ac4335a Mon Sep 17 00:00:00 2001 From: Spenser Black Date: Wed, 28 Aug 2024 14:07:31 +0000 Subject: [PATCH 1/2] Add `.peggy` for PEG.js --- lib/linguist/languages.yml | 1 + samples/PEG.js/semver.peggy | 55 +++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 samples/PEG.js/semver.peggy diff --git a/lib/linguist/languages.yml b/lib/linguist/languages.yml index aae547bd83..c4b1a90148 100644 --- a/lib/linguist/languages.yml +++ b/lib/linguist/languages.yml @@ -5087,6 +5087,7 @@ PEG.js: color: "#234d6b" extensions: - ".pegjs" + - ".peggy" tm_scope: source.pegjs ace_mode: javascript codemirror_mode: javascript diff --git a/samples/PEG.js/semver.peggy b/samples/PEG.js/semver.peggy new file mode 100644 index 0000000000..9dcbdf7f81 --- /dev/null +++ b/samples/PEG.js/semver.peggy @@ -0,0 +1,55 @@ +/** + * SemVer.org v2 + * https://semver.org/spec/v2.0.0.html + * For unit tests see: https://github.com/dselman/peggy-semver + */ + +semver + = versionCore:versionCore pre:('-' @preRelease)? build:('+' @build)? { + return { ...versionCore, pre, build }; + } + +versionCore + = major:numericIdentifier '.' minor:numericIdentifier '.' patch:numericIdentifier { + return { major, minor, patch }; + } + +preRelease + = head:preReleaseIdentifier tail:('.' @preReleaseIdentifier)* { + return [ head, ...tail ]; + } + +build + = head:buildIdentifier tail:('.' @buildIdentifier)* { + return [ head, ...tail ]; + } + +preReleaseIdentifier + = alphanumericIdentifier + / numericIdentifier + +buildIdentifier + = alphanumericIdentifier + / $digit+ // Not a number, buildIdentifiers aren't semantically significant. + +// If there is a non-digit anywhere, this label is alphanumeric, and +// is compared lexically. Return a string. 
+alphanumericIdentifier + = $(digit* nonDigit identifierChar*) + +// Any semantically significant numbers are turned into BigInts (note: there +// is no length maximum for numbers in semver) for later numeric comparison. +numericIdentifier + = n:('0' / $(positiveDigit digit*)) { return BigInt(n); } + +identifierChar + = [a-z0-9-]i + +nonDigit + = [a-z-]i + +digit + = [0-9] + +positiveDigit + = [1-9] From 4a852c33b2576ca48f48b3d43819231598dfe9d2 Mon Sep 17 00:00:00 2001 From: Spenser Black Date: Wed, 28 Aug 2024 14:45:40 +0000 Subject: [PATCH 2/2] Swap `semver` sample for `abnfp` for peggy --- samples/PEG.js/abnfp.peggy | 200 ++++++++++++++++++++++++++++++++++++ samples/PEG.js/semver.peggy | 55 ---------- 2 files changed, 200 insertions(+), 55 deletions(-) create mode 100644 samples/PEG.js/abnfp.peggy delete mode 100644 samples/PEG.js/semver.peggy diff --git a/samples/PEG.js/abnfp.peggy b/samples/PEG.js/abnfp.peggy new file mode 100644 index 0000000000..cb23c2d222 --- /dev/null +++ b/samples/PEG.js/abnfp.peggy @@ -0,0 +1,200 @@ +{{ +import * as ast from "./ast.js"; +}} + +{ +const rules = new ast.Rules(); +} + +rulelist = (rule / empty)+ { + rules.loc = location(); + return rules; +} + +empty + = WSP* comment + / WSP* CRLF + +// Allow starting whitespace before a rule +rule = WSP* name:rulename operator:defined_as children:elements c_nl { + if (operator === "=/") { + rules.addAlternate(name, children, location()) + } else { + rules.addRule(name, children, location()) + } +} + +rulename = $(ALPHA (ALPHA / DIGIT / "-")*) + +// RFC 2045 uses := +// RFC 1730 uses ::= +// `::=` is from the original BNF. +// I don't know where `:=` came from. 
+defined_as = c_wsp* @("=/" / ":"|0..2| "=") c_wsp*
+
+elements = @alternation c_wsp*
+
+comment = ";" $(WSP / VCHAR)* EOL
+
+alternation = head:concatenation tail:(c_wsp* "/" c_wsp* @concatenation)* {
+  if (tail.length > 0) {
+    return new ast.Alternation([head, ...tail], location());
+  }
+  return head;
+}
+
+concatenation = head:repetition tail:(c_wsp+ @repetition)* {
+  if (tail.length > 0) {
+    return new ast.Concatenation([head, ...tail], location());
+  }
+  return head;
+}
+
+repetition = repeat:repeat? e:element {
+  if (repeat) {
+    return new ast.Repetition(repeat, e, location());
+  }
+  return e;
+}
+
+repeat
+  = min:decnum? "*" max:decnum? {
+    min |= 0; // an omitted min arrives as null; coerce it to 0
+    if (max === 0) {
+      error("max must not be zero");
+      return;
+    }
+    if ((typeof max === "number") && (max < min)) {
+      error(`max must be greater than or equal to min, but ${max} < ${min}`)
+      return;
+    }
+    return new ast.Repeat(min, max, location());
+  }
+  / n:decnum { return new ast.Repeat(n, n, location()); }
+
+element
+  = name:rulename { return rules.addRef(name, location()) }
+  / group
+  / option
+  / char_val
+  / num_val
+  / prose_val
+
+group = "(" c_wsp* alt:alternation c_wsp* ")" {
+  return new ast.Group(alt, location());
+}
+
+option = "[" c_wsp* alt:alternation c_wsp* "]" {
+  const loc = location();
+  return new ast.Repetition(new ast.Repeat(0, 1, loc), alt, loc);
+}
+
+char_val
+  = "%i"i?
+    DQUOTE txt:$([\x20-\x21] / [\x23-\x7e])* DQUOTE {
+    return new ast.CaseInsensitiveString(txt, location());
+  }
+  / "%s"i DQUOTE txt:$([\x20-\x21] / [\x23-\x7e])* DQUOTE {
+    return new ast.CaseSensitiveString(txt, 0, location());
+  }
+
+num_val = "%" @(bin_val / dec_val / hex_val)
+
+prose_val = "<" prose:$([\x20-\x3d] / [\x3f-\x7e] / CRLF)* ">" {
+  return new ast.Prose(prose, location());
+}
+
+DQUOTE = "\""
+
+bin_val
+  = "b"i nums:binnum|2..,"."| {
+    return new ast.CaseSensitiveString(
+      nums.map(b => String.fromCodePoint(b)).join(""),
+      2,
+      location()
+    );
+  }
+  / "b"i min:binnum "-" max:binnum {
+    return ast.Range.create(2, min, max, location());
+  }
+  / "b"i n:binnum {
+    return new ast.CaseSensitiveString(String.fromCodePoint(n), 2, location())
+  }
+
+dec_val
+  = "d"i nums:decnum|2..,"."| {
+    return new ast.CaseSensitiveString(
+      nums.map(d => String.fromCodePoint(d)).join(""),
+      10,
+      location()
+    );
+  }
+  / "d"i min:decnum "-" max:decnum {
+    return ast.Range.create(10, min, max, location());
+  }
+  / "d"i n:decnum {
+    return new ast.CaseSensitiveString(String.fromCodePoint(n), 10, location())
+  }
+
+hex_val
+  = "x"i nums:hexnum|2..,"."| {
+    return new ast.CaseSensitiveString(
+      nums.map(x => String.fromCodePoint(x)).join(""),
+      16,
+      location()
+    );
+  }
+  / "x"i min:hexnum "-" max:hexnum {
+    return ast.Range.create(16, min, max, location());
+  }
+  / "x"i n:hexnum {
+    return new ast.CaseSensitiveString(String.fromCodePoint(n), 16, location()) // 16, like the dotted hex form above (bin passes 2, dec passes 10)
+  }
+
+binnum = b:$BIT+ { return parseInt(b, 2) }
+decnum = d:$DIGIT+ { return parseInt(d, 10) }
+hexnum = h:$HEXDIG+ { return parseInt(h, 16) }
+
+c_wsp
+  = c_nl WSP
+  / WSP
+
+c_nl
+  = EOL
+  / comment
+
+EOL
+  = CRLF
+  / EOF
+
+EOF
+  = !.
+ +BIT + = "0" + / "1" + +HEXDIG + = DIGIT + / [A-F]i + +ALPHA = [\x41-\x5a] / [\x61-\x7a] + +DIGIT = [\x30-\x39] + +WSP = SP / HTAB + +SP = " " + +HTAB = "\t" + +VCHAR = [\x21-\x7e] + +// Take any CR/LF sequence +CRLF + = CR LF // Windows + / LF // Unix + / CR // Mac + +CR = "\r" + +LF = "\n" diff --git a/samples/PEG.js/semver.peggy b/samples/PEG.js/semver.peggy deleted file mode 100644 index 9dcbdf7f81..0000000000 --- a/samples/PEG.js/semver.peggy +++ /dev/null @@ -1,55 +0,0 @@ -/** - * SemVer.org v2 - * https://semver.org/spec/v2.0.0.html - * For unit tests see: https://github.com/dselman/peggy-semver - */ - -semver - = versionCore:versionCore pre:('-' @preRelease)? build:('+' @build)? { - return { ...versionCore, pre, build }; - } - -versionCore - = major:numericIdentifier '.' minor:numericIdentifier '.' patch:numericIdentifier { - return { major, minor, patch }; - } - -preRelease - = head:preReleaseIdentifier tail:('.' @preReleaseIdentifier)* { - return [ head, ...tail ]; - } - -build - = head:buildIdentifier tail:('.' @buildIdentifier)* { - return [ head, ...tail ]; - } - -preReleaseIdentifier - = alphanumericIdentifier - / numericIdentifier - -buildIdentifier - = alphanumericIdentifier - / $digit+ // Not a number, buildIdentifiers aren't semantically significant. - -// If there is a non-digit anywhere, this label is alphanumeric, and -// is compared lexically. Return a string. -alphanumericIdentifier - = $(digit* nonDigit identifierChar*) - -// Any semantically significant numbers are turned into BigInts (note: there -// is no length maximum for numbers in semver) for later numeric comparison. -numericIdentifier - = n:('0' / $(positiveDigit digit*)) { return BigInt(n); } - -identifierChar - = [a-z0-9-]i - -nonDigit - = [a-z-]i - -digit - = [0-9] - -positiveDigit - = [1-9]