Skip to content

Commit

Permalink
Improve pseudocode; edit parser setting descriptions
Browse files Browse the repository at this point in the history
  • Loading branch information
acweathersby committed Oct 8, 2024
1 parent d06e102 commit 8892f3e
Show file tree
Hide file tree
Showing 2 changed files with 115 additions and 82 deletions.
102 changes: 66 additions & 36 deletions site/assets/js/lab/parser_info.ts
Original file line number Diff line number Diff line change
Expand Up @@ -628,7 +628,7 @@ function create_pseudo_code(dv: StatefullDV, is_scanner: boolean = false): strin
let pseudo_code = "";
pseudo_code += `s_${dv.off.toString(16)}${is_scanner ? " SCANNER " : ""}(lex, ctx) {`;
pseudo_code += merge_lines(process_instructions(dv, gotos).split("\n",));
pseudo_code += merge_lines(gotos);
pseudo_code += merge_goto_lines(gotos);
pseudo_code += "\n}";

return pseudo_code;
Expand All @@ -643,7 +643,24 @@ function merge_lines(internal_data: string[], prefix: string = "\n ") {
return pseudo_code;
}

function process_instructions(dv: StatefullDV, gotos: string[], root_name: string = ""): string {
/**
 * Joins queued goto lines into a pseudo-code fragment.
 *
 * Every non-empty entry is written on its own line, preceded by `prefix`.
 * The entry sitting in the final slot of `internal_data` is additionally
 * introduced with "tail return ". Empty entries are dropped, so when the
 * final slot itself is empty no line receives the "tail return " marker.
 *
 * @param internal_data Goto lines in emission order; empty strings are skipped.
 * @param prefix Text placed in front of every emitted line.
 * @returns The concatenated fragment, or an empty string when nothing is emitted.
 */
function merge_goto_lines(internal_data: string[], prefix: string = "\n ") {
  const final_slot = internal_data.length - 1;

  return internal_data.reduce((text, line, slot) => {
    // Blank entries contribute nothing but still occupy their slot.
    if (!line) return text;

    const lead = slot == final_slot ? "tail return " : "";
    return text + prefix + lead + line;
  }, "");
}

function process_instructions(dv: StatefullDV, gotos: string[], root_name: string = "", is_scanner: boolean = false): string {
let pseudo_code: string = ""
let i = 0;
let have_root = false
Expand All @@ -655,13 +672,17 @@ function process_instructions(dv: StatefullDV, gotos: string[], root_name: strin
case radlr.Opcode.NoOp: {
} break;
case radlr.Opcode.Pass: {
gotos.push("return")
if (gotos.length == 0) {
pseudo_code += `\nreturn`;
}
} break outer;
case radlr.Opcode.Fail: {
pseudo_code += `\nthrow "could not continue"`;
if (is_scanner)
pseudo_code += `\nreturn`;
else
pseudo_code += `\nthrow "could not continue"`;
} break outer;
case radlr.Opcode.ShiftChar: {
pseudo_code += "\nctx.emit(SHIFT_CHAR)";
pseudo_code += "\nlex.shift_la(1)";
} break;
case radlr.Opcode.ShiftToken: {
Expand All @@ -676,13 +697,18 @@ function process_instructions(dv: StatefullDV, gotos: string[], root_name: strin
pseudo_code += "\nctx.tok_len = ctx.sym_len\nctx.peek(tok)";
} break;
case radlr.Opcode.SkipToken: {
pseudo_code += "\nctx.skip()\ngoto " + root_name;
pseudo_code += "\nlex.shift(ctx.sym_len)";
pseudo_code += "\nctx.emit(SKIP { id: ctx.tk_id, len: ctx.sym_len })";
pseudo_code += "\ntail return " + root_name + "( lex, ctx )";
} break outer;
case radlr.Opcode.SkipTokenScanless: {
pseudo_code += "\nctx.skip()\ngoto " + root_name;
pseudo_code += "\nlex.shift(ctx.sym_len)";
pseudo_code += "\nctx.emit(SKIP { id: ctx.tk_id, len: ctx.sym_len })";
pseudo_code += "\ntail return " + root_name + "( lex, ctx )";
} break outer;
case radlr.Opcode.PeekSkipToken: {
pseudo_code += "\nctx.peek_skip()\ngoto " + root_name;
pseudo_code += "\nlex.shift(ctx.sym_len)";
pseudo_code += "\ntail return " + root_name + "( lex, ctx )";
} break outer;
case radlr.Opcode.PeekReset: {
pseudo_code += "\nctx.peek_reset()";
Expand All @@ -700,9 +726,9 @@ function process_instructions(dv: StatefullDV, gotos: string[], root_name: strin
} break;
case radlr.Opcode.Goto: {
let parse_mode = dv.u8();
let addressw = dv.u32();
pseudo_code += `\ns_${addressw.toString(16)}(lex, ctx)`;
} break;
let address = dv.u32();
gotos.unshift(`s_${address.toString(16)}(lex, ctx)`);
} break outer;
case radlr.Opcode.AssignToken: {
let tok_id = dv.u32();
pseudo_code += `\nctx.tok_id = ${tok_id}`;
Expand All @@ -720,7 +746,7 @@ function process_instructions(dv: StatefullDV, gotos: string[], root_name: strin
pseudo_code += generate_table_string(dv);
} break;
case radlr.Opcode.HashBranch: {
pseudo_code += generate_table_string(dv);
pseudo_code += generate_table_string(dv, is_scanner);
} break outer;
case radlr.Opcode.ByteSequence: {
let off = dv.off - 1;
Expand All @@ -735,15 +761,15 @@ function process_instructions(dv: StatefullDV, gotos: string[], root_name: strin
pseudo_code += `\n lex.incr(${len})`

let gotos: string[] = [];
pseudo_code += merge_lines(process_instructions(dv.to(success_address), gotos).split("\n"));
pseudo_code += merge_lines(gotos);
pseudo_code += merge_lines(process_instructions(dv.to(success_address), gotos, root_name, true).split("\n"));
pseudo_code += merge_goto_lines(gotos);
pseudo_code += "\nelse"

if (offset > 0) {
let fail_address = off + offset;
let gotos: string[] = [];
pseudo_code += merge_lines(process_instructions(dv.to(fail_address), gotos).split("\n"));
pseudo_code += merge_lines(gotos);
pseudo_code += merge_lines(process_instructions(dv.to(fail_address), gotos, root_name, true).split("\n"));
pseudo_code += merge_goto_lines(gotos);
} else {
pseudo_code += `\n throw \"Lexer does not have sequence '${data_str}' at current offset\"`;
}
Expand All @@ -763,16 +789,17 @@ function process_instructions(dv: StatefullDV, gotos: string[], root_name: strin
}


function generate_table_string(dv: StatefullDV): string {
function generate_table_string(dv: StatefullDV, is_scanner: boolean = false): string {
let out_string = "";
let { table_base_address, input_type, scan_address, table_length, table_start_iter, default_address } = getLUTableData(dv);
let { table_base_address, input_type, scan_address, table_length, table_start_iter, default_address, } = getLUTableData(dv);

let val = "tok";
let error = "unrecognized symbol";
let convert_val_to_string = (val: number): string => val.toString();
let convert_val_to_bool = (val: number): string => (val > 0) + "";
let convert_codepoint = (val: number): string => `\"${String.fromCodePoint(val)}\"`;

let root_name = `\`root_${table_base_address}`
let root_name = `s_${table_base_address.toString(16)}`

switch (input_type) {
case radlr.MatchInputType.NonTerminal: {
Expand All @@ -783,25 +810,26 @@ function generate_table_string(dv: StatefullDV): string {
if (scan_address < 0xFFFF_FFFF) {
out_string += `\ns_${scan_address.toString(16)}(lex, ctx)`;
}
out_string += `\ntok = ctx.tok_id`
val = "ctx.tok_id"
} break;
case radlr.MatchInputType.Class: {
val = "cp_class"
out_string += `\ncp_class = lex.codepoint_class()`
val = "lex.codepoint_class()"
is_scanner = true;
} break;
case radlr.MatchInputType.Codepoint: {
val = "cp"
out_string += `\ncp = lex.codepoint()`
val = "lex.codepoint()"
convert_val_to_string = convert_codepoint;
is_scanner = true;
} break;
case radlr.MatchInputType.Byte: {
val = "byte"
out_string += `\nbyte = lex.byte()`
val = "lex.byte()"
convert_val_to_string = convert_codepoint;
is_scanner = true;
} break;
case radlr.MatchInputType.EndOfFile: {
val = "eof"
out_string += `\neof = lex.is_eof()`
val = "lex.is_eof()"
convert_val_to_string = convert_val_to_bool;
is_scanner = true;
} break;
case radlr.MatchInputType.ByteScanless: {
val = "byte"
Expand Down Expand Up @@ -852,27 +880,29 @@ function generate_table_string(dv: StatefullDV): string {

for (const [address, entries] of map.entries()) {
if (address == default_address) {
out_string += `\n default:`;
out_string += `\n default`;
inlined_default = true;
} else {
for (const entry of entries) {
out_string += `\n case ${convert_val_to_string(entry)}:`;
out_string += `\n case ${convert_val_to_string(entry)}`;
}
}

let gotos: string[] = [];
out_string += merge_lines(process_instructions(dv.to(address), gotos).split("\n"), "\n ");
out_string += merge_lines(gotos, "\n ");
out_string += merge_lines(process_instructions(dv.to(address), gotos, root_name, is_scanner).split("\n"), "\n ");
out_string += merge_goto_lines(gotos, "\n ");
}

if (!inlined_default) {
if (default_address < dv.dv.byteLength) {
out_string += `\n default:`;
out_string += `\n default`;
let gotos: string[] = [];
out_string += merge_lines(process_instructions(dv.to(default_address), gotos).split("\n"), "\n ");
out_string += merge_lines(gotos, "\n ");
out_string += merge_lines(process_instructions(dv.to(default_address), gotos, root_name, is_scanner).split("\n"), "\n ");
out_string += merge_goto_lines(gotos, "\n ");
} else if (is_scanner) {
out_string += `\n default\n return"`;
} else {
out_string += `\n default:\n throw "${error}"`;
out_string += `\n default\n throw "${error}"`;
}
}

Expand Down
95 changes: 49 additions & 46 deletions site/layouts/page/lab.html
Original file line number Diff line number Diff line change
Expand Up @@ -52,13 +52,14 @@
<div id="settings-content">
<div class="close-button">x</div>
<h1>Parser Settings</h1>
<h2>Algorithm</h2>
<label id="ALLOW_CALLS">
<div class=title>Allow calls</div>
<div class=title>Enable recursive descent parsing</div>
<div class="note">When enabled, recursive descent style <i>"call"</i> states will be generated</div>
<input type="checkbox" />
</label>
<label id="ALLOW_LR">
<div class=title>Allow LR</div>
<div class=title>Enable bottom up parsing</div>
<div class="note">When enabled, LR style states may be produced. In general, this
allows more advanced grammar constructs to be parsed, such
as left recursive rules.
Expand All @@ -68,91 +69,93 @@ <h1>Parser Settings</h1>
ant errors will be reported.</div>
<input type="checkbox" />
</label>
<label id="ALLOW_CONTEXT_SPLITTING">
<div class=title>Enable context forks</div>
<div class="note">Allow the parser to split its context to handle ambiguity. This
may lead to a CSF (Concrete Syntax Forest) or a CSDAG (Concrete Syntax
DAG) being returned by the parser instead of a CST</div>
<input type="checkbox" />
</label>
<h3>Peeking</h3>
<label id="ALLOW_PEEKING">
<div class=title>Allow peeking</div>
<div class=title>Enable Peek</div>
<div class="note">
<p>When enabled, unrestricted lookahead states will be generated</p>

<p>When disabled, grammars with rules that require a lookahead that is
`k>1` will be rejected, and relevant errors will be reported.</p>
</div>
<input type="checkbox" />
</label>
<label id="ALLOW_CONTEXT_SPLITTING">
<div class=title>Allow context splitting (Fork)</div>
<div class="note">Allow the parser to split its context to handle ambiguity. This
may lead to a CSF (Concrete Syntax Forest) or a CSDAG (Concrete Syntax
DAG) being returned by the parser instead of a CST</div>
<label id="max_k">
<div class=title>Maximum lookahead</div>
<div class="note">
<p>The maximum number of lookahead symbols allowed before parser construction
is aborted or a different disambiguating strategy is employed.</p>
<p>A value of zero represents unlimited lookahead</p>
</div>
<input type="number" min="0" max="128" />
</label>
<h2>Parser</h2>
<label id="ALLOW_ANONYMOUS_NONTERM_INLINING">
<div class=title>Enable anonymous nonterm inlining</div>
<div class="note">An anonymous non-terminal, aka grouped rules `e.g ( symA symB | symC | ..
)`, may be inlined into the body of its host rule if none of the grouped
rules contain semantic actions, such as `:ast` definitions.

Parsers created with this type of optimization tend to perform poorly when
used for error correcting.</div>
<input type="checkbox" />
</label>
<label id="CONTEXT_FREE">
<div class=title>Context free</div>
<div class="note">Creates a single scanner instead of multiple contextual scanners. More
likely to report terminal conflicts.</div>
<label id="EXPORT_ALL_NONTERMS">
<div class=title>Export all nonterms</div>
<div class="note">Export all non-terminals as parser entry points. This implies
an RD or RAD parser.

<p style="color:red">Requires a lab-host server.</p>
</div>
<input type="checkbox" />
</label>
<label id="AllOW_CST_MERGING">
<div class=title>Allow CST merging</div>
<div class=title>CST merging</div>
<div class="note">Creates states that directly handle transitions on terminals, allowing the
creation of parsers that can patch existing CST data structures.</div>
<input type="checkbox" />
</label>
<label id="ALLOW_CST_NONTERM_SHIFT">
<div class=title>Allow CST nonterm shift</div>
<div class=title>CST nonterm shift</div>
<div class="note">Allow the parser to shift on CST non-term nodes.</div>
<input type="checkbox" />
</label>
<h2>Lexer</h2>
<label id="CONTEXT_FREE">
<div class=title>Context free</div>
<div class="note">Creates a single scanner instead of multiple contextual scanners. More
likely to report terminal conflicts.</div>
<input type="checkbox" />
</label>
<label id="ALLOW_SCANNER_INLINING">
<div class=title>Allow scanner inlining</div>
<div class=title>Inline scanner states</div>
<div class="note">Allow inlining of scanners that yield single codepoint tokens.

Parsers created with this type of optimization tend to perform poorly when
used for error correction.</div>
<input type="checkbox" />
</label>
<label id="ALLOW_ANONYMOUS_NONTERM_INLINING">
<div class=title>Allow anonymous nonterm inlining</div>
<div class="note">An anonymous non-terminal, aka grouped rules `e.g ( symA symB | symC | ..
)`, may be inlined into the body of its host rule if none of the grouped
rules contain semantic actions, such as `:ast` definitions.

Parsers created with this type of optimization tend to perform poorly when
used for error correcting.</div>
<input type="checkbox" />
</label>
<label id="ALLOW_BYTE_SEQUENCES">
<div class=title>Allow byte sequences</div>
<div class=title>Scan byte sequences</div>
<div class="note"> Enables using wide data types ( u16 | u32 | u64 | u128 ) to recognize a
sequence of characters.</div>
<input type="checkbox" />
</label>
<label id="ALLOW_LOOKAHEAD_SCANNERS">
<div class=title>Allow lookahead scanners</div>
<div class=title>Enable scanner lookahead</div>
<div class="note">Enables FOLLOW context sensitive scanners, which will consider the tokens
that _follow_ the states which the scanner is constructing tokens
for.

May significantly increase the number of scanner states.</div>
<input type="checkbox" />
</label>
<label id="max_k">
<div class=title>Max K</div>
<div class="note">
<p>The maximum number of lookahead symbols allowed before parser construction
is aborted or a different disambiguating strategy is employed.</p>
<p>A value of zero represents unlimited lookahead</p>
</div>
<input type="number" min="0" max="128" />
</label>
<label id="EXPORT_ALL_NONTERMS">
<div class=title>Export all nonterms</div>
<div class="note">Export all non-terminals as parser entry points. This implies
an RD or RAD parser.

<p style="color:red">Requires a lab-host server.</p>
</div>
<input type="checkbox" />
</label>
</div>
</div>
<div id="settings-panel" class="inactive">
Expand Down

0 comments on commit 8892f3e

Please sign in to comment.