From c06f5eecbd887565c176700d2e90741a0445942c Mon Sep 17 00:00:00 2001 From: HeronErin Date: Thu, 25 Apr 2024 12:11:25 -0400 Subject: [PATCH] Full scope parsing and testing --- source/parsing/treegen/astTypes.d | 9 +- source/parsing/treegen/scopeParser.d | 125 ++++++++++++-------- source/parsing/treegen/tokenRelationships.d | 53 ++++++++- source/tests/treegen.d | 4 +- 4 files changed, 133 insertions(+), 58 deletions(-) diff --git a/source/parsing/treegen/astTypes.d b/source/parsing/treegen/astTypes.d index a64ad28..1a5c1e3 100644 --- a/source/parsing/treegen/astTypes.d +++ b/source/parsing/treegen/astTypes.d @@ -58,7 +58,7 @@ struct DefineVariableNodeData struct AssignVariableNodeData { - NameUnit[] name; // Name of variable(s) to assign Ex: x = y = z = 5; + AstNode[] name; // Name of variable(s) to assign Ex: x = y = z = 5; AstNode value; } @@ -150,8 +150,6 @@ class AstNode union { KeywordNodeData keywordNodeData; // Keyword - AstNode[] scopeContents; // Scope - DefineFunctionNodeData defineFunctionNodeData; // DefineFunction DefineVariableNodeData defineVariableNodeData; // DefineVariable AssignVariableNodeData assignVariableNodeData; // AssignVariable @@ -225,6 +223,7 @@ class AstNode callNodeData.args.tree(tabCount + 1); break; case AstAction.DoubleArgumentOperation: + write("opr "); writeln(doubleArgumentOperationNodeData.operationVariety.to!string ~ ":"); doubleArgumentOperationNodeData.left.tree(tabCount + 1); doubleArgumentOperationNodeData.right.tree(tabCount + 1); @@ -250,8 +249,8 @@ class AstNode break; case AstAction.AssignVariable: write("Assigning variable(s): "); - foreach (NameUnit name; assignVariableNodeData.name) - write(name.names.to!string ~ ", "); + foreach (AstNode nameNode; assignVariableNodeData.name) + write(nameNode.namedUnit.names.to!string ~ ", "); writeln(": "); assignVariableNodeData.value.tree(tabCount + 1); break; diff --git a/source/parsing/treegen/scopeParser.d b/source/parsing/treegen/scopeParser.d index 2dc7540..0b2b941 100644 
--- a/source/parsing/treegen/scopeParser.d +++ b/source/parsing/treegen/scopeParser.d @@ -35,19 +35,6 @@ class ScopeData Array!AstNode instructions; } -enum LineVariety -{ - TotalImport, - SelectiveImport, - ModuleDeclaration, - - SimpleExpression, - IfStatementWithScope, - IfStatementWithoutScope, - DeclarationLine, - DeclarationAndAssignment, -} - struct LineVarietyTestResult { LineVariety lineVariety; @@ -55,37 +42,19 @@ struct LineVarietyTestResult TokenGrepResult[] tokenMatches; } -LineVarietyTestResult getLineVarietyTestResult(Token[] tokens, size_t index) +LineVarietyTestResult getLineVarietyTestResult( + const(VarietyTestPair[]) scopeParseMethod, Token[] tokens, size_t index) { size_t temp_index = index; - static foreach (i, func; [ - TotalImport, - SelectiveImport, - ModuleDeclaration, - - IfStatementWithScope, - IfStatementWithoutScope, - DeclarationLine, - DeclarationAndAssignment - ]) + foreach (method; scopeParseMethod) { - { - Nullable!(TokenGrepResult[]) grepResults = func.matchesToken(tokens, temp_index); - if (null != grepResults) - return LineVarietyTestResult( - [ - LineVariety.TotalImport, - LineVariety.SelectiveImport, - LineVariety.ModuleDeclaration, - LineVariety.IfStatementWithScope, - LineVariety.IfStatementWithoutScope, - LineVariety.DeclarationLine, - LineVariety.DeclarationAndAssignment - ][i], temp_index - index, grepResults.value - ); - temp_index = index; - } + Nullable!(TokenGrepResult[]) grepResults = method.test.matchesToken(tokens, temp_index); + if (null != grepResults) + return LineVarietyTestResult( + method.variety, temp_index - index, grepResults.value + ); + temp_index = index; } return LineVarietyTestResult(LineVariety.SimpleExpression, -1); @@ -118,11 +87,11 @@ NameUnit[] commaSeperatedNameUnits(Token[] tokens, ref size_t index) import std.stdio; -LineVarietyTestResult parseLine(Token[] tokens, ref size_t index, ScopeData parent) +LineVarietyTestResult parseLine(const(VarietyTestPair[]) scopeParseMethod, Token[] tokens, 
ref size_t index, ScopeData parent) { dchar[][] keywords = tokens.skipAndExtractKeywords(index); - LineVarietyTestResult lineVariety = tokens.getLineVarietyTestResult(index); + LineVarietyTestResult lineVariety = getLineVarietyTestResult(scopeParseMethod, tokens, index); switch (lineVariety.lineVariety) { case LineVariety.ModuleDeclaration: @@ -169,8 +138,15 @@ LineVarietyTestResult parseLine(Token[] tokens, ref size_t index, ScopeData pare NameUnit declarationType = lineVariety.tokenMatches[DECLARATION_TYPE].name; NameUnit[] declarationNames = lineVariety.tokenMatches[DECLARATION_VARS].commaSeperated.collectNameUnits(); + AstNode[] nameNodes; foreach (NameUnit name; declarationNames) + { parent.declaredVariables ~= DeclaredVariable(name, declarationType); + AstNode nameNode = new AstNode(); + nameNode.action = AstAction.NamedUnit; + nameNode.namedUnit = name; + nameNodes ~= nameNode; + } if (lineVariety.lineVariety == LineVariety.DeclarationLine) break; @@ -183,7 +159,7 @@ LineVarietyTestResult parseLine(Token[] tokens, ref size_t index, ScopeData pare AstNode result = nodes[0]; AstNode assignment = new AstNode; assignment.action = AstAction.AssignVariable; - assignment.assignVariableNodeData.name = declarationNames; + assignment.assignVariableNodeData.name = nameNodes; assignment.assignVariableNodeData.value = result; parent.instructions ~= assignment; @@ -210,13 +186,13 @@ LineVarietyTestResult parseLine(Token[] tokens, ref size_t index, ScopeData pare return lineVariety; } -ScopeData parseMultilineScope(Token[] tokens, ref size_t index, Nullable!ScopeData parent) +ScopeData parseMultilineScope(const(VarietyTestPair[]) scopeParseMethod, Token[] tokens, ref size_t index, Nullable!ScopeData parent) { ScopeData scopeData = new ScopeData; scopeData.parent = parent; while (index < tokens.length) { - LineVarietyTestResult lineData = parseLine(tokens, index, scopeData); + LineVarietyTestResult lineData = parseLine(scopeParseMethod, tokens, index, scopeData); 
Nullable!Token testToken = tokens.nextNonWhiteToken(index); if (testToken == null) break; @@ -233,16 +209,65 @@ unittest import parsing.treegen.scopeParser; size_t index = 0; - auto newScope = parseMultilineScope(" + auto newScope = parseMultilineScope(FUNCTION_SCOPE_PARSE, " int x, y; x = 5; y = 1; x = 3; + string axolotl = `Hello world`; int tv = x++ + y; float floaty = tv / 2; ".tokenizeText(), index, nullable!ScopeData(null)); - newScope.declaredVariables.writeln; - - foreach (x; newScope.instructions) - x.tree(-1); + assert( + newScope.declaredVariables + == + [ + DeclaredVariable(NameUnit(["x".makeUnicodeString]), NameUnit([ + "int".makeUnicodeString + ])), + DeclaredVariable(NameUnit(["y".makeUnicodeString]), NameUnit([ + "int".makeUnicodeString + ])), + DeclaredVariable(NameUnit(["axolotl".makeUnicodeString]), NameUnit( + ["string".makeUnicodeString])), + DeclaredVariable(NameUnit(["tv".makeUnicodeString]), NameUnit([ + "int".makeUnicodeString + ])), + DeclaredVariable(NameUnit(["floaty".makeUnicodeString]), NameUnit( + ["float".makeUnicodeString])) + ] + ); + assert(newScope.instructions[0].action == AstAction.AssignVariable); + assert(newScope.instructions[1].action == AstAction.AssignVariable); + assert(newScope.instructions[2].action == AstAction.AssignVariable); + assert(newScope.instructions[3].action == AstAction.AssignVariable); + + assert(newScope.instructions[0].assignVariableNodeData.name.length == 1); + assert( + newScope.instructions[0].assignVariableNodeData.name[0].namedUnit.names == [ + [cast(dchar) 'x'] + ]); + assert( + newScope.instructions[1].assignVariableNodeData.name[0].namedUnit.names == [ + [cast(dchar) 'y'] + ]); + assert( + newScope.instructions[2].assignVariableNodeData.name[0].namedUnit.names == [ + [cast(dchar) 'x'] + ]); + assert(newScope.instructions[3].assignVariableNodeData.name[0].namedUnit.names == [ + "axolotl".makeUnicodeString + ]); + assert(newScope.instructions[3].assignVariableNodeData.value.action == 
AstAction.TokenHolder); + assert(newScope.instructions[3].assignVariableNodeData.value.tokenBeingHeld == Token( + TokenType.Quotation, "`Hello world`".makeUnicodeString, 109)); + + assert( + newScope.instructions[4].assignVariableNodeData.name[0].namedUnit.names == [ + "tv".makeUnicodeString + ]); + assert(newScope.instructions[5].assignVariableNodeData.name[0].namedUnit.names == [ + "floaty".makeUnicodeString + ]); } + diff --git a/source/parsing/treegen/tokenRelationships.d b/source/parsing/treegen/tokenRelationships.d index d2fc09a..061f010 100644 --- a/source/parsing/treegen/tokenRelationships.d +++ b/source/parsing/treegen/tokenRelationships.d @@ -4,6 +4,13 @@ import parsing.treegen.astTypes; import parsing.treegen.treeGenUtils; import tern.typecons.common : Nullable, nullable; +/+ + This file contains a couple of things: + 1. The "Token Grep" system, a crude regex-like matcher for parsing tokenized code + 2. The order of operations used for grouping ++/ + + enum TokenGrepMethod { Glob, @@ -169,6 +176,40 @@ const TokenGrepPacket[] ModuleDeclaration = [ Token(TokenType.Semicolon, []) ]) ]; +enum LineVariety +{ + TotalImport, + SelectiveImport, + ModuleDeclaration, + + SimpleExpression, + IfStatementWithScope, + IfStatementWithoutScope, + DeclarationLine, + DeclarationAndAssignment, +} + +struct VarietyTestPair +{ + LineVariety variety; + const(TokenGrepPacket[]) test; +} +// Defines what you are allowed to do in what types of scope +const VarietyTestPair[] ABSTRACT_SCOPE_PARSE = [ + VarietyTestPair(LineVariety.TotalImport, TotalImport), + VarietyTestPair(LineVariety.SelectiveImport, SelectiveImport), + VarietyTestPair(LineVariety.DeclarationLine, DeclarationLine), + VarietyTestPair(LineVariety.DeclarationAndAssignment, DeclarationAndAssignment), +]; +const VarietyTestPair[] GLOBAL_SCOPE_PARSE = [ + VarietyTestPair(LineVariety.ModuleDeclaration, ModuleDeclaration) +] ~ ABSTRACT_SCOPE_PARSE; + +const VarietyTestPair[] FUNCTION_SCOPE_PARSE = [ + 
VarietyTestPair(LineVariety.IfStatementWithoutScope, IfStatementWithoutScope), + VarietyTestPair(LineVariety.IfStatementWithScope, IfStatementWithScope), +] ~ ABSTRACT_SCOPE_PARSE; + Nullable!(TokenGrepResult[]) matchesToken(in TokenGrepPacket[] testWith, Token[] tokens) { @@ -575,6 +616,15 @@ private bool testAndJoin(const(OperationPrecedenceEntry) entry, ref Array!AstNod } AstNode oprNode = new AstNode(); + if (entry.operation == OperationVariety.Assignment){ + oprNode.action = AstAction.AssignVariable; + oprNode.assignVariableNodeData = AssignVariableNodeData( + [operands[0]], + operands[1] + ); + goto trim; + } + oprNode.action = AstAction.DoubleArgumentOperation; if (operands.length == 0) assert(0); @@ -592,7 +642,8 @@ private bool testAndJoin(const(OperationPrecedenceEntry) entry, ref Array!AstNod operands[0], operands[1] ); - + + trim: nodes[startIndex] = oprNode; nodes.linearRemove(nodes[startIndex + 1 .. nodeIndex]); return true; diff --git a/source/tests/treegen.d b/source/tests/treegen.d index 03ad03f..679ff6f 100644 --- a/source/tests/treegen.d +++ b/source/tests/treegen.d @@ -107,7 +107,7 @@ unittest size_t index = 0; auto scopeData = new ScopeData; - parseLine("partial public module the.fat.rat.r8te.my.foo;".tokenizeText, index, scopeData); + parseLine(GLOBAL_SCOPE_PARSE, "partial public module the.fat.rat.r8te.my.foo;".tokenizeText, index, scopeData); assert(scopeData.moduleName.value.names == [ "the".makeUnicodeString, "fat".makeUnicodeString, @@ -126,6 +126,6 @@ unittest size_t index = 0; auto scopeData = new ScopeData; - parseLine("int x, y, z = 4*5+2;".tokenizeText, index, scopeData); + parseLine(GLOBAL_SCOPE_PARSE, "int x, y, z = 4*5+2;".tokenizeText, index, scopeData); }