Skip to content

Commit d1d1da2

Browse files
pengnam and afterdusk authored
Add PQL parser support for full grammar (#217)
* Add support for AND parsing for relationship and pattern
* Add relationship specifications for Next, Affect, Call
* Refactor for proper pattern and parsing
* Add while and if pattern parsing
* Add multiple result parsing
* Change lexer delimiter due to stmt# usage in attrRef
* Add with statement parsing
* Add definitions for new PqlResult
* Define definitions for ref types
* Change order of structs in PQL.h
* Fix valid relationships

Co-authored-by: Evan (Liang Jun) <[email protected]>
1 parent 0450d4b commit d1d1da2

File tree

6 files changed

+464
-74
lines changed

6 files changed

+464
-74
lines changed

Team20/Code20/UnitTesting/TestPQLParser.cpp

+168-4
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ TEST_METHOD(TestParse_DeclarationsMultipleSynonyms) {
135135
Assert::IsTrue(actualResult == expectedDeclarations);
136136
} // namespace UnitTesting
137137

138-
TEST_METHOD(TestParse_Select) {
138+
TEST_METHOD(TestParse_SimpleSelect) {
139139
const std::vector<PqlToken> input = {
140140
{TokenType::STMT}, {TokenType::SYNONYM, "p"},
141141
{TokenType::SEMICOLON}, {TokenType::STMT},
@@ -148,8 +148,46 @@ TEST_METHOD(TestParse_Select) {
148148
};
149149

150150
const auto actualResult = Pql::parse(input).results;
151-
const std::vector<std::string> expectedDeclarations = {"p"};
152-
Assert::IsTrue(actualResult == expectedDeclarations);
151+
const std::vector<std::string> expectedResults = {"p"};
152+
Assert::IsTrue(actualResult == expectedResults);
153+
} // TEST_METHOD(TestParse_SimpleSelect)
154+
155+
TEST_METHOD(TestParse_SimpleSelectAttrRef) {
156+
const std::vector<PqlToken> input = {
157+
{TokenType::STMT}, {TokenType::SYNONYM, "p"},
158+
{TokenType::SEMICOLON}, {TokenType::STMT},
159+
{TokenType::SYNONYM, "q"}, {TokenType::SEMICOLON},
160+
{TokenType::SELECT}, {TokenType::SYNONYM, "p"},
161+
{TokenType::DOT}, {TokenType::STATEMENT_NUM},
162+
{TokenType::SUCH}, {TokenType::THAT},
163+
{TokenType::FOLLOWS}, {TokenType::OPEN_PARENTHESIS},
164+
{TokenType::SYNONYM, "p"}, {TokenType::COMMA},
165+
{TokenType::SYNONYM, "q"}, {TokenType::CLOSED_PARENTHESIS},
166+
};
167+
168+
const auto actualResult = Pql::parse(input).results;
169+
const std::vector<std::string> expectedResults = {"p"};
170+
Assert::IsTrue(actualResult == expectedResults);
171+
} // TEST_METHOD(TestParse_SimpleSelectAttrRef)
172+
173+
TEST_METHOD(TestParse_MultipleSelect) {
174+
const std::vector<PqlToken> input = {
175+
{TokenType::STMT}, {TokenType::SYNONYM, "p"},
176+
{TokenType::SEMICOLON}, {TokenType::STMT},
177+
{TokenType::SYNONYM, "q"}, {TokenType::SEMICOLON},
178+
{TokenType::SELECT}, {TokenType::OPEN_ANGLED_BRACKET},
179+
{TokenType::SYNONYM, "p"}, {TokenType::DOT},
180+
{TokenType::STATEMENT_NUM}, {TokenType::COMMA},
181+
{TokenType::SYNONYM, "q"}, {TokenType::CLOSED_ANGLED_BRACKET},
182+
{TokenType::SUCH}, {TokenType::THAT},
183+
{TokenType::FOLLOWS}, {TokenType::OPEN_PARENTHESIS},
184+
{TokenType::SYNONYM, "p"}, {TokenType::COMMA},
185+
{TokenType::SYNONYM, "q"}, {TokenType::CLOSED_PARENTHESIS},
186+
};
187+
188+
const auto actualResult = Pql::parse(input).results;
189+
const std::vector<std::string> expectedResults = {"p", "q"};
190+
Assert::IsTrue(actualResult == expectedResults);
153191
} // TEST_METHOD(TestParse_MultipleSelect)
154192

155193
TEST_METHOD(TestParse_FollowsRelationshipTwoSynonyms) {
@@ -288,7 +326,133 @@ TEST_METHOD(TestParse_LHSPatternSynonymButNotVariable_ThrowsException) {
288326
{TokenType::UNDERSCORE}, {TokenType::CLOSED_PARENTHESIS}};
289327
Assert::ExpectException<const char *>([input] { Pql::parse(input); });
290328
} // namespace UnitTesting
329+
TEST_METHOD(TestParse_MultiplePatternsWithAnd) {
330+
const std::vector<PqlToken> input = {{TokenType::ASSIGN},
331+
{TokenType::SYNONYM, "a"},
332+
{TokenType::SEMICOLON},
333+
{TokenType::VARIABLE},
334+
{TokenType::SYNONYM, "v"},
335+
{TokenType::SEMICOLON},
336+
{TokenType::SELECT},
337+
{TokenType::SYNONYM, "a"},
338+
{TokenType::PATTERN},
339+
{TokenType::SYNONYM, "a"},
340+
{TokenType::OPEN_PARENTHESIS},
341+
{TokenType::SYNONYM, "v"},
342+
{TokenType::COMMA},
343+
{TokenType::UNDERSCORE},
344+
{TokenType::CLOSED_PARENTHESIS},
345+
{TokenType::AND},
346+
{TokenType::SYNONYM, "a"},
347+
{TokenType::OPEN_PARENTHESIS},
348+
{TokenType::SYNONYM, "v"},
349+
{TokenType::COMMA},
350+
{TokenType::UNDERSCORE},
351+
{TokenType::CLOSED_PARENTHESIS},
352+
{TokenType::AND},
353+
{TokenType::SYNONYM, "a"},
354+
{TokenType::OPEN_PARENTHESIS},
355+
{TokenType::SYNONYM, "v"},
356+
{TokenType::COMMA},
357+
{TokenType::UNDERSCORE},
358+
{TokenType::CLOSED_PARENTHESIS}};
359+
const auto actualResult = Pql::parse(input).patterns;
360+
const std::vector<ParsedPattern> expectedPatterns{
361+
ParsedPattern{{TokenType::ASSIGN, "a"},
362+
{TokenType::VARIABLE, "v"},
363+
PatternSpec{PatternMatchType::Any}},
364+
ParsedPattern{{TokenType::ASSIGN, "a"},
365+
{TokenType::VARIABLE, "v"},
366+
PatternSpec{PatternMatchType::Any}},
367+
ParsedPattern{{TokenType::ASSIGN, "a"},
368+
{TokenType::VARIABLE, "v"},
369+
PatternSpec{PatternMatchType::Any}}};
370+
Assert::IsTrue(actualResult == expectedPatterns);
371+
}
372+
TEST_METHOD(TestParse_MultipleRelationshipsWithAnd) {
373+
const std::vector<PqlToken> input = {
374+
{TokenType::STMT},
375+
{TokenType::SYNONYM, "s"},
376+
{TokenType::SEMICOLON},
377+
{TokenType::SELECT},
378+
{TokenType::SYNONYM, "s"},
379+
{TokenType::SUCH},
380+
{TokenType::THAT},
381+
{TokenType::MODIFIES},
382+
{TokenType::OPEN_PARENTHESIS},
383+
{TokenType::SYNONYM, "s"},
384+
{TokenType::COMMA},
385+
{TokenType::STRING, "x"},
386+
{TokenType::CLOSED_PARENTHESIS},
387+
{TokenType::AND},
388+
{TokenType::USES},
389+
{TokenType::OPEN_PARENTHESIS},
390+
{TokenType::SYNONYM, "s"},
391+
{TokenType::COMMA},
392+
{TokenType::STRING, "x"},
393+
{TokenType::CLOSED_PARENTHESIS},
394+
};
395+
const auto actualResult = Pql::parse(input).relationships;
396+
const std::vector<ParsedRelationship> expectedRelationships = {
397+
ParsedRelationship{TokenType::MODIFIES,
398+
{TokenType::STMT, "s"},
399+
{TokenType::STRING, "x"}},
400+
ParsedRelationship{
401+
TokenType::USES, {TokenType::STMT, "s"}, {TokenType::STRING, "x"}}};
402+
Assert::IsTrue(actualResult == expectedRelationships);
403+
}
404+
TEST_METHOD(TestParse_WhilePatternStatement) {
405+
const std::vector<PqlToken> input = {
406+
{TokenType::WHILE},
407+
{TokenType::SYNONYM, "w"},
408+
{TokenType::SEMICOLON},
409+
{TokenType::VARIABLE},
410+
{TokenType::SYNONYM, "v"},
411+
{TokenType::SEMICOLON},
412+
{TokenType::SELECT},
413+
{TokenType::SYNONYM, "w"},
414+
{TokenType::PATTERN},
415+
{TokenType::SYNONYM, "w"},
416+
{TokenType::OPEN_PARENTHESIS},
417+
{TokenType::SYNONYM, "v"},
418+
{TokenType::COMMA},
419+
{TokenType::UNDERSCORE},
420+
{TokenType::CLOSED_PARENTHESIS},
421+
};
422+
const auto actualResult = Pql::parse(input).patterns;
423+
const std::vector<ParsedPattern> expectedPatterns{
424+
ParsedPattern{{TokenType::WHILE, "w"},
425+
{TokenType::VARIABLE, "v"},
426+
PatternSpec{PatternMatchType::Any}}};
427+
Assert::IsTrue(actualResult == expectedPatterns);
428+
}
429+
TEST_METHOD(TestParse_IfPatternStatement) {
430+
const std::vector<PqlToken> input = {
431+
{TokenType::IF},
432+
{TokenType::SYNONYM, "i"},
433+
{TokenType::SEMICOLON},
434+
{TokenType::VARIABLE},
435+
{TokenType::SYNONYM, "v"},
436+
{TokenType::SEMICOLON},
437+
{TokenType::SELECT},
438+
{TokenType::SYNONYM, "v"},
439+
{TokenType::PATTERN},
440+
{TokenType::SYNONYM, "i"},
441+
{TokenType::OPEN_PARENTHESIS},
442+
{TokenType::SYNONYM, "v"},
443+
{TokenType::COMMA},
444+
{TokenType::UNDERSCORE},
445+
{TokenType::COMMA},
446+
{TokenType::UNDERSCORE},
447+
{TokenType::CLOSED_PARENTHESIS},
448+
};
449+
const auto actualResult = Pql::parse(input).patterns;
450+
const std::vector<ParsedPattern> expectedPatterns{
451+
ParsedPattern{{TokenType::IF, "i"},
452+
{TokenType::VARIABLE, "v"},
453+
PatternSpec{PatternMatchType::Any}}};
454+
Assert::IsTrue(actualResult == expectedPatterns);
455+
}
291456
}
292457
;
293458
}
294-
; // namespace UnitTesting

Team20/Code20/source/PQL.cpp

+12-1
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,18 @@ std::unordered_map<std::string, TokenType> stringTokenMap = {
2929
{"_", TokenType::UNDERSCORE},
3030
{"(", TokenType::OPEN_PARENTHESIS},
3131
{")", TokenType::CLOSED_PARENTHESIS},
32-
{",", TokenType::COMMA}};
32+
{"<", TokenType::OPEN_ANGLED_BRACKET},
33+
{">", TokenType::CLOSED_ANGLED_BRACKET},
34+
{"BOOLEAN", TokenType::BOOLEAN},
35+
{".", TokenType::DOT},
36+
{"procName", TokenType::PROCNAME},
37+
{"varName", TokenType::VARNAME},
38+
{"value", TokenType::VALUE},
39+
{"stmt#", TokenType::STATEMENT_NUM},
40+
{",", TokenType::COMMA},
41+
{"with", TokenType::WITH},
42+
{"=", TokenType::EQUALS},
43+
{"prog_line", TokenType::PROG_LINE}};
3344

3445
ParsedQuery Pql::parse(std::vector<PqlToken> query) {
3546
auto parser = PqlParser(query);

Team20/Code20/source/PQL.h

+65-1
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,9 @@
1010

1111
enum class TokenType {
1212
STMT,
13+
PROG_LINE,
1314
READ,
1415
PRINT,
15-
CALL,
1616
WHILE,
1717
IF,
1818
ELSE,
@@ -31,18 +31,36 @@ enum class TokenType {
3131
PARENT_T,
3232
FOLLOWS,
3333
FOLLOWS_T,
34+
AFFECTS,
35+
AFFECTS_T,
36+
NEXT,
37+
NEXT_T,
38+
CALL,
39+
CALL_T,
3440
MODIFIES,
3541
USES,
3642
MATCH, // Representation of pattern clause as a relationship
3743

3844
SUCH,
3945
THAT,
46+
AND,
4047
PATTERN,
48+
WITH,
4149

4250
SEMICOLON,
4351
COMMA,
52+
DOT,
4453
OPEN_PARENTHESIS,
4554
CLOSED_PARENTHESIS,
55+
OPEN_ANGLED_BRACKET,
56+
CLOSED_ANGLED_BRACKET,
57+
58+
PROCNAME,
59+
VARNAME,
60+
VALUE,
61+
STATEMENT_NUM,
62+
BOOLEAN,
63+
EQUALS
4664
};
4765

4866
/** @brief Token used as intermediary between the lexer and the parser
@@ -53,9 +71,55 @@ struct PqlToken {
5371
bool operator==(const PqlToken &other) const {
5472
return type == other.type && value == other.value;
5573
}
74+
// Default constructor to construct an empty PqlToken
75+
PqlToken() : type{TokenType::STRING}, value{""} {}
5676
PqlToken(TokenType specifiedTokenType, std::string specifiedValue = "")
5777
: type{specifiedTokenType}, value{specifiedValue} {}
5878
};
79+
/** @brief Attribute that can be attached to a synonym inside an Element.
 *
 *  The first four enumerators share their names with the TokenType values
 *  PROCNAME, VARNAME, VALUE and STATEMENT_NUM.
 *  NOTE(review): NONE presumably marks a bare synonym with no `.attr`
 *  suffix — confirm against the parser.
 */
enum class AttributeRefType {
80+
PROCNAME,
81+
VARNAME,
82+
VALUE,
83+
STATEMENT_NUM,
84+
NONE,
85+
};
86+
87+
struct Element {
88+
std::string synonym;
89+
AttributeRefType refType;
90+
bool operator==(const Element &other) const {
91+
return synonym == other.synonym && refType == other.refType;
92+
}
93+
};
94+
95+
// Used in attrCompare
96+
// Records how a Reference was constructed: ELEMENT when built from an
// Element, RAW_VALUE when built from a PqlToken.
enum class ReferenceType { ELEMENT, RAW_VALUE };
97+
struct Reference {
98+
ReferenceType referenceType;
99+
PqlToken pqlToken;
100+
Element element;
101+
Reference(PqlToken specifiedToken)
102+
: referenceType{ReferenceType::RAW_VALUE}, pqlToken{specifiedToken} {}
103+
Reference(Element specifiedElement)
104+
: referenceType{ReferenceType::ELEMENT}, element{specifiedElement} {}
105+
106+
bool operator==(const Reference &other) const {
107+
return referenceType == other.referenceType && pqlToken == other.pqlToken &&
108+
element == other.element;
109+
}
110+
};
111+
112+
// Sequence of Elements; this is the type of PqlResult::results.
using TUPLE = std::vector<Element>;
113+
114+
// Kind of result stored in PqlResult::resultType.
// NOTE(review): Boolean presumably corresponds to `Select BOOLEAN` and Tuple
// to a synonym / <tuple> selection — confirm against the parser.
enum class PqlResultType { Boolean, Tuple };
115+
116+
struct PqlResult {
117+
PqlResultType resultType;
118+
TUPLE results;
119+
bool operator==(const PqlResult &other) const {
120+
return resultType == other.resultType && results == other.results;
121+
}
122+
};
59123

60124
/** @brief ParsedRelationship holds the information of a parsed relationship
61125
*/

Team20/Code20/source/PQLLexer.cpp

+16-6
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
#include <unordered_set>
1111
#include <vector>
1212

13+
const char delimiter = '^';
14+
1315
bool isAlphaNumeric(std::string s) {
1416
for (const auto c : s) {
1517
if (!isalnum(c)) {
@@ -90,7 +92,7 @@ std::vector<std::string> PqlLexer::delimit(std::string s) {
9092
case '\n':
9193
case '\t':
9294
if (!isWithinStringLiterals) {
93-
result.push_back('#');
95+
result.push_back(delimiter);
9496
continue;
9597
}
9698
break;
@@ -99,14 +101,18 @@ std::vector<std::string> PqlLexer::delimit(std::string s) {
99101
case ')':
100102
case '(':
101103
if (!isWithinStringLiterals) {
102-
result.push_back('#');
104+
result.push_back(delimiter);
103105
}
104106
break;
105107
// 1B. Characters that wont appear within string literals
106108
case ',':
107109
case ';':
108110
case '_':
109-
result.push_back('#');
111+
case '<':
112+
case '>':
113+
case '.':
114+
case '=':
115+
result.push_back(delimiter);
110116
break;
111117
default:
112118
break;
@@ -119,13 +125,17 @@ std::vector<std::string> PqlLexer::delimit(std::string s) {
119125
case '(':
120126
case ')':
121127
if (!isWithinStringLiterals) {
122-
result.push_back('#');
128+
result.push_back(delimiter);
123129
}
124130
break;
125131
// 3B. Characters that won't appear within string literals
126132
case '_':
127133
case ',':
128-
result.push_back('#');
134+
case '<':
135+
case '>':
136+
case '.':
137+
case '=':
138+
result.push_back(delimiter);
129139
break;
130140
default:
131141
break;
@@ -135,5 +145,5 @@ std::vector<std::string> PqlLexer::delimit(std::string s) {
135145
// TODO:
136146
std::cout << "EXCEPTION";
137147
}
138-
return split(std::string(result.begin(), result.end()), '#');
148+
return split(std::string(result.begin(), result.end()), delimiter);
139149
}

0 commit comments

Comments
 (0)