Skip to content

Commit 74d1205

Browse files
authored
Add improvements to PQL Parser for full PQL grammar parsing (#230)
* Add a sanitize step to PqlLexer * Fix test * Add a semantic check that an attribute name is only applied to a synonym of a compatible type * Add a check that throws an exception if the raw refs are not of the same type * Format
1 parent d3d5d41 commit 74d1205

File tree

6 files changed

+103
-29
lines changed

6 files changed

+103
-29
lines changed

Team20/Code20/IntegrationTesting/TestPQLParserLexerIntegration.cpp

+29-9
Original file line numberDiff line numberDiff line change
@@ -78,24 +78,44 @@ TEST_METHOD(TestLexAndParse_SynonymDeclaredWithKeyword) {
7878

7979
TEST_METHOD(TestLexAndParse_TupleResultWithWithClause) {
8080
const std::string input =
81-
"assign a; variable v;\n\nSelect <a, v.stmt#> such that Uses "
82-
"(a, v) pattern a (v, _) with a.procName = v.procName";
81+
"print p; variable v;\n\nSelect <p.stmt#, v> such that Uses "
82+
"(p, v) with p.varName = v.varName";
8383
const auto actualResult = Pql::parse(Pql::lex(input));
84-
const DECLARATIONS expectedDeclarations{{"a", TokenType::ASSIGN},
84+
const DECLARATIONS expectedDeclarations{{"p", TokenType::PRINT},
8585
{"v", TokenType::VARIABLE}};
8686
const RESULTS expectedResults{PqlResultType::Tuple,
8787
{
88-
{"a", AttributeRefType::NONE},
89-
{"v", AttributeRefType::STATEMENT_NUM},
88+
{"p", AttributeRefType::STATEMENT_NUM},
89+
{"v", AttributeRefType::NONE},
9090
}};
9191
const RELATIONSHIPS expectedRelationships{
92-
{TokenType::USES, {TokenType::ASSIGN, "a"}, {TokenType::VARIABLE, "v"}}};
92+
{TokenType::USES, {TokenType::PRINT, "p"}, {TokenType::VARIABLE, "v"}}};
93+
const PATTERNS expectedPatterns{};
94+
const WITHS expectedWiths = {
95+
{Reference{Element{"p", AttributeRefType::VARNAME}},
96+
Reference{Element{"v", AttributeRefType::VARNAME}}}};
97+
Assert::IsTrue(actualResult.declarations == expectedDeclarations);
98+
Assert::IsTrue(actualResult.results == expectedResults);
99+
Assert::IsTrue(actualResult.relationships == expectedRelationships);
100+
Assert::IsTrue(actualResult.patterns == expectedPatterns);
101+
Assert::IsTrue(actualResult.withs == expectedWiths);
102+
} // namespace UnitTesting
103+
TEST_METHOD(TestLexAndParse_BooleanWithProgline) {
104+
const std::string input =
105+
"prog_line pl;assign a; variable v; \n\nSelect BOOLEAN such that Uses "
106+
"(pl, v) pattern a (v, _)";
107+
const auto actualResult = Pql::parse(Pql::lex(input));
108+
const DECLARATIONS expectedDeclarations{{"a", TokenType::ASSIGN},
109+
{"v", TokenType::VARIABLE},
110+
{"pl", TokenType::PROG_LINE}};
111+
const RESULTS expectedResults{PqlResultType::Boolean};
112+
const RELATIONSHIPS expectedRelationships{{TokenType::USES,
113+
{TokenType::PROG_LINE, "pl"},
114+
{TokenType::VARIABLE, "v"}}};
93115
const PATTERNS expectedPatterns{{{TokenType::ASSIGN, "a"},
94116
{TokenType::VARIABLE, "v"},
95117
{PatternMatchType::Any}}};
96-
const WITHS expectedWiths = {
97-
{Reference{Element{"a", AttributeRefType::PROCNAME}},
98-
Reference{Element{"v", AttributeRefType::PROCNAME}}}};
118+
const WITHS expectedWiths = {};
99119
Assert::IsTrue(actualResult.declarations == expectedDeclarations);
100120
Assert::IsTrue(actualResult.results == expectedResults);
101121
Assert::IsTrue(actualResult.relationships == expectedRelationships);

Team20/Code20/UnitTesting/TestPQLParser.cpp

+22
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,16 @@ TEST_METHOD(TestLex_BracketWithinString) {
100100

101101
Assert::IsTrue(expectedTokens == actualTokens);
102102
} // namespace UnitTesting
103+
// Lexing a declaration that uses the `prog_line` keyword must yield a single
// PROG_LINE token followed by the declared synonyms and their separators.
TEST_METHOD(TestLex_SanitizeProgLine) {
  const std::vector<PqlToken> expectedTokens = {
      {TokenType::PROG_LINE},
      {TokenType::SYNONYM, "p"},
      {TokenType::COMMA},
      {TokenType::SYNONYM, "q"},
      {TokenType::SEMICOLON},
  };

  const auto actualTokens = Pql::lex("prog_line p,q;");

  Assert::IsTrue(expectedTokens == actualTokens);
}
103113
}
104114
;
105115
TEST_CLASS(TestPqlParser){
@@ -473,6 +483,18 @@ TEST_METHOD(TestParse_WithClause) {
473483
Reference{PqlToken{TokenType::NUMBER, "34"}}}};
474484
Assert::IsTrue(actual == expected);
475485
}
486+
// A with-clause that compares two raw values of different kinds (here the
// string "i" against the number 34) must make the parser throw.
TEST_METHOD(TestParse_WithClauseRawRefsNotMatching_ThrowsException) {
  const std::vector<PqlToken> input = {
      // if i;
      {TokenType::IF},
      {TokenType::SYNONYM, "i"},
      {TokenType::SEMICOLON},
      // variable v;
      {TokenType::VARIABLE},
      {TokenType::SYNONYM, "v"},
      {TokenType::SEMICOLON},
      // Select v with "i" = 34
      {TokenType::SELECT},
      {TokenType::SYNONYM, "v"},
      {TokenType::WITH},
      {TokenType::STRING, "i"},
      {TokenType::EQUALS},
      {TokenType::NUMBER, "34"},
  };

  const auto parseInput = [input] { Pql::parse(input); };
  Assert::ExpectException<const char *>(parseInput);
}
476498
}
477499
;
478500
}

Team20/Code20/source/PQL.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,8 @@ std::unordered_map<std::string, TokenType> stringTokenMap = {
4040
{"with", TokenType::WITH},
4141
{"and", TokenType::AND},
4242
{"=", TokenType::EQUALS},
43-
{"BOOLEAN", TokenType::BOOLEAN}};
43+
{"BOOLEAN", TokenType::BOOLEAN},
44+
{"progline", TokenType::PROG_LINE}};
4445

4546
ParsedQuery Pql::parse(std::vector<PqlToken> query) {
4647
auto parser = PqlParser(query);

Team20/Code20/source/PQLLexer.cpp

+12
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ bool isStringToken(std::string token) {
3939
PqlLexer::PqlLexer(std::string query) { this->query = query; }
4040

4141
std::vector<PqlToken> PqlLexer::lex() {
42+
sanitize(query);
4243
std::vector<std::string> rawTokens = delimit(query);
4344
std::vector<PqlToken> result;
4445
for (const auto token : rawTokens) {
@@ -75,6 +76,16 @@ std::vector<std::string> PqlLexer::split(const std::string &s, char delim) {
7576
return result;
7677
}
7778

79+
// Rewrites, in place, every occurrence of the keyword `prog_line` in `s` to
// `progline`, which is the spelling the string-to-token map associates with
// TokenType::PROG_LINE.
//
// Text between double quotes is left untouched: a string literal is query
// data, not a keyword, and rewriting it would silently change its content
// (e.g. "prog_line" would become "progline"). If a quote is never closed,
// everything after it is treated as literal text and is not rewritten.
//
// @param s the raw query text; modified in place.
void PqlLexer::sanitize(std::string &s) {
  static const std::string search = "prog_line";
  static const std::string replacement = "progline";

  bool isWithinStringLiteral = false;
  size_t pos = 0;
  while (pos < s.size()) {
    if (s[pos] == '"') {
      // Entering or leaving a string literal.
      isWithinStringLiteral = !isWithinStringLiteral;
      ++pos;
    } else if (!isWithinStringLiteral &&
               s.compare(pos, search.size(), search) == 0) {
      s.replace(pos, search.size(), replacement);
      // Resume after the replacement so already rewritten text is not
      // scanned again.
      pos += replacement.size();
    } else {
      ++pos;
    }
  }
}
88+
7889
std::vector<std::string> PqlLexer::delimit(std::string s) {
7990
std::vector<char> result;
8091
bool isWithinStringLiterals = false;
@@ -127,6 +138,7 @@ std::vector<std::string> PqlLexer::delimit(std::string s) {
127138
}
128139
break;
129140
// 3B. Characters that won't appear within string literals
141+
case ';':
130142
case '_':
131143
case ',':
132144
case '<':

Team20/Code20/source/PQLLexer.h

+1
Original file line numberDiff line numberDiff line change
@@ -14,4 +14,5 @@ class PqlLexer {
1414
std::string query;
1515
std::vector<std::string> split(const std::string &s, char delim);
1616
std::vector<std::string> delimit(std::string s);
17+
void sanitize(std::string &s);
1718
};

Team20/Code20/source/PQLParser.cpp

+37-19
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,10 @@
1414
#include "SimpleTokenizer.h"
1515

1616
std::unordered_set<TokenType> entities = {
17-
TokenType::STMT, TokenType::READ, TokenType::PRINT,
18-
TokenType::CALL, TokenType::WHILE, TokenType::IF,
19-
TokenType::ASSIGN, TokenType::VARIABLE, TokenType::CONSTANT,
20-
TokenType::PROCEDURE,
21-
};
17+
TokenType::STMT, TokenType::PROG_LINE, TokenType::READ,
18+
TokenType::PRINT, TokenType::CALL, TokenType::WHILE,
19+
TokenType::IF, TokenType::ASSIGN, TokenType::VARIABLE,
20+
TokenType::CONSTANT, TokenType::PROCEDURE};
2221

2322
std::unordered_set<TokenType> attributeNames = {
2423
TokenType::PROCNAME, TokenType::VARNAME, TokenType::VALUE,
@@ -132,6 +131,16 @@ std::unordered_map<TokenType, std::vector<std::unordered_set<TokenType>>>
132131
}
133132

134133
}};
134+
std::unordered_map<AttributeRefType, std::unordered_set<TokenType>> attributes =
135+
{
136+
{AttributeRefType::PROCNAME, {TokenType::PROCEDURE, TokenType::CALL}},
137+
{AttributeRefType::VARNAME,
138+
{TokenType::VARIABLE, TokenType::READ, TokenType::PRINT}},
139+
{AttributeRefType::VALUE, {TokenType::CONSTANT}},
140+
{AttributeRefType::STATEMENT_NUM,
141+
{TokenType::STMT, TokenType::READ, TokenType::PRINT, TokenType::CALL,
142+
TokenType::WHILE, TokenType::IF, TokenType::ASSIGN}},
143+
};
135144

136145
template <class T> bool contains(std::unordered_set<T> set, T item) {
137146
return set.find(item) != set.end();
@@ -384,20 +393,23 @@ std::unordered_map<TokenType, AttributeRefType> tokenTypeToAttributeRefType{
384393
{TokenType::STATEMENT_NUM, AttributeRefType::STATEMENT_NUM}};
385394

386395
Element PqlParser::getElem() {
387-
auto nextToken = getNextExpectedToken(TokenType::SYNONYM);
388-
getDeclarationForSynonym(nextToken);
389-
Element result{nextToken.value, AttributeRefType::NONE};
390-
if (it != end && it->type == TokenType::DOT) {
391-
getNextExpectedToken(TokenType::DOT);
392-
const auto nextToken = getNextToken();
393-
// TODO: store attribute names
394-
if (!contains(attributeNames, nextToken.type)) {
395-
throw "ERROR: Expected next token to be an attribute name but attribute "
396-
"name not found";
397-
}
398-
result.refType = tokenTypeToAttributeRefType[nextToken.type];
396+
auto synonym = getNextExpectedToken(TokenType::SYNONYM);
397+
const auto declaration = getDeclarationForSynonym(synonym);
398+
if (it == end || it->type != TokenType::DOT) {
399+
return Element{synonym.value, AttributeRefType::NONE};
400+
}
401+
getNextExpectedToken(TokenType::DOT);
402+
const auto nextToken = getNextToken();
403+
if (!contains(attributeNames, nextToken.type)) {
404+
throw "ERROR: Expected next token to be an attribute name but attribute "
405+
"name not found";
399406
}
400-
return result;
407+
const auto refType = tokenTypeToAttributeRefType[nextToken.type];
408+
const auto acceptableEntityTypes = attributes[refType];
409+
if (acceptableEntityTypes.find(declaration) == acceptableEntityTypes.end()) {
410+
throw "ERROR: Declaration not found in acceptable entity types";
411+
}
412+
return Element{synonym.value, refType};
401413
}
402414

403415
void PqlParser::parseWithClause() {
@@ -415,7 +427,13 @@ void PqlParser::parseAttributeCompare() {
415427
const auto secondRef = getRef();
416428
// TODO: Perform check on attribute compare to check if ref is valid, and to
417429
// check if at least one is an elem
418-
// TODO: Check if reference can be of type null
430+
if (firstRef.referenceType == ReferenceType::RAW_VALUE &&
431+
secondRef.referenceType == ReferenceType::RAW_VALUE) {
432+
if (firstRef.pqlToken.type != secondRef.pqlToken.type) {
433+
throw "ERROR: Expecting string to compare with string or number to "
434+
"compare with number";
435+
}
436+
}
419437
pq.withs.push_back({firstRef, secondRef});
420438
}
421439

0 commit comments

Comments
 (0)