1212#include < regex>
1313#include < iostream>
1414
15+ using namespace RDFRegex ;
1516
1617namespace {
1718
1819 // return vector of strings after splitting input string into lines
1920 std::vector<std::string> splitLines (std::string &input) {
2021 std::vector<std::string> lines;
21- std::regex rgx (RDFRegex:: EOLN);
22+ std::regex rgx (EOLN);
2223 std::sregex_token_iterator i (input.begin (), input.end (), rgx, -1 );
2324 std::sregex_token_iterator end;
2425
@@ -75,7 +76,9 @@ namespace {
7576 }
7677
7778 void extractUnicodeCodepoint (const std::smatch &match, std::string &u) {
78- std::string hex = match[2 ].matched ? match[2 ].str () : match[3 ].str (); // todo: magic numbers?
79+ std::string hex = match[UNICODE_BASIC_MULTILINGUAL_PLANE].matched ?
80+ match[UNICODE_BASIC_MULTILINGUAL_PLANE].str () :
81+ match[UNICODE_HIGHER_PLANE].str ();
7982 long v = std::stol (hex, nullptr , 16 );
8083
8184 auto it = std::back_inserter (u);
@@ -97,7 +100,7 @@ namespace {
97100 }
98101
99102 bool extractControlCharacter (const std::smatch &match, std::string &u) {
100- char c = match[1 ].str ()[0 ];
103+ char c = match[UNICODE_CONTROL_CHARS ].str ()[0 ];
101104 switch (c) {
102105 case ' b' :
103106 u = " \b " ;
@@ -108,9 +111,6 @@ namespace {
108111 case ' n' :
109112 u = " \n " ;
110113 break ;
111- case ' v' : // todo: why here?
112- u = " \v " ;
113- break ;
114114 case ' f' :
115115 u = " \f " ;
116116 break ;
@@ -141,7 +141,7 @@ namespace {
141141 if (str.empty ())
142142 return ;
143143
144- std::regex charsRgx (RDFRegex::UCHAR_MATCHED );
144+ std::regex charsRgx (UNICODE_CODEPOINT );
145145 auto chars_begin = std::sregex_iterator (str.begin (), str.end (), charsRgx);
146146 auto chars_end = std::sregex_iterator ();
147147
@@ -160,7 +160,7 @@ namespace {
160160 out = std::copy (match.prefix ().first , match.prefix ().second , out);
161161
162162 std::string u;
163- if (!match[1 ].matched ) {
163+ if (!match[UNICODE_CONTROL_CHARS ].matched ) {
164164 extractUnicodeCodepoint (match, u);
165165 } else {
166166 if (!extractControlCharacter (match, u))
@@ -293,11 +293,11 @@ std::string NQuadsSerialization::toNQuad(const RDF::RDFTriple& triple) {
293293}
294294
295295RDF::RDFDataset NQuadsSerialization::parse (std::string input) {
296- RDF::RDFDataset dataset ((JsonLdOptions ()));// todo: should be a version of this that passes in existing options object?
296+ RDF::RDFDataset dataset ((JsonLdOptions ()));
297297
298298 std::vector<std::string> lines = ::splitLines (input);
299- std::regex emptyRgx (RDFRegex:: EMPTY);
300- std::regex quadRgx (RDFRegex:: QUAD);
299+ std::regex emptyRgx (EMPTY);
300+ std::regex quadRgx (QUAD);
301301 std::smatch match;
302302
303303 int lineNumber = 0 ;
@@ -311,44 +311,44 @@ RDF::RDFDataset NQuadsSerialization::parse(std::string input) {
311311 // parse quad with regex
312312 if (!std::regex_match (line, match, quadRgx))
313313 throw JsonLdError (JsonLdError::SyntaxError,
314- " Error while parsing N-Quads; invalid quad. line:" + std::to_string (lineNumber));
314+ " Error while parsing N-Quads; invalid quad. line:" + std::to_string (lineNumber));
315315
316316 // extract subject from matches
317317 std::shared_ptr<RDF::Node> subject;
318- if (match[1 ].matched )
319- subject = std::make_shared<RDF::IRI>(unescape (match[1 ].str ()));
318+ if (match[QUAD_SUBJECT_AS_IRI ].matched )
319+ subject = std::make_shared<RDF::IRI>(unescape (match[QUAD_SUBJECT_AS_IRI ].str ()));
320320 else
321- subject = std::make_shared<RDF::BlankNode>(unescape (match[2 ].str ()));
321+ subject = std::make_shared<RDF::BlankNode>(unescape (match[QUAD_SUBJECT_AS_BNODE ].str ()));
322322
323323 // extract predicate from matches
324- std::shared_ptr<RDF::Node> predicate = std::make_shared<RDF::IRI>(unescape (match[3 ].str ()));
324+ std::shared_ptr<RDF::Node> predicate = std::make_shared<RDF::IRI>(unescape (match[QUAD_PREDICATE ].str ()));
325325
326326 // extract object from matches
327327 std::shared_ptr<RDF::Node> object;
328- if (match[4 ].matched )
329- object = std::make_shared<RDF::IRI>(unescape (match[4 ].str ()));
330- else if (match[5 ].matched )
331- object = std::make_shared<RDF::BlankNode>(unescape (match[5 ].str ()));
328+ if (match[QUAD_OBJECT_AS_IRI ].matched )
329+ object = std::make_shared<RDF::IRI>(unescape (match[QUAD_OBJECT_AS_IRI ].str ()));
330+ else if (match[QUAD_OBJECT_AS_BNODE ].matched )
331+ object = std::make_shared<RDF::BlankNode>(unescape (match[QUAD_OBJECT_AS_BNODE ].str ()));
332332 else {
333- std::string language = unescape (match[8 ].str ());
333+ std::string language = unescape (match[QUAD_OBJECT_AS_LITERAL_LANGUAGETAG ].str ());
334334 std::string datatype;
335- if (match[7 ].matched )
336- datatype = unescape (match[7 ].str ());
335+ if (match[QUAD_OBJECT_AS_LITERAL_DATATYPE ].matched )
336+ datatype = unescape (match[QUAD_OBJECT_AS_LITERAL_DATATYPE ].str ());
337337 else {
338- if (match[8 ].matched )
338+ if (match[QUAD_OBJECT_AS_LITERAL_LANGUAGETAG ].matched )
339339 datatype = JsonLdConsts::RDF_LANGSTRING;
340340 else
341341 datatype = JsonLdConsts::XSD_STRING;
342342 }
343- object = std::make_shared<RDF::Literal>(unescape (match[6 ].str ()), &datatype, &language);
343+ object = std::make_shared<RDF::Literal>(unescape (match[QUAD_OBJECT_AS_LITERAL ].str ()), &datatype, &language);
344344 }
345345
346346 // extract graph name from matches ('@default' is used for the default graph)
347347 std::string name = " @default" ;
348- if (match[9 ].matched ) {
349- name = unescape (match[9 ].str ());
350- } else if (match[10 ].matched ) {
351- name = unescape (match[10 ].str ());
348+ if (match[QUAD_GRAPH_AS_IRI ].matched ) {
349+ name = unescape (match[QUAD_GRAPH_AS_IRI ].str ());
350+ } else if (match[QUAD_GRAPH_AS_BNODE ].matched ) {
351+ name = unescape (match[QUAD_GRAPH_AS_BNODE ].str ());
352352 }
353353
354354 // add RDFTriple to graph in dataset
0 commit comments