-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstringref-parse.h
198 lines (151 loc) · 6.69 KB
/
stringref-parse.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
// stringref-parse.h
// StringRefParse class.
#ifndef STRINGREF_PARSE_H
#define STRINGREF_PARSE_H
#include "llvm/ADT/StringRef.h" // llvm::StringRef

#include <string> // std::string
// Ad-hoc scanner over the characters of a given llvm::StringRef.  The
// object carries a cursor offset together with a lower and an upper
// bound that limit where the cursor may be moved.
class StringRefParse {
private:     // data
  // Text being parsed.
  llvm::StringRef m_text;

  // Offset of the "current character", which is 'm_text[m_cursor]'.
  // The cursor is also permitted to designate the end+1 character.
  //
  // Invariant: m_lowerBound <= m_cursor
  // Invariant: m_cursor <= m_upperBound
  unsigned m_cursor;

  // The cursor must not be moved before this offset.
  unsigned m_lowerBound;

  // The cursor must not be moved after this offset.
  //
  // Invariant: m_upperBound <= m_text.size()
  unsigned m_upperBound;

public:      // methods
  // ---- Construction and copying ----

  // Build a parser whose upper bound is 'text.size()'.
  explicit StringRefParse(
    llvm::StringRef const &text,
    unsigned cursor = 0,
    unsigned lowerBound = 0);

  // Build a parser with cursor and both bounds supplied by the caller.
  StringRefParse(
    llvm::StringRef const &text,
    unsigned cursor,
    unsigned lowerBound,
    unsigned upperBound);

  // Copy construction and copy assignment use the defaulted versions.
  StringRefParse(StringRefParse const &obj) = default;
  StringRefParse& operator= (StringRefParse const &obj) = default;

  // ---- Accessors ----

  // The text being parsed.
  llvm::StringRef getText() const
  {
    return m_text;
  }

  // The offset before which the cursor may not move.
  unsigned getLowerBound() const
  {
    return m_lowerBound;
  }

  // The offset after which the cursor may not move.
  unsigned getUpperBound() const
  {
    return m_upperBound;
  }

  // The current cursor offset.
  unsigned getCursor() const
  {
    return m_cursor;
  }

  // True when text remains to be scanned, i.e., the cursor has not yet
  // reached the upper bound.
  bool hasText() const
  {
    return m_upperBound > m_cursor;
  }

  // Return the character at `m_cursor`, leaving the cursor in place.
  // Precondition: `hasText()`.
  char peekNextChar() const;

  // Return the next character and advance.  Precondition: `hasText()`.
  char getNextChar();

  // ---- Setting the offsets ----

  // Assign all three offsets, asserting that the invariants hold.
  void setCursorAndBounds(
    unsigned cursor, unsigned lowerBound, unsigned upperBound);

  // Assign the cursor, asserting that it lies within the bounds.
  void setCursor(unsigned cursor);

  // Assign the cursor, clamping it so it lies within the bounds.
  void setCursorClamp(unsigned cursor);

  // Make 'cursor' both the cursor and the lower bound.
  void setCursorAndLowerBound(unsigned cursor);

  // Make the upper bound equal to the current cursor.
  void setUpperBoundToCursor();

  // These two operators let the object stand in for an unsigned
  // offset: assigning an unsigned sets the cursor (via 'setCursor'),
  // and converting to unsigned yields the cursor.
  StringRefParse& operator= (unsigned cursor)
  {
    setCursor(cursor);
    return *this;
  }
  operator unsigned () const
  {
    return m_cursor;
  }

  // ---- Cursor movement ----

  // Precondition: the cursor points at a token.  Move beyond that
  // token and the blank lines that follow it, stopping just after a
  // newline character if possible.
  void advancePastBlankLinesAfterToken();

  // Test whether the character at the cursor belongs to a C++ comment.
  // If it does, move the cursor to the first slash of that comment and
  // return true.  If not, return false and leave the cursor alone.
  //
  // BUG: This ignores the possibility of string literals.  I should
  // explore what I can do with clang::Lexer.
  bool backupToCppCommentStart();

  // True when the cursor's line holds a preprocessor directive and the
  // cursor is not at the very start of that line.
  //
  // BUG: This does not handle string literals or C comments properly.
  bool onPPDirectiveLine() const;

  // Advance the cursor to just beyond the next newline.
  void advancePastNextNL();

  // When the character preceding the cursor is whitespace, move the
  // cursor backward to the first WS character of that contiguous run.
  //
  // With 'throughCppComments' set, also back up through any C++
  // comments (but not C comments).
  void backupToWSStart(bool throughCppComments);

  // Back up until the cursor is just past a newline.
  void backupToLineStart();

  // Back the cursor up by 'amt', silently stopping at the lower bound
  // if that is reached.
  void backup(unsigned amt);

  // Advance to just past the last #include such that everything
  // between the start and end locations consists solely of #includes,
  // whitespace, and comments.
  void advancePastContiguousIncludes();

  // ---- Skipping ----

  // Skip whitespace.  Returns true if any was skipped.
  bool skipWS();

  // Skip non-whitespace.  Returns true if any was skipped.
  bool skipNonWS();

  // When the cursor is at the start of a C comment, move to the first
  // character following the comment and return true; otherwise return
  // false.  For an unterminated comment, move to the upper bound and
  // return true.
  bool skipCCommentIf();

  // When the cursor is at the start of a C++ comment, move to the
  // first character following it (after the newline) and return true;
  // otherwise return false.
  bool skipCppCommentIf();

  // Skip every comment and all whitespace.  Returns true if anything
  // was skipped.
  bool skipCommentsAndWhitespace();

  // When the cursor is on "#include", skip past that line.
  // NOTE(review): the meaning of the returned bool is not stated here;
  // presumably it reports whether a skip happened -- confirm against
  // the implementation.
  bool skipIncludeIf();

  // When looking at 'prefix', skip it and return true; otherwise
  // return false.
  bool skipStringIf(char const *prefix);

  // True when the characters at the cursor match 'prefix'.
  bool lookingAt(char const *prefix) const;

  // Look ahead for 'searchString'.  On success, the cursor ends up on
  // the first character *after* that string and the result is true.
  // On failure, the cursor is unchanged and the result is false.
  bool searchFor(char const *searchString);

  // ---- Extracting text ----

  // Find the next whitespace-separated token going forward and return
  // its text.  The cursor is left on the character immediately after
  // the last one of the returned text.  Returns "" when no
  // non-whitespace characters remain before the upper bound.
  std::string getNextWSSeparatedToken();

  // Starting at the cursor, gather characters that satisfy the
  // constraints of a C identifier.  Gathering stops at the first
  // non-identifier character; the cursor is left there and that
  // character is not part of the result.  If the very first character
  // does not conform, the result is the empty string and the cursor
  // does not advance.
  std::string getNextIdentifier();

  // Return the text beginning at the cursor and ending just before the
  // character at 'endOffset' or the upper bound, whichever is less.
  // The result is empty when 'endOffset' is at or below the cursor.
  std::string textUpTo(unsigned endOffset);

  // ---- Diagnostics ----

  // Compute the 1-based line/col of the cursor, where the lower bound
  // position counts as (1,1).  This performs a linear scan, so it is
  // not very efficient.
  void getLineCol(int /*OUT*/ &line, int /*OUT*/ &col) const;

  // The line/col rendered as "<line>:<col>".
  std::string getLineColStr() const;

  // Fail an assertion unless the invariants hold.
  void assertInvariants() const;
};
// Unit tests for this module.  Only the declaration appears in this
// header; the definition is in stringref-parse-test.cc.
void stringref_parse_unit_tests();
#endif // STRINGREF_PARSE_H