Skip to content

Commit dfc03e4

Browse files
authored
Migrate C preprocessor directive handling (#106)
This commit moves the handling of C preprocessor directives out of the lexer and into the parser unit. With this change the lexer behaves uniformly and can focus solely on lexical analysis. It also improves the parsing of preprocessor directives, which are now processed as tokens instead of raw strings. Note: this commit introduces several temporary hacks to work around shortcomings of the previous design (see lexer.c#lex_accept for details).
1 parent 4785259 commit dfc03e4

File tree

2 files changed

+234
-176
lines changed

2 files changed

+234
-176
lines changed

src/lexer.c

+24-124
Original file line number | Diff line number | Diff line change
@@ -58,10 +58,6 @@ typedef enum {
5858
T_while,
5959
T_for,
6060
T_do,
61-
T_define,
62-
T_undef,
63-
T_error,
64-
T_include,
6561
T_typedef,
6662
T_enum,
6763
T_struct,
@@ -71,7 +67,16 @@ typedef enum {
7167
T_case,
7268
T_break,
7369
T_default,
74-
T_continue
70+
T_continue,
71+
T_cppd_include,
72+
T_cppd_define,
73+
T_cppd_undef,
74+
T_cppd_error,
75+
T_cppd_if,
76+
T_cppd_elif,
77+
T_cppd_else,
78+
T_cppd_endif,
79+
T_cppd_ifdef
7580
} token_t;
7681

7782
char token_str[MAX_TOKEN_LEN];
@@ -189,39 +194,6 @@ char peek_char(int offset)
189194
return SOURCE[source_idx + offset];
190195
}
191196

192-
void if_elif_skip_lines()
193-
{
194-
char peek_c;
195-
int i;
196-
197-
do {
198-
skip_whitespace();
199-
i = 0;
200-
do {
201-
token_str[i++] = next_char;
202-
} while (read_char(0) != '\n');
203-
token_str[i] = 0;
204-
read_char(1);
205-
peek_c = peek_char(1);
206-
} while (next_char != '#' || (next_char == '#' && peek_c == 'd'));
207-
skip_whitespace();
208-
}
209-
210-
void ifdef_else_skip_lines()
211-
{
212-
int i;
213-
214-
do {
215-
skip_whitespace();
216-
i = 0;
217-
do {
218-
token_str[i++] = next_char;
219-
} while (read_char(0) != '\n');
220-
token_str[i] = 0;
221-
} while (strcmp(token_str, "#else") && strcmp(token_str, "#endif"));
222-
skip_whitespace();
223-
}
224-
225197
/* check alias defined or not */
226198
void chk_def(int defined)
227199
{
@@ -253,108 +225,31 @@ token_t get_next_token()
253225
skip_whitespace();
254226

255227
if (!strcmp(token_str, "#include")) {
256-
do {
257-
token_str[i++] = next_char;
258-
} while (read_char(0) != '\n');
259-
skip_whitespace();
260-
return T_include;
228+
return T_cppd_include;
261229
}
262230
if (!strcmp(token_str, "#define")) {
263-
skip_whitespace();
264-
return T_define;
231+
return T_cppd_define;
265232
}
266233
if (!strcmp(token_str, "#undef")) {
267-
skip_whitespace();
268-
return T_undef;
234+
return T_cppd_undef;
269235
}
270236
if (!strcmp(token_str, "#error")) {
271-
skip_whitespace();
272-
return T_error;
237+
return T_cppd_error;
273238
}
274239
if (!strcmp(token_str, "#if")) {
275-
preproc_match = 0;
276-
i = 0;
277-
do {
278-
token_str[i++] = next_char;
279-
} while (read_char(0) != '\n');
280-
token_str[i] = 0;
281-
282-
if (!strncmp(token_str, "defined", 7)) {
283-
chk_def(1);
284-
if (preproc_match) {
285-
skip_whitespace();
286-
return get_next_token();
287-
}
288-
289-
/* skip lines until #elif or #else or #endif */
290-
if_elif_skip_lines();
291-
return get_next_token();
292-
}
240+
return T_cppd_if;
293241
}
294242
if (!strcmp(token_str, "#elif")) {
295-
if (preproc_match) {
296-
do {
297-
skip_whitespace();
298-
i = 0;
299-
do {
300-
token_str[i++] = next_char;
301-
} while (read_char(0) != '\n');
302-
token_str[i] = 0;
303-
} while (strcmp(token_str, "#endif"));
304-
skip_whitespace();
305-
return get_next_token();
306-
}
307-
308-
i = 0;
309-
do {
310-
token_str[i++] = next_char;
311-
} while (read_char(0) != '\n');
312-
token_str[i] = 0;
313-
314-
if (!strncmp(token_str, "defined", 7)) {
315-
chk_def(1);
316-
if (preproc_match) {
317-
skip_whitespace();
318-
return get_next_token();
319-
}
320-
/* skip lines until #elif or #else or #endif */
321-
if_elif_skip_lines();
322-
return get_next_token();
323-
}
243+
return T_cppd_elif;
324244
}
325245
if (!strcmp(token_str, "#ifdef")) {
326-
preproc_match = 0;
327-
i = 0;
328-
do {
329-
token_str[i++] = next_char;
330-
} while (read_char(0) != '\n');
331-
token_str[i] = 0;
332-
chk_def(0);
333-
if (preproc_match) {
334-
skip_whitespace();
335-
return get_next_token();
336-
}
337-
/* skip lines until #else or #endif */
338-
ifdef_else_skip_lines();
339-
return get_next_token();
246+
return T_cppd_ifdef;
340247
}
341248
if (!strcmp(token_str, "#else")) {
342-
/* reach here has 2 possible cases:
343-
* 1. reach #ifdef preprocessor directive
344-
* 2. conditional expression in #elif is false
345-
*/
346-
if (!preproc_match) {
347-
skip_whitespace();
348-
return get_next_token();
349-
}
350-
/* skip lines until #else or #endif */
351-
ifdef_else_skip_lines();
352-
return get_next_token();
249+
return T_cppd_else;
353250
}
354251
if (!strcmp(token_str, "#endif")) {
355-
preproc_match = 0;
356-
skip_whitespace();
357-
return get_next_token();
252+
return T_cppd_endif;
358253
}
359254
error("Unknown directive");
360255
}
@@ -709,7 +604,12 @@ void skip_macro_body()
709604
int lex_accept(token_t token)
710605
{
711606
if (next_token == token) {
607+
/* FIXME: this is a hack, fix aggressive aliasing first */
608+
if (token == T_cppd_ifdef)
609+
preproc_aliasing = 0;
712610
next_token = get_next_token();
611+
if (token == T_cppd_ifdef)
612+
preproc_aliasing = 1;
713613
return 1;
714614
}
715615
return 0;

0 commit comments

Comments
 (0)