diff --git a/src/lexer.c b/src/lexer.c index a82590af..05a67ea5 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -87,11 +87,6 @@ int skip_newline = 1; int preproc_match; -/* Allow replacing identifiers with alias value if alias exists. This is - * disabled in certain cases, e.g. #undef. - */ -int preproc_aliasing = 1; - /* Point to the first character after where the macro has been called. It is * needed when returning from the macro body. */ @@ -174,44 +169,15 @@ char read_char(int is_skip_space) return next_char; } -/* get alias name from defined() directive - * i.e., get __arm__ from defined(__arm__) - */ -void read_alias_name_from_defined(char *alias_name, char *src) -{ - int i; - - src = src + 8; /* skip defined( */ - i = 0; - while (src[i] != ')') { - alias_name[i] = src[i]; - i++; - } - alias_name[i] = 0; -} - char peek_char(int offset) { return SOURCE[source_idx + offset]; } -/* check alias defined or not */ -void chk_def(int defined) -{ - char *alias = NULL; - char alias_name[MAX_TOKEN_LEN]; - - if (defined) { - read_alias_name_from_defined(alias_name, token_str); - alias = find_alias(alias_name); - } else - alias = find_alias(token_str); - - if (alias) - preproc_match = 1; -} - -token_t get_next_token() +/* Lexes the next token and returns its token type. Parameter `aliasing` is + * used to disable preprocessor aliasing on identifier tokens when zero. + */ +token_t lex_token_internal(int aliasing) { token_str[0] = 0; @@ -257,7 +223,7 @@ token_t get_next_token() read_char(0); if (next_char == '/') { read_char(1); - return get_next_token(); + return lex_token_internal(aliasing); } } } while (next_char); @@ -549,7 +515,7 @@ token_t get_next_token() if (!strcmp(token_str, "continue")) return T_continue; - if (preproc_aliasing) { + if (aliasing) { alias = find_alias(token_str); if (alias) { token_t t = is_numeric(alias) ? 
T_numeric : T_string; @@ -570,7 +536,7 @@ token_t get_next_token() next_char = SOURCE[source_idx]; } else next_char = read_char(1); - return get_next_token(); + return lex_token_internal(aliasing); } if (next_char == 0) @@ -582,30 +548,45 @@ token_t get_next_token() return T_eof; } +/* Lexes the next token and returns its token type. To disable aliasing on + * the next token, use `lex_token_internal`. */ +token_t lex_token() +{ + return lex_token_internal(1); +} + /* Skip the content. We only need the index where the macro body begins. */ void skip_macro_body() { while (!is_newline(next_char)) - next_token = get_next_token(); + next_token = lex_token(); skip_newline = 1; - next_token = get_next_token(); + next_token = lex_token(); } -int lex_accept(token_t token) +/* Accepts the next token if it matches the given token type. */ +int lex_accept_internal(token_t token, int aliasing) { if (next_token == token) { - /* FIXME: this is a hack, fix aggressive aliasing first */ - if (token == T_cppd_ifdef) - preproc_aliasing = 0; - next_token = get_next_token(); - if (token == T_cppd_ifdef) - preproc_aliasing = 1; + next_token = lex_token_internal(aliasing); return 1; } + return 0; } +/* Accepts the next token if it matches the given token type. To disable + * aliasing on the next token, use `lex_accept_internal`. + */ +int lex_accept(token_t token) +{ + return lex_accept_internal(token, 1); +} + +/* Peeks the next token and copies the token's literal to value if the + * token types match. + */ int lex_peek(token_t token, char *value) { if (next_token == token) { @@ -617,17 +598,38 @@ int lex_peek(token_t token, char *value) return 0; } -void lex_ident(token_t token, char *value) +/* Strictly matches the next token against the given token type and copies + * the token's literal to value. 
+ */ +void lex_ident_internal(token_t token, char *value, int aliasing) { if (next_token != token) error("Unexpected token"); strcpy(value, token_str); - next_token = get_next_token(); + next_token = lex_token_internal(aliasing); } -void lex_expect(token_t token) +/* Strictly matches the next token against the given token type and copies + * the token's literal to value. To disable aliasing on the next token, use + * `lex_ident_internal`. + */ +void lex_ident(token_t token, char *value) +{ + lex_ident_internal(token, value, 1); +} + +/* Strictly matches the next token against the given token type. */ +void lex_expect_internal(token_t token, int aliasing) { if (next_token != token) error("Unexpected token"); - next_token = get_next_token(); + next_token = lex_token_internal(aliasing); +} + +/* Strictly matches the next token against the given token type. To disable + * aliasing on the next token, use `lex_expect_internal`. + */ +void lex_expect(token_t token) +{ + lex_expect_internal(token, 1); } diff --git a/src/parser.c b/src/parser.c index afdddddd..2825a39b 100644 --- a/src/parser.c +++ b/src/parser.c @@ -66,6 +66,7 @@ int get_size(var_t *var, type_t *type) * whitespace */ void skip_line(int invalidate) { + /* FIXME: Comments will cause the current validation to fail. */ skip_whitespace(); do { if (invalidate && !is_whitespace(peek_char(0)) && @@ -75,28 +76,14 @@ void skip_line(int invalidate) } while (read_char(0) != '\n'); } -void if_elif_skip_lines() -{ - char peek_c; - int i; - - do { - skip_whitespace(); - i = 0; - do { - token_str[i++] = next_char; - } while (read_char(0) != '\n'); - token_str[i] = 0; - read_char(1); - peek_c = peek_char(1); - } while (next_char != '#' || (next_char == '#' && peek_c == 'd')); - skip_whitespace(); -} - -void ifdef_else_skip_lines() +/* Skips lines where the preprocessor match is false; this stops once the + * next token is either `T_cppd_elif`, `T_cppd_else` or `T_cppd_endif`. 
+ */ +void cppd_control_flow_skip_lines() { - while (!lex_peek(T_cppd_else, NULL) && !lex_peek(T_cppd_endif, NULL)) { - next_token = get_next_token(); + while (!lex_peek(T_cppd_elif, NULL) && !lex_peek(T_cppd_else, NULL) && + !lex_peek(T_cppd_endif, NULL)) { + next_token = lex_token(); } skip_whitespace(); } @@ -111,12 +98,10 @@ void read_defined_macro() { char lookup_alias[MAX_TOKEN_LEN]; - preproc_aliasing = 0; /* to prevent aggressive aliasing */ lex_expect(T_identifier); /* defined */ - lex_expect(T_open_bracket); + lex_expect_internal(T_open_bracket, 0); lex_ident(T_identifier, lookup_alias); lex_expect(T_close_bracket); - preproc_aliasing = 1; check_def(lookup_alias); } @@ -169,10 +154,8 @@ int read_preproc_directive() if (lex_peek(T_cppd_undef, token)) { char alias[MAX_VAR_LEN]; - preproc_aliasing = 0; - lex_expect(T_cppd_undef); + lex_expect_internal(T_cppd_undef, 0); lex_peek(T_identifier, alias); - preproc_aliasing = 1; lex_expect(T_identifier); remove_alias(alias); @@ -201,7 +184,7 @@ int read_preproc_directive() return 1; } - if_elif_skip_lines(); + cppd_control_flow_skip_lines(); } else { /* TODO: parse and evaluate constant expression here */ } @@ -210,7 +193,7 @@ int read_preproc_directive() if (lex_accept(T_cppd_elif)) { if (preproc_match) { while (!lex_peek(T_cppd_endif, NULL)) { - next_token = get_next_token(); + next_token = lex_token(); } return 1; } @@ -223,7 +206,7 @@ int read_preproc_directive() return 1; } - if_elif_skip_lines(); + cppd_control_flow_skip_lines(); } else { /* TODO: parse and evaluate constant expression here */ } @@ -240,8 +223,7 @@ int read_preproc_directive() return 1; } - /* skip lines until #else or #endif */ - ifdef_else_skip_lines(); + cppd_control_flow_skip_lines(); return 1; } if (lex_accept(T_cppd_endif)) { @@ -249,7 +231,7 @@ int read_preproc_directive() skip_whitespace(); return 1; } - if (lex_accept(T_cppd_ifdef)) { + if (lex_accept_internal(T_cppd_ifdef, 0)) { preproc_match = 0; lex_ident(T_identifier, token); 
check_def(token); @@ -259,8 +241,7 @@ int read_preproc_directive() return 1; } - /* skip lines until #else or #endif */ - ifdef_else_skip_lines(); + cppd_control_flow_skip_lines(); return 1; } @@ -677,12 +658,12 @@ void read_expr_operand(block_t *parent, basic_block_t **bb) for (i = 0; i < remainder; i++) { source_idx = macro->params[macro->num_params - remainder + i]; next_char = SOURCE[source_idx]; - next_token = get_next_token(); + next_token = lex_token(); read_expr(parent, bb); } source_idx = t; next_char = SOURCE[source_idx]; - next_token = get_next_token(); + next_token = lex_token(); } else if (mac) { if (parent->macro) error("Nested macro is not yet supported"); @@ -695,7 +676,7 @@ void read_expr_operand(block_t *parent, basic_block_t **bb) while (!lex_peek(T_close_bracket, NULL)) { mac->params[mac->num_params++] = source_idx; do { - next_token = get_next_token(); + next_token = lex_token(); } while (next_token != T_comma && next_token != T_close_bracket); } @@ -717,11 +698,11 @@ void read_expr_operand(block_t *parent, basic_block_t **bb) int t = source_idx; source_idx = macro_param_idx; next_char = SOURCE[source_idx]; - next_token = get_next_token(); + next_token = lex_token(); read_expr(parent, bb); source_idx = t; next_char = SOURCE[source_idx]; - next_token = get_next_token(); + next_token = lex_token(); } else if (con) { ph1_ir = add_ph1_ir(OP_load_constant); vd = require_var(parent); @@ -1599,7 +1580,7 @@ void eval_ternary_imm(int cond, char *token) { if (cond == 0) { while (next_token != T_colon) { - next_token = get_next_token(); + next_token = lex_token(); } lex_accept(T_colon); read_global_assignment(token); @@ -1607,7 +1588,7 @@ void eval_ternary_imm(int cond, char *token) read_global_assignment(token); lex_expect(T_colon); while (!lex_peek(T_semicolon, NULL)) { - next_token = get_next_token(); + next_token = lex_token(); } } } @@ -2453,7 +2434,7 @@ basic_block_t *read_body_statement(block_t *parent, basic_block_t *bb) while 
(!lex_peek(T_close_bracket, NULL)) { mac->params[mac->num_params++] = source_idx; do { - next_token = get_next_token(); + next_token = lex_token(); } while (next_token != T_comma && next_token != T_close_bracket); } /* move `source_idx` to the macro body */