From 35362f8137b2c5109e6bc39cb12048c016b5b580 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Thu, 21 Apr 2016 15:48:13 -0700 Subject: [PATCH] better parsing of C macros See #88 --- CMakeLists.txt | 1 + src/c_tokenizer.cpp | 651 ++++++++++++++++++++++++++++++++++++++++++++ src/c_tokenizer.hpp | 70 +++++ src/parseh.cpp | 284 ++++++------------- test/run_tests.cpp | 4 + 5 files changed, 805 insertions(+), 205 deletions(-) create mode 100644 src/c_tokenizer.cpp create mode 100644 src/c_tokenizer.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 502eef6053..5d9cc6abd0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -40,6 +40,7 @@ set(ZIG_SOURCES "${CMAKE_SOURCE_DIR}/src/ast_render.cpp" "${CMAKE_SOURCE_DIR}/src/bignum.cpp" "${CMAKE_SOURCE_DIR}/src/tokenizer.cpp" + "${CMAKE_SOURCE_DIR}/src/c_tokenizer.cpp" "${CMAKE_SOURCE_DIR}/src/parser.cpp" "${CMAKE_SOURCE_DIR}/src/eval.cpp" "${CMAKE_SOURCE_DIR}/src/analyze.cpp" diff --git a/src/c_tokenizer.cpp b/src/c_tokenizer.cpp new file mode 100644 index 0000000000..ddcb5ba152 --- /dev/null +++ b/src/c_tokenizer.cpp @@ -0,0 +1,651 @@ +/* + * Copyright (c) 2016 Andrew Kelley + * + * This file is part of zig, which is MIT licensed. 
+ * See http://opensource.org/licenses/MIT
+ */
+
+#include "c_tokenizer.hpp"
+#include <stdlib.h>
+
+#define WHITESPACE_EXCEPT_N /* case-label list: use as `case WHITESPACE_EXCEPT_N:` */ \
+         ' ': \
+    case '\t': \
+    case '\v': \
+    case '\f'
+
+#define DIGIT_NON_ZERO \
+         '1': \
+    case '2': \
+    case '3': \
+    case '4': \
+    case '5': \
+    case '6': \
+    case '7': \
+    case '8': \
+    case '9'
+
+#define DIGIT \
+         '0': \
+    case DIGIT_NON_ZERO
+
+#define ALPHA \
+         'a': \
+    case 'b': \
+    case 'c': \
+    case 'd': \
+    case 'e': \
+    case 'f': \
+    case 'g': \
+    case 'h': \
+    case 'i': \
+    case 'j': \
+    case 'k': \
+    case 'l': \
+    case 'm': \
+    case 'n': \
+    case 'o': \
+    case 'p': \
+    case 'q': \
+    case 'r': \
+    case 's': \
+    case 't': \
+    case 'u': \
+    case 'v': \
+    case 'w': \
+    case 'x': \
+    case 'y': \
+    case 'z': \
+    case 'A': \
+    case 'B': \
+    case 'C': \
+    case 'D': \
+    case 'E': \
+    case 'F': \
+    case 'G': \
+    case 'H': \
+    case 'I': \
+    case 'J': \
+    case 'K': \
+    case 'L': \
+    case 'M': \
+    case 'N': \
+    case 'O': \
+    case 'P': \
+    case 'Q': \
+    case 'R': \
+    case 'S': \
+    case 'T': \
+    case 'U': \
+    case 'V': \
+    case 'W': \
+    case 'X': \
+    case 'Y': \
+    case 'Z'
+
+#define IDENT_START \
+    ALPHA: \
+    case '_'
+
+#define IDENT \
+    IDENT_START: \
+    case DIGIT
+
+
+static void begin_token(CTokenize *ctok, CTokId id) { // appends a token of kind `id` and makes it current
+    assert(ctok->cur_tok == nullptr);
+    ctok->tokens.add_one();
+    ctok->cur_tok = &ctok->tokens.last();
+    ctok->cur_tok->id = id;
+
+    switch (id) {
+        case CTokIdStrLit:
+            memset(&ctok->cur_tok->data.str_lit, 0, sizeof(Buf)); // zero the Buf in the union before its first resize
+            buf_resize(&ctok->cur_tok->data.str_lit, 0);
+            break;
+        case CTokIdSymbol:
+            memset(&ctok->cur_tok->data.symbol, 0, sizeof(Buf));
+            buf_resize(&ctok->cur_tok->data.symbol, 0);
+            break;
+        case CTokIdCharLit:
+        case CTokIdNumLitInt:
+        case CTokIdNumLitFloat:
+        case CTokIdMinus:
+            break;
+    }
+}
+
+static void end_token(CTokenize *ctok) { // closes the current token
+    ctok->cur_tok = nullptr;
+}
+
+static void mark_error(CTokenize *ctok) { // flags the macro as untokenizable; callers return right after
+    ctok->error = true;
+}
+
+static void add_char(CTokenize *ctok, uint8_t c) { // stores a decoded escape into the open char/string literal
+    assert(ctok->cur_tok);
+    if (ctok->cur_tok->id == CTokIdCharLit) {
+        ctok->cur_tok->data.char_lit = c;
+        ctok->state = CTokStateExpectEndQuot;
+    } else if (ctok->cur_tok->id == CTokIdStrLit) {
+        buf_append_char(&ctok->cur_tok->data.str_lit, c);
+        ctok->state = CTokStateString;
+    } else {
+        zig_unreachable();
+    }
+}
+
+static void hex_digit(CTokenize *ctok, uint8_t value) { // folds one hex digit (0-15) into the integer and into buf
+    // TODO @mul_with_overflow
+    ctok->cur_tok->data.num_lit_int *= 16;
+    // TODO @add_with_overflow
+    ctok->cur_tok->data.num_lit_int += value;
+
+    static const uint8_t hex_chars[] = "0123456789abcdef";
+    buf_append_char(&ctok->buf, hex_chars[value]);
+}
+
+static void end_float(CTokenize *ctok) { // converts the text collected in buf to a double and closes the token
+    // TODO detect errors, overflow, and underflow
+    double value = strtod(buf_ptr(&ctok->buf), nullptr);
+
+    ctok->cur_tok->data.num_lit_float = value;
+
+    end_token(ctok);
+    ctok->state = CTokStateStart;
+
+}
+
+void tokenize_c_macro(CTokenize *ctok, const uint8_t *c) { // fills ctok->tokens from one macro body; sets ctok->error on failure
+    ctok->tokens.resize(0);
+    ctok->state = CTokStateStart;
+    ctok->error = false;
+    ctok->cur_tok = nullptr;
+
+    buf_resize(&ctok->buf, 0);
+
+    for (; *c; c += 1) {
+        switch (ctok->state) {
+            case CTokStateStart:
+                switch (*c) {
+                    case WHITESPACE_EXCEPT_N:
+                        break;
+                    case '\'':
+                        ctok->state = CTokStateExpectChar;
+                        begin_token(ctok, CTokIdCharLit);
+                        break;
+                    case '\"':
+                        ctok->state = CTokStateString;
+                        begin_token(ctok, CTokIdStrLit);
+                        break;
+                    case '/':
+                        ctok->state = CTokStateOpenComment;
+                        break;
+                    case '\\':
+                        ctok->state = CTokStateBackslash;
+                        break;
+                    case '\n':
+                        goto found_end_of_macro;
+                    case IDENT_START:
+                        ctok->state = CTokStateIdentifier;
+                        begin_token(ctok, CTokIdSymbol);
+                        buf_append_char(&ctok->cur_tok->data.symbol, *c);
+                        break;
+                    case DIGIT_NON_ZERO:
+                        ctok->state = CTokStateDecimal;
+                        ctok->unsigned_suffix = false;
+                        ctok->long_suffix = false;
+                        begin_token(ctok, CTokIdNumLitInt);
+                        ctok->cur_tok->data.num_lit_int = *c - '0';
+                        buf_resize(&ctok->buf, 0);
+                        buf_append_char(&ctok->buf, *c);
+                        break;
+                    case '0':
+                        ctok->state = CTokStateGotZero;
+                        ctok->unsigned_suffix = false;
+                        ctok->long_suffix = false;
+                        begin_token(ctok, CTokIdNumLitInt);
+                        ctok->cur_tok->data.num_lit_int = 0;
+                        buf_resize(&ctok->buf, 0);
+                        buf_append_char(&ctok->buf, '0');
+                        break;
+                    case '.':
+                        begin_token(ctok, CTokIdNumLitFloat);
+                        ctok->state = CTokStateFloat;
+                        buf_init_from_str(&ctok->buf, "0.");
+                        break;
+                    case '-': // single-character token; the state stays CTokStateStart
+                        begin_token(ctok, CTokIdMinus);
+                        end_token(ctok);
+                        break;
+                    default:
+                        return mark_error(ctok);
+                }
+                break;
+            case CTokStateFloat:
+                switch (*c) {
+                    case 'e':
+                    case 'E':
+                        buf_append_char(&ctok->buf, 'e');
+                        ctok->state = CTokStateExpSign;
+                        break;
+                    case 'f':
+                    case 'F':
+                    case 'l':
+                    case 'L':
+                        end_float(ctok);
+                        break;
+                    case DIGIT:
+                        buf_append_char(&ctok->buf, *c);
+                        break;
+                    default:
+                        c -= 1; // step back so the loop's c += 1 re-scans this char in the new state
+                        end_float(ctok);
+                        continue;
+                }
+                break;
+            case CTokStateExpSign:
+                switch (*c) {
+                    case '+':
+                    case '-':
+                        ctok->state = CTokStateFloatExpFirst;
+                        buf_append_char(&ctok->buf, *c);
+                        break;
+                    case DIGIT:
+                        ctok->state = CTokStateFloatExp;
+                        buf_append_char(&ctok->buf, *c);
+                        break;
+                    default:
+                        return mark_error(ctok);
+                }
+                break;
+            case CTokStateFloatExpFirst:
+                switch (*c) {
+                    case DIGIT:
+                        buf_append_char(&ctok->buf, *c);
+                        ctok->state = CTokStateFloatExp;
+                        break;
+                    default:
+                        return mark_error(ctok);
+                }
+                break;
+            case CTokStateFloatExp:
+                switch (*c) {
+                    case DIGIT:
+                        buf_append_char(&ctok->buf, *c);
+                        break;
+                    case 'f':
+                    case 'F':
+                    case 'l':
+                    case 'L':
+                        end_float(ctok);
+                        break;
+                    default:
+                        c -= 1;
+                        end_float(ctok);
+                        continue;
+                }
+                break;
+            case CTokStateDecimal:
+                switch (*c) {
+                    case DIGIT:
+                        buf_append_char(&ctok->buf, *c);
+
+                        // TODO @mul_with_overflow
+                        ctok->cur_tok->data.num_lit_int *= 10;
+                        // TODO @add_with_overflow
+                        ctok->cur_tok->data.num_lit_int += *c - '0';
+                        break;
+                    case '\'':
+                        break;
+                    case 'u':
+                    case 'U':
+                        ctok->unsigned_suffix = true;
+                        ctok->state = CTokStateIntSuffix;
+                        break;
+                    case 'l':
+                    case 'L':
+                        ctok->long_suffix = true;
+                        ctok->state = CTokStateIntSuffixLong;
+                        break;
+                    case '.':
+                        buf_append_char(&ctok->buf, '.');
+                        ctok->cur_tok->id = CTokIdNumLitFloat;
+                        ctok->state = CTokStateFloat;
+                        break;
+                    default:
+                        c -= 1;
+                        end_token(ctok);
+                        ctok->state = CTokStateStart;
+                        continue;
+                }
+                break;
+            case CTokStateIntSuffix:
+                switch (*c) {
+                    case 'l':
+                    case 'L':
+                        if (ctok->long_suffix) {
+                            return mark_error(ctok);
+                        }
+                        ctok->long_suffix = true;
+                        ctok->state = CTokStateIntSuffixLong;
+                        break;
+                    case 'u':
+                    case 'U':
+                        if (ctok->unsigned_suffix) {
+                            return mark_error(ctok);
+                        }
+                        ctok->unsigned_suffix = true;
+                        break;
+                    default:
+                        c -= 1;
+                        end_token(ctok);
+                        ctok->state = CTokStateStart;
+                        continue;
+                }
+                break;
+            case CTokStateIntSuffixLong:
+                switch (*c) {
+                    case 'l':
+                    case 'L':
+                        ctok->state = CTokStateIntSuffix;
+                        break;
+                    case 'u':
+                    case 'U':
+                        if (ctok->unsigned_suffix) {
+                            return mark_error(ctok);
+                        }
+                        ctok->unsigned_suffix = true;
+                        break;
+                    default:
+                        c -= 1;
+                        end_token(ctok);
+                        ctok->state = CTokStateStart;
+                        continue;
+                }
+                break;
+            case CTokStateGotZero:
+                switch (*c) {
+                    case 'x':
+                    case 'X':
+                        buf_append_char(&ctok->buf, 'x'); // keep "0x" in buf so strtod can parse a hex float
+                        ctok->state = CTokStateHex;
+                        break;
+                    case '.':
+                        ctok->state = CTokStateFloat;
+                        ctok->cur_tok->id = CTokIdNumLitFloat;
+                        buf_append_char(&ctok->buf, '.');
+                        break;
+                    default:
+                        c -= 1;
+                        ctok->state = CTokStateOctal;
+                        continue;
+                }
+                break;
+            case CTokStateOctal:
+                switch (*c) {
+                    case '0':
+                    case '1':
+                    case '2':
+                    case '3':
+                    case '4':
+                    case '5':
+                    case '6':
+                    case '7':
+                        // TODO @mul_with_overflow
+                        ctok->cur_tok->data.num_lit_int *= 8;
+                        // TODO @add_with_overflow
+                        ctok->cur_tok->data.num_lit_int += *c - '0';
+                        break;
+                    case '8':
+                    case '9':
+                        return mark_error(ctok);
+                    case '\'':
+                        break;
+                    default:
+                        c -= 1;
+                        end_token(ctok);
+                        ctok->state = CTokStateStart;
+                        continue;
+                }
+                break;
+            case CTokStateHex:
+                switch (*c) {
+                    case '0':
+                        hex_digit(ctok, 0);
+                        break;
+                    case '1':
+                        hex_digit(ctok, 1);
+                        break;
+                    case '2':
+                        hex_digit(ctok, 2);
+                        break;
+                    case '3':
+                        hex_digit(ctok, 3);
+                        break;
+                    case '4':
+                        hex_digit(ctok, 4);
+                        break;
+                    case '5':
+                        hex_digit(ctok, 5);
+                        break;
+                    case '6':
+                        hex_digit(ctok, 6);
+                        break;
+                    case '7':
+                        hex_digit(ctok, 7);
+                        break;
+                    case '8':
+                        hex_digit(ctok, 8);
+                        break;
+                    case '9':
+                        hex_digit(ctok, 9);
+                        break;
+                    case 'a':
+                    case 'A':
+                        hex_digit(ctok, 10);
+                        break;
+                    case 'b':
+                    case 'B':
+                        hex_digit(ctok, 11);
+                        break;
+                    case 'c':
+                    case 'C':
+                        hex_digit(ctok, 12);
+                        break;
+                    case 'd':
+                    case 'D':
+                        hex_digit(ctok, 13);
+                        break;
+                    case 'e':
+                    case 'E':
+                        hex_digit(ctok, 14);
+                        break;
+                    case 'f':
+                    case 'F':
+                        hex_digit(ctok, 15);
+                        break;
+                    case 'p':
+                    case 'P':
+                        buf_append_char(&ctok->buf, 'p'); // binary-exponent marker of a hex float
+                        ctok->cur_tok->id = CTokIdNumLitFloat;
+                        ctok->state = CTokStateExpSign;
+                        break;
+                    default:
+                        c -= 1;
+                        end_token(ctok);
+                        ctok->state = CTokStateStart;
+                        continue;
+                }
+                break;
+            case CTokStateIdentifier:
+                switch (*c) {
+                    case IDENT:
+                        buf_append_char(&ctok->cur_tok->data.symbol, *c);
+                        break;
+                    default:
+                        c -= 1;
+                        end_token(ctok);
+                        ctok->state = CTokStateStart;
+                        continue;
+                }
+                break;
+            case CTokStateString:
+                switch (*c) {
+                    case '\\':
+                        ctok->state = CTokStateCharEscape;
+                        break;
+                    case '\"':
+                        end_token(ctok);
+                        ctok->state = CTokStateStart;
+                        break;
+                    default:
+                        buf_append_char(&ctok->cur_tok->data.str_lit, *c);
+                }
+                break;
+            case CTokStateExpectChar:
+                switch (*c) {
+                    case '\\':
+                        ctok->state = CTokStateCharEscape;
+                        break;
+                    case '\'':
+                        return mark_error(ctok);
+                    default:
+                        ctok->cur_tok->data.char_lit = *c;
+                        ctok->state = CTokStateExpectEndQuot;
+                }
+                break;
+            case CTokStateCharEscape:
+                switch (*c) {
+                    case '\'':
+                    case '"':
+                    case '?':
+                    case '\\':
+                        add_char(ctok, *c);
+                        break;
+                    case 'a':
+                        add_char(ctok, '\a');
+                        break;
+                    case 'b':
+                        add_char(ctok, '\b');
+                        break;
+                    case 'f':
+                        add_char(ctok, '\f');
+                        break;
+                    case 'n':
+                        add_char(ctok, '\n');
+                        break;
+                    case 'r':
+                        add_char(ctok, '\r');
+                        break;
+                    case 't':
+                        add_char(ctok, '\t');
+                        break;
+                    case 'v':
+                        add_char(ctok, '\v');
+                        break;
+                    case DIGIT:
+                    case 'x':
+                    case 'u':
+                    case 'U':
+                        // TODO octal, hex and universal-character escapes; skip the macro instead of aborting
+                        return mark_error(ctok);
+                    default:
+                        return mark_error(ctok);
+                }
+                break;
+            case CTokStateExpectEndQuot:
+                switch (*c) {
+                    case '\'':
+                        end_token(ctok);
+                        ctok->state = CTokStateStart;
+                        break;
+                    default:
+                        return mark_error(ctok);
+                }
+                break;
+            case CTokStateOpenComment:
+                switch (*c) {
+                    case '/':
+                        ctok->state = CTokStateLineComment;
+                        break;
+                    case '*':
+                        ctok->state = CTokStateComment;
+                        break;
+                    default:
+                        return mark_error(ctok);
+                }
+                break;
+            case CTokStateLineComment:
+                if (*c == '\n') {
+                    ctok->state = CTokStateStart;
+                    goto found_end_of_macro;
+                }
+                break;
+            case CTokStateComment:
+                switch (*c) {
+                    case '*':
+                        ctok->state = CTokStateCommentStar;
+                        break;
+                    default:
+                        break;
+                }
+                break;
+            case CTokStateCommentStar:
+                switch (*c) {
+                    case '/':
+                        ctok->state = CTokStateStart;
+                        break;
+                    case '*':
+                        break;
+                    default:
+                        ctok->state = CTokStateComment;
+                        break;
+                }
+                break;
+            case CTokStateBackslash:
+                switch (*c) {
+                    case '\n':
+                        ctok->state = CTokStateStart;
+                        break;
+                    default:
+                        return mark_error(ctok);
+                }
+                break;
+        }
+    }
+found_end_of_macro:
+
+    switch (ctok->state) { // close whatever was still open when the input or the line ended
+        case CTokStateStart:
+        case CTokStateLineComment: // a `//` comment that runs to end of input ends the macro normally
+            break;
+        case CTokStateIdentifier:
+        case CTokStateDecimal:
+        case CTokStateHex:
+        case CTokStateOctal:
+        case CTokStateGotZero:
+        case CTokStateIntSuffix:
+        case CTokStateIntSuffixLong:
+            end_token(ctok);
+            break;
+        case CTokStateFloat:
+        case CTokStateFloatExp:
+            end_float(ctok);
+            break;
+        case CTokStateExpectChar:
+        case CTokStateExpectEndQuot:
+        case CTokStateOpenComment:
+        case CTokStateComment:
+        case CTokStateCommentStar:
+        case CTokStateCharEscape:
+        case CTokStateBackslash:
+        case CTokStateString:
+        case CTokStateExpSign:
+        case CTokStateFloatExpFirst:
+            return mark_error(ctok);
+    }
+
+    assert(ctok->cur_tok == nullptr);
+}
diff --git a/src/c_tokenizer.hpp b/src/c_tokenizer.hpp
new file mode 100644
index 0000000000..bf8fa1a841
--- /dev/null
+++ b/src/c_tokenizer.hpp
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2016 Andrew Kelley
+ *
+ * This file is part of zig, which is MIT licensed.
+ * See http://opensource.org/licenses/MIT
+ */
+
+
+#ifndef ZIG_C_TOKENIZER_HPP
+#define ZIG_C_TOKENIZER_HPP
+
+#include "buffer.hpp"
+
+enum CTokId { // kind of a token found in a macro body
+    CTokIdCharLit,
+    CTokIdStrLit,
+    CTokIdNumLitInt,
+    CTokIdNumLitFloat,
+    CTokIdSymbol,
+    CTokIdMinus, // process_macro negates the number literal that follows a leading minus
+};
+
+struct CTok {
+    enum CTokId id;
+    union { // which member is valid depends on id
+        uint8_t char_lit; // CTokIdCharLit
+        Buf str_lit; // CTokIdStrLit, escapes already decoded
+        uint64_t num_lit_int; // CTokIdNumLitInt
+        double num_lit_float; // CTokIdNumLitFloat
+        Buf symbol; // CTokIdSymbol
+    } data;
+};
+
+enum CTokState { // states of the tokenize_c_macro state machine
+    CTokStateStart,
+    CTokStateExpectChar,
+    CTokStateCharEscape,
+    CTokStateExpectEndQuot,
+    CTokStateOpenComment,
+    CTokStateLineComment,
+    CTokStateComment,
+    CTokStateCommentStar,
+    CTokStateBackslash,
+    CTokStateString,
+    CTokStateIdentifier,
+    CTokStateDecimal,
+    CTokStateOctal,
+    CTokStateGotZero,
+    CTokStateHex,
+    CTokStateIntSuffix,
+    CTokStateIntSuffixLong,
+    CTokStateFloat,
+    CTokStateExpSign,
+    CTokStateFloatExp,
+    CTokStateFloatExpFirst,
+};
+
+struct CTokenize {
+    ZigList<CTok> tokens; // output; emptied at the start of every tokenize_c_macro call
+    CTokState state;
+    bool error; // true when the macro body could not be tokenized
+    CTok *cur_tok; // token under construction, nullptr between tokens
+    Buf buf; // text of the number literal being scanned; parsed with strtod for floats
+    bool unsigned_suffix; // a u/U integer suffix has been seen
+    bool long_suffix; // an l/L integer suffix has been seen
+};
+
+void tokenize_c_macro(CTokenize *ctok, const uint8_t *c); // c: NUL-terminated text; scanning stops at an unescaped newline
+
+#endif
diff --git a/src/parseh.cpp b/src/parseh.cpp
index 0b2eb68c3b..9806f3cfb3 100644
--- a/src/parseh.cpp
+++ b/src/parseh.cpp
@@ -12,6 +12,7 @@
 #include "parser.hpp"
 #include "all_types.hpp"
 #include "tokenizer.hpp"
+#include "c_tokenizer.hpp"
 #include "analyze.hpp"
 
 #include
@@ -176,6 +177,19 @@ static AstNode *create_str_lit_node(Context *c, Buf *buf) {
     return node;
 }
 
+static AstNode *create_num_lit_float(Context *c, double x) { // number-literal node holding the float x
+    AstNode *node = create_node(c, NodeTypeNumberLiteral);
+    node->data.number_literal.kind = NumLitFloat;
+    node->data.number_literal.data.x_float = x;
+    return node;
+}
+
+static AstNode *create_num_lit_float_negative(Context *c, double x, bool negative) { // x, wrapped in a negation when asked
+    AstNode *num_lit_node = create_num_lit_float(c, x);
+    if (!negative) return num_lit_node;
+    return create_prefix_node(c, PrefixOpNegation, num_lit_node);
+}
+
 static AstNode *create_num_lit_unsigned(Context *c, uint64_t x) {
     AstNode *node = create_node(c, NodeTypeNumberLiteral);
     node->data.number_literal.kind = NumLitUInt;
@@ -183,6 +197,12 @@ static AstNode *create_num_lit_unsigned(Context *c, uint64_t x) {
     return node;
 }
 
+static AstNode *create_num_lit_unsigned_negative(Context *c, uint64_t x, bool negative) { // x, wrapped in a negation when asked
+    AstNode *num_lit_node = create_num_lit_unsigned(c, x);
+    if (!negative) return num_lit_node;
+    return create_prefix_node(c, PrefixOpNegation, num_lit_node);
+}
+
 static AstNode *create_num_lit_signed(Context *c, int64_t x) {
     if (x >= 0) {
         return create_num_lit_unsigned(c, x);
@@ -1244,209 +1264,70 @@ static void render_macros(Context *c) {
     }
 }
 
-static int parse_c_char_lit(Buf *value, uint8_t *out_c) {
-    enum State {
-        StateExpectStartQuot,
-        StateExpectChar,
-        StateExpectEndQuot,
-        StateExpectEnd,
-    };
-    State state = StateExpectStartQuot;
-    for (int i = 0; i < buf_len(value); i += 1) {
-        uint8_t c = buf_ptr(value)[i];
-        switch (state) {
-            case StateExpectStartQuot:
-                switch (c) {
-                    case '\'':
-                        state = StateExpectChar;
-                        break;
-                    default:
-                        return -1;
-                }
-                break;
-            case StateExpectChar:
-                switch (c) {
-                    case '\\':
-                    case '\'':
-                        return -1;
-                    default:
-                        *out_c = c;
-                        state = StateExpectEndQuot;
-                }
-                break;
-            case StateExpectEndQuot:
-                switch (c) {
-                    case '\'':
-                        state = StateExpectEnd;
-                        break;
-                    default:
-                        return -1;
-                }
-                break;
-            case StateExpectEnd:
-                return -1;
-        }
-    }
-    return (state == StateExpectEnd) ? 0 : -1;
-}
-
-static int parse_c_num_lit_unsigned(Buf *buf, uint64_t *out_val) {
-    char *temp;
-    *out_val = strtoull(buf_ptr(buf), &temp, 0);
-
-    if (temp == buf_ptr(buf) || *temp != 0 || *out_val == ULLONG_MAX) {
-        return -1;
-    }
-
-    return 0;
-}
-
-static bool is_simple_symbol(Buf *buf) {
-    bool first = true;
-    for (int i = 0; i < buf_len(buf); i += 1) {
-        uint8_t c = buf_ptr(buf)[i];
-        bool valid_alpha = (c >= 'a' && c <= 'z') ||
-            (c >= 'A' && c <= 'Z') || c == '_';
-        bool valid_digit = (c >= '0' && c <= '9');
-
-        bool ok = (valid_alpha || (!first && valid_digit));
-        first = false;
-
-        if (!ok) {
-            return false;
-        }
-    }
-    return true;
-}
-
-enum ParseCStrState {
-    ParseCStrStateExpectQuot,
-    ParseCStrStateNormal,
-    ParseCStrStateEscape,
-};
-
-static int parse_c_str_lit(Buf *buf, Buf *out_str) {
-    ParseCStrState state = ParseCStrStateExpectQuot;
-    buf_resize(out_str, 0);
-
-    for (int i = 0; i < buf_len(buf); i += 1) {
-        uint8_t c = buf_ptr(buf)[i];
-        switch (state) {
-            case ParseCStrStateExpectQuot:
-                if (c == '"') {
-                    state = ParseCStrStateNormal;
-                } else {
-                    return -1;
-                }
-                break;
-            case ParseCStrStateNormal:
-                switch (c) {
-                    case '\\':
-                        state = ParseCStrStateEscape;
-                        break;
-                    case '\n':
-                        return -1;
-                    case '"':
-                        return 0;
-                    default:
-                        buf_append_char(out_str, c);
-                }
-                break;
-            case ParseCStrStateEscape:
-                switch (c) {
-                    case '\'':
-                        buf_append_char(out_str, '\'');
-                        state = ParseCStrStateNormal;
-                        break;
-                    case '"':
-                        buf_append_char(out_str, '"');
-                        state = ParseCStrStateNormal;
-                        break;
-                    case '?':
-                        buf_append_char(out_str, '\?');
-                        state = ParseCStrStateNormal;
-                        break;
-                    case '\\':
-                        buf_append_char(out_str, '\\');
-                        state = ParseCStrStateNormal;
-                        break;
-                    case 'a':
-                        buf_append_char(out_str, '\a');
-                        state = ParseCStrStateNormal;
-                        break;
-                    case 'b':
-                        buf_append_char(out_str, '\b');
-                        state = ParseCStrStateNormal;
-                        break;
-                    case 'f':
-                        buf_append_char(out_str, '\f');
-                        state = ParseCStrStateNormal;
-                        break;
-                    case 'n':
-                        buf_append_char(out_str, '\n');
-                        state = ParseCStrStateNormal;
-                        break;
-                    case 'r':
-                        buf_append_char(out_str, '\r');
-                        state = ParseCStrStateNormal;
-                        break;
-                    case 't':
-                        buf_append_char(out_str, '\t');
-                        state = ParseCStrStateNormal;
-                        break;
-                    case 'v':
-                        buf_append_char(out_str, '\v');
-                        state = ParseCStrStateNormal;
-                        break;
-                    default:
-                        // TODO octal escape sequence, hexadecimal escape sequence, and
-                        // universal character name
-                        return -1;
-                }
-                break;
-        }
-    }
-
-    return -1;
-}
-
-static void process_macro(Context *c, Buf *name, Buf *value) {
-    //fprintf(stderr, "macro '%s' = '%s'\n", buf_ptr(name), buf_ptr(value));
+static void process_macro(Context *c, CTokenize *ctok, Buf *name, const char *char_ptr) { // records macro `name` if its body is one literal or symbol
     if (is_zig_keyword(name)) {
         return;
     }
 
-    // maybe it's a character literal
-    uint8_t ch;
-    if (!parse_c_char_lit(value, &ch)) {
-        AstNode *var_node = create_var_decl_node(c, buf_ptr(name), create_char_lit_node(c, ch));
-        c->macro_table.put(name, var_node);
-        return;
-    }
-    // maybe it's a string literal
-    Buf str_lit = BUF_INIT;
-    if (!parse_c_str_lit(value, &str_lit)) {
-        AstNode *var_node = create_var_decl_node(c, buf_ptr(name), create_str_lit_node(c, &str_lit));
-        c->macro_table.put(name, var_node);
+    tokenize_c_macro(ctok, (const uint8_t *)char_ptr);
+
+    if (ctok->error) { // body could not be tokenized: leave the macro untranslated
         return;
     }
 
-    // maybe it's an unsigned integer
-    uint64_t uint;
-    if (!parse_c_num_lit_unsigned(value, &uint)) {
-        AstNode *var_node = create_var_decl_node(c, buf_ptr(name), create_num_lit_unsigned(c, uint));
-        c->macro_table.put(name, var_node);
-        return;
-    }
-
-    // maybe it's a symbol
-    if (is_simple_symbol(value)) {
-        // if it equals itself, ignore. for example, from stdio.h:
-        // #define stdin stdin
-        if (buf_eql_buf(name, value)) {
-            return;
+    bool negate = false; // set by a leading CTokIdMinus
+    for (int i = 0; i < ctok->tokens.length; i += 1) {
+        bool is_first = (i == 0);
+        bool is_last = (i == ctok->tokens.length - 1);
+        CTok *tok = &ctok->tokens.at(i);
+        switch (tok->id) {
+            case CTokIdCharLit:
+                if (is_last && is_first) {
+                    AstNode *var_node = create_var_decl_node(c, buf_ptr(name),
+                            create_char_lit_node(c, tok->data.char_lit));
+                    c->macro_table.put(name, var_node);
+                }
+                return;
+            case CTokIdStrLit:
+                if (is_last && is_first) {
+                    AstNode *var_node = create_var_decl_node(c, buf_ptr(name), // copy the Buf: token storage is reused for the next macro
+                            create_str_lit_node(c, buf_create_from_buf(&tok->data.str_lit)));
+                    c->macro_table.put(name, var_node);
+                }
+                return;
+            case CTokIdNumLitInt:
+                if (is_last) {
+                    AstNode *var_node = create_var_decl_node(c, buf_ptr(name),
+                            create_num_lit_unsigned_negative(c, tok->data.num_lit_int, negate));
+                    c->macro_table.put(name, var_node);
+                }
+                return;
+            case CTokIdNumLitFloat:
+                if (is_last) {
+                    AstNode *var_node = create_var_decl_node(c, buf_ptr(name),
+                            create_num_lit_float_negative(c, tok->data.num_lit_float, negate));
+                    c->macro_table.put(name, var_node);
+                }
+                return;
+            case CTokIdSymbol:
+                if (is_last && is_first) {
+                    // if it equals itself, ignore. for example, from stdio.h:
+                    // #define stdin stdin
+                    Buf *symbol_name = buf_create_from_buf(&tok->data.symbol);
+                    if (buf_eql_buf(name, symbol_name)) {
+                        return;
+                    }
+                    c->macro_symbols.append({name, symbol_name});
+                }
+                return; // a symbol among other tokens is not translated; never fall through into CTokIdMinus
+            case CTokIdMinus:
+                if (is_first) {
+                    negate = true;
+                    break;
+                } else {
+                    return;
+                }
         }
-        c->macro_symbols.append({name, value});
     }
 }
 
@@ -1473,6 +1354,8 @@ static void process_symbol_macros(Context *c) {
 }
 
 static void process_preprocessor_entities(Context *c, ASTUnit &unit) {
+    CTokenize ctok = {{0}}; // one tokenizer state shared by every macro in this unit
+
     for (PreprocessedEntity *entity : unit.getLocalPreprocessingEntities()) {
         switch (entity->getKind()) {
             case PreprocessedEntity::InvalidKind:
@@ -1494,16 +1377,7 @@ static void process_preprocessor_entities(Context *c, ASTUnit &unit) {
                     }
 
                     const char *end_c = c->source_manager->getCharacterData(end_loc);
-                    Buf *value = buf_alloc();
-                    while (*end_c && *end_c != '\n') {
-                        buf_append_char(value, *end_c);
-                        if (end_c[0] == '\\' && end_c[1] == '\n') {
-                            end_c += 2;
-                        } else {
-                            end_c += 1;
-                        }
-                    }
-                    process_macro(c, buf_create_from_str(name), value);
+                    process_macro(c, &ctok, buf_create_from_str(name), end_c); // the tokenizer itself finds the end of the macro
                 }
         }
     }
diff --git a/test/run_tests.cpp b/test/run_tests.cpp
index 3e665dbef5..a08b3ea047 100644
--- a/test/run_tests.cpp
+++ b/test/run_tests.cpp
@@ -1390,6 +1390,10 @@ extern void (*fn_ptr)(void);
     add_parseh_case("__cdecl doesn't mess up function pointers", R"SOURCE(
 void foo(void (__cdecl *fn_ptr)(void));
     )SOURCE", 1, "pub extern fn foo(fn_ptr: ?extern fn());");
+
+    add_parseh_case("comment after integer literal", R"SOURCE(
+#define SDL_INIT_VIDEO 0x00000020 /**< SDL_INIT_VIDEO implies SDL_INIT_EVENTS */
+    )SOURCE", 1, "pub const SDL_INIT_VIDEO = 32;");
 }
 
 static void run_self_hosted_test(void) {