stage2: tokenizer does not emit line comments anymore

only std.zig.render cares about these, and it can find them in the
original source easily enough.
Andrew Kelley 2021-01-31 21:57:48 -07:00
parent 4dca99d3f6
commit bf8fafc37d
2 changed files with 19 additions and 39 deletions
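The claim that std.zig.render "can find them in the original source easily enough" boils down to scanning the raw bytes between two token offsets. A minimal sketch of that idea, assuming nothing about the real render code (hasLineCommentBetween and the offsets in the test are hypothetical, not the actual std.zig.render API):

    const std = @import("std");

    // Hypothetical helper, not the actual std.zig.render implementation:
    // given the raw source and the byte offsets of two consecutive tokens,
    // report whether a line comment sits in the gap between them.
    fn hasLineCommentBetween(source: []const u8, prev_end: usize, next_start: usize) bool {
        const gap = source[prev_end..next_start];
        var i: usize = 0;
        while (i + 1 < gap.len) : (i += 1) {
            if (gap[i] == '/' and gap[i + 1] == '/') return true;
        }
        return false;
    }

    test "comments survive in the source even without tokens" {
        const source = "const a = 1; // keep me\nconst b = 2;";
        // offsets as the tokenizer would report them: the ';' ends at
        // byte 12, the second 'const' starts at byte 24
        std.testing.expect(hasLineCommentBetween(source, 12, 24));
    }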

lib/std/zig/parse.zig

@@ -28,7 +28,6 @@ pub fn parse(gpa: *Allocator, source: []const u8) Allocator.Error!Tree {
var tokenizer = std.zig.Tokenizer.init(source);
while (true) {
const token = tokenizer.next();
- if (token.tag == .LineComment) continue;
try tokens.append(gpa, .{
.tag = token.tag,
.start = @intCast(u32, token.loc.start),
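
With the .LineComment filter deleted from parse, comments simply never reach the token list. A small usage sketch against the 2021-era std.zig.Tokenizer API, showing that commented source now yields only the real tokens:

    const std = @import("std");

    test "line comments no longer show up as tokens" {
        var tokenizer = std.zig.Tokenizer.init("const x = 1; // note\n");
        var count: usize = 0;
        while (true) {
            const token = tokenizer.next();
            if (token.tag == .Eof) break;
            count += 1;
        }
        // const, x, =, 1, ; and nothing for the comment
        std.testing.expectEqual(@as(usize, 5), count);
    }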

lib/std/zig/tokenizer.zig

@@ -43,7 +43,6 @@ pub const Token = struct {
.{ "if", .Keyword_if },
.{ "inline", .Keyword_inline },
.{ "noalias", .Keyword_noalias },
.{ "noasync", .Keyword_nosuspend }, // TODO: remove this
.{ "noinline", .Keyword_noinline },
.{ "nosuspend", .Keyword_nosuspend },
.{ "null", .Keyword_null },
@@ -141,10 +140,8 @@ pub const Token = struct {
Tilde,
IntegerLiteral,
FloatLiteral,
- LineComment,
DocComment,
ContainerDocComment,
- ShebangLine,
Keyword_align,
Keyword_allowzero,
Keyword_and,
@@ -211,10 +208,8 @@ pub const Token = struct {
.Builtin => "Builtin",
.IntegerLiteral => "IntegerLiteral",
.FloatLiteral => "FloatLiteral",
- .LineComment => "LineComment",
.DocComment => "DocComment",
.ContainerDocComment => "ContainerDocComment",
- .ShebangLine => "ShebangLine",
.Bang => "!",
.Pipe => "|",
@@ -1016,7 +1011,6 @@ pub const Tokenizer = struct {
.slash => switch (c) {
'/' => {
state = .line_comment_start;
- result.tag = .LineComment;
},
'=' => {
result.tag = .SlashEqual;
@@ -1036,7 +1030,7 @@
result.tag = .ContainerDocComment;
state = .container_doc_comment;
},
- '\n' => break,
+ '\n' => state = .start,
'\t', '\r' => state = .line_comment,
else => {
state = .line_comment;
@@ -1061,7 +1055,12 @@
self.checkLiteralCharacter();
},
},
- .line_comment, .doc_comment, .container_doc_comment => switch (c) {
+ .line_comment => switch (c) {
+ '\n' => state = .start,
+ '\t', '\r' => state = .line_comment,
+ else => self.checkLiteralCharacter(),
+ },
+ .doc_comment, .container_doc_comment => switch (c) {
'\n' => break,
'\t', '\r' => {},
else => self.checkLiteralCharacter(),
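
The behavioral core of the change is in this hunk: on '\n', .line_comment now sets state back to .start instead of breaking out of the loop, so next() keeps scanning and the caller never sees a token for the comment (doc comments still break, since they remain real tokens). An illustrative standalone sketch of that pattern, deliberately far simpler than the real std.zig.Tokenizer:

    const std = @import("std");

    // MiniTokenizer is a toy, not the real tokenizer: it returns
    // whitespace-separated words and silently consumes "//" line comments.
    const MiniTokenizer = struct {
        buffer: []const u8,
        index: usize = 0,

        const State = enum { start, slash, line_comment, word };

        fn next(self: *MiniTokenizer) ?[]const u8 {
            var state: State = .start;
            var word_start: usize = 0;
            while (self.index < self.buffer.len) : (self.index += 1) {
                const c = self.buffer[self.index];
                switch (state) {
                    .start => switch (c) {
                        ' ', '\t', '\n' => {},
                        '/' => state = .slash,
                        else => {
                            state = .word;
                            word_start = self.index;
                        },
                    },
                    .slash => switch (c) {
                        '/' => state = .line_comment,
                        else => state = .start, // a lone '/' is ignored in this sketch
                    },
                    .line_comment => switch (c) {
                        // the key move: back to .start instead of breaking,
                        // so no token is ever produced for the comment
                        '\n' => state = .start,
                        else => {},
                    },
                    .word => switch (c) {
                        ' ', '\t', '\n' => {
                            const word = self.buffer[word_start..self.index];
                            self.index += 1; // skip the delimiter
                            return word;
                        },
                        else => {},
                    },
                }
            }
            if (state == .word) return self.buffer[word_start..];
            return null;
        }
    };

    test "line comments are skipped inside next(), never returned" {
        var t = MiniTokenizer{ .buffer = "a // hidden\nb" };
        std.testing.expectEqualSlices(u8, "a", t.next().?);
        std.testing.expectEqualSlices(u8, "b", t.next().?);
        std.testing.expect(t.next() == null);
    }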
@@ -1324,6 +1323,8 @@ pub const Tokenizer = struct {
.string_literal, // find this error later
.multiline_string_literal_line,
.builtin,
+ .line_comment,
+ .line_comment_start,
=> {},
.identifier => {
@@ -1331,9 +1332,6 @@
result.tag = tag;
}
},
- .line_comment, .line_comment_start => {
- result.tag = .LineComment;
- },
.doc_comment, .doc_comment_start => {
result.tag = .DocComment;
},
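
These two hunks handle end-of-input: a pending .line_comment or .line_comment_start state now falls into the empty `=> {}` arm instead of tagging the result .LineComment, so a comment that runs to EOF produces no token at all (this assumes, as in the 2021-era implementation, that next() initializes the result tag to .Eof). The observable effect, sketched as a test:

    const std = @import("std");

    test "trailing line comment produces only Eof" {
        var tokenizer = std.zig.Tokenizer.init("x // trailing");
        std.testing.expectEqual(std.zig.Token.Tag.Identifier, tokenizer.next().tag);
        std.testing.expectEqual(std.zig.Token.Tag.Eof, tokenizer.next().tag);
    }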
@@ -1614,77 +1612,63 @@ test "tokenizer - invalid literal/comment characters" {
.Invalid,
});
testTokenize("//\x00", &[_]Token.Tag{
.LineComment,
.Invalid,
});
testTokenize("//\x1f", &[_]Token.Tag{
.LineComment,
.Invalid,
});
testTokenize("//\x7f", &[_]Token.Tag{
.LineComment,
.Invalid,
});
}
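
For context, the testTokenize helper used throughout these tests works roughly like this (a sketch of its shape; the real definition lives elsewhere in tokenizer.zig): it checks the expected tags in order and then requires the very next token to be .Eof. An empty expected list therefore asserts that the input produces nothing but Eof, which is why the pure-comment cases below lose their .LineComment entries rather than gaining a replacement.

    const std = @import("std");
    const Token = std.zig.Token;
    const Tokenizer = std.zig.Tokenizer;

    // Rough shape of the helper, not the verbatim definition.
    fn testTokenize(source: []const u8, expected_tokens: []const Token.Tag) void {
        var tokenizer = Tokenizer.init(source);
        for (expected_tokens) |expected_tag| {
            const token = tokenizer.next();
            std.testing.expectEqual(expected_tag, token.tag);
        }
        // after the expected tags, the stream must be exhausted
        std.testing.expectEqual(Token.Tag.Eof, tokenizer.next().tag);
    }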
test "tokenizer - utf8" {
testTokenize("//\xc2\x80", &[_]Token.Tag{.LineComment});
testTokenize("//\xf4\x8f\xbf\xbf", &[_]Token.Tag{.LineComment});
testTokenize("//\xc2\x80", &[_]Token.Tag{});
testTokenize("//\xf4\x8f\xbf\xbf", &[_]Token.Tag{});
}
test "tokenizer - invalid utf8" {
testTokenize("//\x80", &[_]Token.Tag{
.LineComment,
.Invalid,
});
testTokenize("//\xbf", &[_]Token.Tag{
.LineComment,
.Invalid,
});
testTokenize("//\xf8", &[_]Token.Tag{
.LineComment,
.Invalid,
});
testTokenize("//\xff", &[_]Token.Tag{
.LineComment,
.Invalid,
});
testTokenize("//\xc2\xc0", &[_]Token.Tag{
.LineComment,
.Invalid,
});
testTokenize("//\xe0", &[_]Token.Tag{
.LineComment,
.Invalid,
});
testTokenize("//\xf0", &[_]Token.Tag{
.LineComment,
.Invalid,
});
testTokenize("//\xf0\x90\x80\xc0", &[_]Token.Tag{
.LineComment,
.Invalid,
});
}
test "tokenizer - illegal unicode codepoints" {
// unicode newline characters: U+0085, U+2028, U+2029
testTokenize("//\xc2\x84", &[_]Token.Tag{.LineComment});
testTokenize("//\xc2\x84", &[_]Token.Tag{});
testTokenize("//\xc2\x85", &[_]Token.Tag{
.LineComment,
.Invalid,
});
testTokenize("//\xc2\x86", &[_]Token.Tag{.LineComment});
testTokenize("//\xe2\x80\xa7", &[_]Token.Tag{.LineComment});
testTokenize("//\xc2\x86", &[_]Token.Tag{});
testTokenize("//\xe2\x80\xa7", &[_]Token.Tag{});
testTokenize("//\xe2\x80\xa8", &[_]Token.Tag{
.LineComment,
.Invalid,
});
testTokenize("//\xe2\x80\xa9", &[_]Token.Tag{
.LineComment,
.Invalid,
});
testTokenize("//\xe2\x80\xaa", &[_]Token.Tag{.LineComment});
testTokenize("//\xe2\x80\xaa", &[_]Token.Tag{});
}
test "tokenizer - string identifier and builtin fns" {
@@ -1719,10 +1703,8 @@ test "tokenizer - comments with literal tab" {
\\/// foo
\\/// /foo
, &[_]Token.Tag{
- .LineComment,
.ContainerDocComment,
.DocComment,
- .LineComment,
.DocComment,
.DocComment,
});
@@ -1736,12 +1718,12 @@ test "tokenizer - pipe and then invalid" {
}
test "tokenizer - line comment and doc comment" {
testTokenize("//", &[_]Token.Tag{.LineComment});
testTokenize("// a / b", &[_]Token.Tag{.LineComment});
testTokenize("// /", &[_]Token.Tag{.LineComment});
testTokenize("//", &[_]Token.Tag{});
testTokenize("// a / b", &[_]Token.Tag{});
testTokenize("// /", &[_]Token.Tag{});
testTokenize("/// a", &[_]Token.Tag{.DocComment});
testTokenize("///", &[_]Token.Tag{.DocComment});
testTokenize("////", &[_]Token.Tag{.LineComment});
testTokenize("////", &[_]Token.Tag{});
testTokenize("//!", &[_]Token.Tag{.ContainerDocComment});
testTokenize("//!!", &[_]Token.Tag{.ContainerDocComment});
}
@@ -1754,7 +1736,6 @@ test "tokenizer - line comment followed by identifier" {
, &[_]Token.Tag{
.Identifier,
.Comma,
- .LineComment,
.Identifier,
.Comma,
});