From dfbb8254ca97154b5314bde03655417c1dca86ae Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Mon, 12 Feb 2018 21:25:38 -0500 Subject: [PATCH 1/4] fix self hosted tokenizer handling of EOF --- std/zig/ast.zig | 32 ++++++------- std/zig/tokenizer.zig | 102 +++++++++++++++++++++++++++++++++++++----- 2 files changed, 107 insertions(+), 27 deletions(-) diff --git a/std/zig/ast.zig b/std/zig/ast.zig index a966c0316e..60824b22b8 100644 --- a/std/zig/ast.zig +++ b/std/zig/ast.zig @@ -18,6 +18,7 @@ pub const Node = struct { PrefixOp, IntegerLiteral, FloatLiteral, + BuiltinCall, }; pub fn iterate(base: &Node, index: usize) ?&Node { @@ -32,21 +33,7 @@ pub const Node = struct { Id.PrefixOp => @fieldParentPtr(NodePrefixOp, "base", base).iterate(index), Id.IntegerLiteral => @fieldParentPtr(NodeIntegerLiteral, "base", base).iterate(index), Id.FloatLiteral => @fieldParentPtr(NodeFloatLiteral, "base", base).iterate(index), - }; - } - - pub fn destroy(base: &Node, allocator: &mem.Allocator) void { - return switch (base.id) { - Id.Root => allocator.destroy(@fieldParentPtr(NodeRoot, "base", base)), - Id.VarDecl => allocator.destroy(@fieldParentPtr(NodeVarDecl, "base", base)), - Id.Identifier => allocator.destroy(@fieldParentPtr(NodeIdentifier, "base", base)), - Id.FnProto => allocator.destroy(@fieldParentPtr(NodeFnProto, "base", base)), - Id.ParamDecl => allocator.destroy(@fieldParentPtr(NodeParamDecl, "base", base)), - Id.Block => allocator.destroy(@fieldParentPtr(NodeBlock, "base", base)), - Id.InfixOp => allocator.destroy(@fieldParentPtr(NodeInfixOp, "base", base)), - Id.PrefixOp => allocator.destroy(@fieldParentPtr(NodePrefixOp, "base", base)), - Id.IntegerLiteral => allocator.destroy(@fieldParentPtr(NodeIntegerLiteral, "base", base)), - Id.FloatLiteral => allocator.destroy(@fieldParentPtr(NodeFloatLiteral, "base", base)), + Id.BuiltinCall => @fieldParentPtr(NodeBuiltinCall, "base", base).iterate(index), }; } }; @@ -269,3 +256,18 @@ pub const NodeFloatLiteral = struct { return null; } }; + +pub const NodeBuiltinCall = struct { + base: Node, + builtin_token: Token, + params: ArrayList(&Node), + + pub fn iterate(self: &NodeBuiltinCall, index: usize) ?&Node { + var i = index; + + if (i < self.params.len) return self.params.at(i); + i -= self.params.len; + + return null; + } +}; diff --git a/std/zig/tokenizer.zig b/std/zig/tokenizer.zig index 546356caa3..694a036f97 100644 --- a/std/zig/tokenizer.zig +++ b/std/zig/tokenizer.zig @@ -68,6 +68,7 @@ pub const Token = struct { Invalid, Identifier, StringLiteral: StrLitKind, + StringIdentifier, Eof, Builtin, Bang, @@ -205,6 +206,7 @@ pub const Tokenizer = struct { Ampersand, Period, Period2, + SawAtSign, }; pub fn next(self: &Tokenizer) Token { @@ -238,8 +240,7 @@ pub const Tokenizer = struct { result.id = Token.Id.Identifier; }, '@' => { - state = State.Builtin; - result.id = Token.Id.Builtin; + state = State.SawAtSign; }, '=' => { state = State.Equal; @@ -313,6 +314,20 @@ pub const Tokenizer = struct { break; }, }, + + State.SawAtSign => switch (c) { + '"' => { + result.id = Token.Id.StringIdentifier; + state = State.StringLiteral; + }, + else => { + // reinterpret as a builtin + self.index -= 1; + state = State.Builtin; + result.id = Token.Id.Builtin; + }, + }, + State.Ampersand => switch (c) { '=' => { result.id = Token.Id.AmpersandEqual; @@ -512,7 +527,59 @@ pub const Tokenizer = struct { } } result.end = self.index; + if (self.index == self.buffer.len) { + switch (state) { + State.Start, + State.C, + State.IntegerLiteral, + State.IntegerLiteralWithRadix, + State.FloatFraction, + State.FloatExponentNumber, + State.StringLiteral, // find this error later + State.Builtin => {}, + State.Identifier => { + if (Token.getKeyword(self.buffer[result.start..self.index])) |id| { + result.id = id; + } + }, + State.LineComment => { + result.id = Token.Id.Eof; + }, + + State.NumberDot, + State.FloatExponentUnsigned, + State.SawAtSign, + State.StringLiteralBackslash => { + result.id = Token.Id.Invalid; + }, + + State.Equal => { + result.id = Token.Id.Equal; + }, + State.Bang => { + result.id = Token.Id.Bang; + }, + State.Minus => { + result.id = Token.Id.Minus; + }, + State.Slash => { + result.id = Token.Id.Slash; + }, + State.Zero => { + result.id = Token.Id.IntegerLiteral; + }, + State.Ampersand => { + result.id = Token.Id.Ampersand; + }, + State.Period => { + result.id = Token.Id.Period; + }, + State.Period2 => { + result.id = Token.Id.Ellipsis2; + }, + } + } if (result.id == Token.Id.Eof) { if (self.pending_invalid_token) |token| { self.pending_invalid_token = null; @@ -551,7 +618,7 @@ pub const Tokenizer = struct { } else { // check utf8-encoded character. const length = std.unicode.utf8ByteSequenceLength(c0) catch return 1; - if (self.index + length >= self.buffer.len) { + if (self.index + length > self.buffer.len) { return u3(self.buffer.len - self.index); } const bytes = self.buffer[self.index..self.index + length]; @@ -632,15 +699,25 @@ test "tokenizer - illegal unicode codepoints" { testTokenize("//\xe2\x80\xaa", []Token.Id{}); } -fn testTokenize(source: []const u8, expected_tokens: []const Token.Id) void { - // (test authors, just make this bigger if you need it) - var padded_source: [0x100]u8 = undefined; - std.mem.copy(u8, padded_source[0..source.len], source); - padded_source[source.len + 0] = '\n'; - padded_source[source.len + 1] = '\n'; - padded_source[source.len + 2] = '\n'; +test "tokenizer - string identifier and builtin fns" { + testTokenize( + \\const @"if" = @import("std"); + , + []Token.Id{ + Token.Id.Keyword_const, + Token.Id.StringIdentifier, + Token.Id.Equal, + Token.Id.Builtin, + Token.Id.LParen, + Token.Id {.StringLiteral = Token.StrLitKind.Normal}, + Token.Id.RParen, + Token.Id.Semicolon, + } + ); +} - var tokenizer = Tokenizer.init(padded_source[0..source.len + 3]); +fn testTokenize(source: []const u8, expected_tokens: []const Token.Id) void { + var tokenizer = Tokenizer.init(source); for (expected_tokens) |expected_token_id| { const token = tokenizer.next(); std.debug.assert(@TagType(Token.Id)(token.id) == @TagType(Token.Id)(expected_token_id)); @@ -651,5 +728,6 @@ fn testTokenize(source: []const u8, expected_tokens: []const Token.Id) void { else => {}, } } - std.debug.assert(tokenizer.next().id == Token.Id.Eof); + const last_token = tokenizer.next(); + std.debug.assert(last_token.id == Token.Id.Eof); } From 2dcff95bd2fb8f377491ac48b0ecf961183abcd7 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Tue, 13 Feb 2018 10:28:55 -0500 Subject: [PATCH 2/4] self hosted: add tokenizer test fix eof handling --- std/zig/tokenizer.zig | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/std/zig/tokenizer.zig b/std/zig/tokenizer.zig index 694a036f97..de1263ac55 100644 --- a/std/zig/tokenizer.zig +++ b/std/zig/tokenizer.zig @@ -72,6 +72,8 @@ pub const Token = struct { Eof, Builtin, Bang, + Pipe, + PipeEqual, Equal, EqualEqual, BangEqual, @@ -193,6 +195,7 @@ pub const Tokenizer = struct { StringLiteralBackslash, Equal, Bang, + Pipe, Minus, Slash, LineComment, @@ -248,6 +251,9 @@ pub const Tokenizer = struct { '!' => { state = State.Bang; }, + '|' => { + state = State.Pipe; + }, '(' => { result.id = Token.Id.LParen; self.index += 1; @@ -394,6 +400,18 @@ pub const Tokenizer = struct { }, }, + State.Pipe => switch (c) { + '=' => { + result.id = Token.Id.PipeEqual; + self.index += 1; + break; + }, + else => { + result.id = Token.Id.Pipe; + break; + }, + }, + State.Equal => switch (c) { '=' => { result.id = Token.Id.EqualEqual; @@ -525,9 +543,7 @@ pub const Tokenizer = struct { else => break, }, } - } - result.end = self.index; - if (self.index == self.buffer.len) { + } else if (self.index == self.buffer.len) { switch (state) { State.Start, State.C, @@ -578,6 +594,9 @@ pub const Tokenizer = struct { State.Period2 => { result.id = Token.Id.Ellipsis2; }, + State.Pipe => { + result.id = Token.Id.Pipe; + }, } } if (result.id == Token.Id.Eof) { @@ -587,6 +606,7 @@ pub const Tokenizer = struct { } } + result.end = self.index; return result; } @@ -716,6 +736,13 @@ test "tokenizer - string identifier and builtin fns" { ); } +test "tokenizer - pipe and then invalid" { + testTokenize("||=", []Token.Id{ + Token.Id.Pipe, + Token.Id.PipeEqual, + }); +} + fn testTokenize(source: []const u8, expected_tokens: []const Token.Id) void { var tokenizer = Tokenizer.init(source); for (expected_tokens) |expected_token_id| { From 02f70cda8a9fb038705e03b6e65625119bdef4e7 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Tue, 13 Feb 2018 10:54:46 -0500 Subject: [PATCH 3/4] zig_llvm.cpp uses new(std::nothrow) This fixes a mismatched malloc/delete because we were allocating with malloc and then llvm was freeing with delete. --- src/zig_llvm.cpp | 35 ++++++++--------------------------- 1 file changed, 8 insertions(+), 27 deletions(-) diff --git a/src/zig_llvm.cpp b/src/zig_llvm.cpp index 3e92752d9f..97c07ab820 100644 --- a/src/zig_llvm.cpp +++ b/src/zig_llvm.cpp @@ -43,31 +43,8 @@ #include -#if defined(_MSC_VER) -#define ATTRIBUTE_RETURNS_NOALIAS __declspec(restrict) -#else -#define ATTRIBUTE_RETURNS_NOALIAS __attribute__((__malloc__)) -#endif - using namespace llvm; -template -ATTRIBUTE_RETURNS_NOALIAS static inline T * create(Args... args) { - T * ptr = reinterpret_cast(malloc(sizeof(T))); - if (ptr == nullptr) - return nullptr; - new (ptr) T(args...); - return ptr; -} - -template -static inline void destroy(T * ptr) { - if (ptr != nullptr) { - ptr[0].~T(); - } - free(ptr); -} - void ZigLLVMInitializeLoopStrengthReducePass(LLVMPassRegistryRef R) { initializeLoopStrengthReducePass(*unwrap(R)); } @@ -116,7 +93,11 @@ bool ZigLLVMTargetMachineEmitToFile(LLVMTargetMachineRef targ_machine_ref, LLVMM Module* module = unwrap(module_ref); - PassManagerBuilder *PMBuilder = create(); + PassManagerBuilder *PMBuilder = new(std::nothrow) PassManagerBuilder(); + if (PMBuilder == nullptr) { + *error_message = strdup("memory allocation failure"); + return true; + } PMBuilder->OptLevel = target_machine->getOptLevel(); PMBuilder->SizeLevel = 0; @@ -150,7 +131,8 @@ bool ZigLLVMTargetMachineEmitToFile(LLVMTargetMachineRef targ_machine_ref, LLVMM // Set up the per-function pass manager. legacy::FunctionPassManager FPM = legacy::FunctionPassManager(module); - FPM.add(create(tlii)); + auto tliwp = new(std::nothrow) TargetLibraryInfoWrapperPass(tlii); + FPM.add(tliwp); FPM.add(createTargetTransformInfoWrapperPass(target_machine->getTargetIRAnalysis())); if (assertions_on) { FPM.add(createVerifierPass()); @@ -446,10 +428,9 @@ unsigned ZigLLVMTag_DW_union_type(void) { } ZigLLVMDIBuilder *ZigLLVMCreateDIBuilder(LLVMModuleRef module, bool allow_unresolved) { - DIBuilder *di_builder = reinterpret_cast(malloc(sizeof(DIBuilder))); + DIBuilder *di_builder = new(std::nothrow) DIBuilder(*unwrap(module), allow_unresolved); if (di_builder == nullptr) return nullptr; - new (di_builder) DIBuilder(*unwrap(module), allow_unresolved); return reinterpret_cast(di_builder); } From c721354b73508ec53bf72d8e7fb304147676625d Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Tue, 13 Feb 2018 11:17:26 -0500 Subject: [PATCH 4/4] correct doc comment in self hosted parser --- std/zig/parser.zig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/std/zig/parser.zig b/std/zig/parser.zig index 4dbca8501e..792cc2d834 100644 --- a/std/zig/parser.zig +++ b/std/zig/parser.zig @@ -95,7 +95,8 @@ pub const Parser = struct { }; /// Returns an AST tree, allocated with the parser's allocator. - /// Result should be freed with `freeAst` when done. + /// Result should be freed with tree.deinit() when there are + /// no more references to any AST nodes of the tree. pub fn parse(self: &Parser) !Tree { var stack = self.initUtilityArrayList(State); defer self.deinitUtilityArrayList(stack);