From dfbb8254ca97154b5314bde03655417c1dca86ae Mon Sep 17 00:00:00 2001
From: Andrew Kelley <superjoe30@gmail.com>
Date: Mon, 12 Feb 2018 21:25:38 -0500
Subject: [PATCH 1/4] fix self hosted tokenizer handling of EOF

---
 std/zig/ast.zig       |  32 ++++++-------
 std/zig/tokenizer.zig | 102 +++++++++++++++++++++++++++++++++++++-----
 2 files changed, 107 insertions(+), 27 deletions(-)

diff --git a/std/zig/ast.zig b/std/zig/ast.zig
index a966c0316e..60824b22b8 100644
--- a/std/zig/ast.zig
+++ b/std/zig/ast.zig
@@ -18,6 +18,7 @@ pub const Node = struct {
         PrefixOp,
         IntegerLiteral,
         FloatLiteral,
+        BuiltinCall,
     };
 
     pub fn iterate(base: &Node, index: usize) ?&Node {
@@ -32,21 +33,7 @@ pub const Node = struct {
             Id.PrefixOp => @fieldParentPtr(NodePrefixOp, "base", base).iterate(index),
             Id.IntegerLiteral => @fieldParentPtr(NodeIntegerLiteral, "base", base).iterate(index),
             Id.FloatLiteral => @fieldParentPtr(NodeFloatLiteral, "base", base).iterate(index),
-        };
-    }
-
-    pub fn destroy(base: &Node, allocator: &mem.Allocator) void {
-        return switch (base.id) {
-            Id.Root => allocator.destroy(@fieldParentPtr(NodeRoot, "base", base)),
-            Id.VarDecl => allocator.destroy(@fieldParentPtr(NodeVarDecl, "base", base)),
-            Id.Identifier => allocator.destroy(@fieldParentPtr(NodeIdentifier, "base", base)),
-            Id.FnProto => allocator.destroy(@fieldParentPtr(NodeFnProto, "base", base)),
-            Id.ParamDecl => allocator.destroy(@fieldParentPtr(NodeParamDecl, "base", base)),
-            Id.Block => allocator.destroy(@fieldParentPtr(NodeBlock, "base", base)),
-            Id.InfixOp => allocator.destroy(@fieldParentPtr(NodeInfixOp, "base", base)),
-            Id.PrefixOp => allocator.destroy(@fieldParentPtr(NodePrefixOp, "base", base)),
-            Id.IntegerLiteral => allocator.destroy(@fieldParentPtr(NodeIntegerLiteral, "base", base)),
-            Id.FloatLiteral => allocator.destroy(@fieldParentPtr(NodeFloatLiteral, "base", base)),
+            Id.BuiltinCall => @fieldParentPtr(NodeBuiltinCall, "base", base).iterate(index),
         };
     }
 };
@@ -269,3 +256,18 @@ pub const NodeFloatLiteral = struct {
         return null;
     }
 };
+
+pub const NodeBuiltinCall = struct {
+    base: Node,
+    builtin_token: Token,
+    params: ArrayList(&Node),
+
+    pub fn iterate(self: &NodeBuiltinCall, index: usize) ?&Node {
+        var i = index;
+
+        if (i < self.params.len) return self.params.at(i);
+        i -= self.params.len;
+
+        return null;
+    }
+};
diff --git a/std/zig/tokenizer.zig b/std/zig/tokenizer.zig
index 546356caa3..694a036f97 100644
--- a/std/zig/tokenizer.zig
+++ b/std/zig/tokenizer.zig
@@ -68,6 +68,7 @@ pub const Token = struct {
         Invalid,
         Identifier,
         StringLiteral: StrLitKind,
+        StringIdentifier,
         Eof,
         Builtin,
         Bang,
@@ -205,6 +206,7 @@ pub const Tokenizer = struct {
         Ampersand,
         Period,
         Period2,
+        SawAtSign,
     };
 
     pub fn next(self: &Tokenizer) Token {
@@ -238,8 +240,7 @@ pub const Tokenizer = struct {
                         result.id = Token.Id.Identifier;
                     },
                     '@' => {
-                        state = State.Builtin;
-                        result.id = Token.Id.Builtin;
+                        state = State.SawAtSign;
                     },
                     '=' => {
                         state = State.Equal;
@@ -313,6 +314,20 @@ pub const Tokenizer = struct {
                         break;
                     },
                 },
+
+                State.SawAtSign => switch (c) {
+                    '"' => {
+                        result.id = Token.Id.StringIdentifier;
+                        state = State.StringLiteral;
+                    },
+                    else => {
+                        // reinterpret as a builtin
+                        self.index -= 1;
+                        state = State.Builtin;
+                        result.id = Token.Id.Builtin;
+                    },
+                },
+
                 State.Ampersand => switch (c) {
                     '=' => {
                         result.id = Token.Id.AmpersandEqual;
@@ -512,7 +527,59 @@ pub const Tokenizer = struct {
             }
         }
         result.end = self.index;
+        if (self.index == self.buffer.len) {
+            switch (state) {
+                State.Start,
+                State.C,
+                State.IntegerLiteral,
+                State.IntegerLiteralWithRadix,
+                State.FloatFraction,
+                State.FloatExponentNumber,
+                State.StringLiteral, // find this error later
+                State.Builtin => {},
 
+                State.Identifier => {
+                    if (Token.getKeyword(self.buffer[result.start..self.index])) |id| {
+                        result.id = id;
+                    }
+                },
+                State.LineComment => {
+                    result.id = Token.Id.Eof;
+                },
+
+                State.NumberDot,
+                State.FloatExponentUnsigned,
+                State.SawAtSign,
+                State.StringLiteralBackslash => {
+                    result.id = Token.Id.Invalid;
+                },
+
+                State.Equal => {
+                    result.id = Token.Id.Equal;
+                },
+                State.Bang => {
+                    result.id = Token.Id.Bang;
+                },
+                State.Minus => {
+                    result.id = Token.Id.Minus;
+                },
+                State.Slash => {
+                    result.id = Token.Id.Slash;
+                },
+                State.Zero => {
+                    result.id = Token.Id.IntegerLiteral;
+                },
+                State.Ampersand => {
+                    result.id = Token.Id.Ampersand;
+                },
+                State.Period => {
+                    result.id = Token.Id.Period;
+                },
+                State.Period2 => {
+                    result.id = Token.Id.Ellipsis2;
+                },
+            }
+        }
         if (result.id == Token.Id.Eof) {
             if (self.pending_invalid_token) |token| {
                 self.pending_invalid_token = null;
@@ -551,7 +618,7 @@ pub const Tokenizer = struct {
         } else {
             // check utf8-encoded character.
             const length = std.unicode.utf8ByteSequenceLength(c0) catch return 1;
-            if (self.index + length >= self.buffer.len) {
+            if (self.index + length > self.buffer.len) {
                 return u3(self.buffer.len - self.index);
             }
             const bytes = self.buffer[self.index..self.index + length];
@@ -632,15 +699,25 @@ test "tokenizer - illegal unicode codepoints" {
     testTokenize("//\xe2\x80\xaa", []Token.Id{});
 }
 
-fn testTokenize(source: []const u8, expected_tokens: []const Token.Id) void {
-    // (test authors, just make this bigger if you need it)
-    var padded_source: [0x100]u8 = undefined;
-    std.mem.copy(u8, padded_source[0..source.len], source);
-    padded_source[source.len + 0] = '\n';
-    padded_source[source.len + 1] = '\n';
-    padded_source[source.len + 2] = '\n';
+test "tokenizer - string identifier and builtin fns" {
+    testTokenize(
+        \\const @"if" = @import("std");
+    ,
+        []Token.Id{
+            Token.Id.Keyword_const,
+            Token.Id.StringIdentifier,
+            Token.Id.Equal,
+            Token.Id.Builtin,
+            Token.Id.LParen,
+            Token.Id {.StringLiteral = Token.StrLitKind.Normal},
+            Token.Id.RParen,
+            Token.Id.Semicolon,
+        }
+    );
+}
 
-    var tokenizer = Tokenizer.init(padded_source[0..source.len + 3]);
+fn testTokenize(source: []const u8, expected_tokens: []const Token.Id) void {
+    var tokenizer = Tokenizer.init(source);
     for (expected_tokens) |expected_token_id| {
         const token = tokenizer.next();
         std.debug.assert(@TagType(Token.Id)(token.id) == @TagType(Token.Id)(expected_token_id));
@@ -651,5 +728,6 @@ fn testTokenize(source: []const u8, expected_tokens: []const Token.Id) void {
             else => {},
         }
     }
-    std.debug.assert(tokenizer.next().id == Token.Id.Eof);
+    const last_token = tokenizer.next();
+    std.debug.assert(last_token.id == Token.Id.Eof);
 }

From 2dcff95bd2fb8f377491ac48b0ecf961183abcd7 Mon Sep 17 00:00:00 2001
From: Andrew Kelley <superjoe30@gmail.com>
Date: Tue, 13 Feb 2018 10:28:55 -0500
Subject: [PATCH 2/4] self hosted: add tokenizer test, fix EOF handling

---
 std/zig/tokenizer.zig | 33 ++++++++++++++++++++++++++++++---
 1 file changed, 30 insertions(+), 3 deletions(-)

diff --git a/std/zig/tokenizer.zig b/std/zig/tokenizer.zig
index 694a036f97..de1263ac55 100644
--- a/std/zig/tokenizer.zig
+++ b/std/zig/tokenizer.zig
@@ -72,6 +72,8 @@ pub const Token = struct {
         Eof,
         Builtin,
         Bang,
+        Pipe,
+        PipeEqual,
         Equal,
         EqualEqual,
         BangEqual,
@@ -193,6 +195,7 @@ pub const Tokenizer = struct {
         StringLiteralBackslash,
         Equal,
         Bang,
+        Pipe,
         Minus,
         Slash,
         LineComment,
@@ -248,6 +251,9 @@ pub const Tokenizer = struct {
                     '!' => {
                         state = State.Bang;
                     },
+                    '|' => {
+                        state = State.Pipe;
+                    },
                     '(' => {
                         result.id = Token.Id.LParen;
                         self.index += 1;
@@ -394,6 +400,18 @@ pub const Tokenizer = struct {
                     },
                 },
 
+                State.Pipe => switch (c) {
+                    '=' => {
+                        result.id = Token.Id.PipeEqual;
+                        self.index += 1;
+                        break;
+                    },
+                    else => {
+                        result.id = Token.Id.Pipe;
+                        break;
+                    },
+                },
+
                 State.Equal => switch (c) {
                     '=' => {
                         result.id = Token.Id.EqualEqual;
@@ -525,9 +543,7 @@ pub const Tokenizer = struct {
                     else => break,
                 },
             }
-        }
-        result.end = self.index;
-        if (self.index == self.buffer.len) {
+        } else if (self.index == self.buffer.len) {
             switch (state) {
                 State.Start,
                 State.C,
@@ -578,6 +594,9 @@ pub const Tokenizer = struct {
                 State.Period2 => {
                     result.id = Token.Id.Ellipsis2;
                 },
+                State.Pipe => {
+                    result.id = Token.Id.Pipe;
+                },
             }
         }
         if (result.id == Token.Id.Eof) {
@@ -587,6 +606,7 @@ pub const Tokenizer = struct {
             }
         }
 
+        result.end = self.index;
         return result;
     }
 
@@ -716,6 +736,13 @@ test "tokenizer - string identifier and builtin fns" {
     );
 }
 
+test "tokenizer - pipe and then invalid" {
+    testTokenize("||=", []Token.Id{
+        Token.Id.Pipe,
+        Token.Id.PipeEqual,
+    });
+}
+
 fn testTokenize(source: []const u8, expected_tokens: []const Token.Id) void {
     var tokenizer = Tokenizer.init(source);
     for (expected_tokens) |expected_token_id| {

From 02f70cda8a9fb038705e03b6e65625119bdef4e7 Mon Sep 17 00:00:00 2001
From: Andrew Kelley <superjoe30@gmail.com>
Date: Tue, 13 Feb 2018 10:54:46 -0500
Subject: [PATCH 3/4] zig_llvm.cpp uses new(std::nothrow)

This fixes a mismatched malloc/delete: we were
allocating with malloc, and then LLVM was
freeing with delete.
---
 src/zig_llvm.cpp | 35 ++++++++---------------------------
 1 file changed, 8 insertions(+), 27 deletions(-)

diff --git a/src/zig_llvm.cpp b/src/zig_llvm.cpp
index 3e92752d9f..97c07ab820 100644
--- a/src/zig_llvm.cpp
+++ b/src/zig_llvm.cpp
@@ -43,31 +43,8 @@
 
 #include <stdlib.h>
 
-#if defined(_MSC_VER)
-#define ATTRIBUTE_RETURNS_NOALIAS __declspec(restrict)
-#else
-#define ATTRIBUTE_RETURNS_NOALIAS __attribute__((__malloc__))
-#endif
-
 using namespace llvm;
 
-template<typename T, typename... Args>
-ATTRIBUTE_RETURNS_NOALIAS static inline T * create(Args... args) {
-    T * ptr = reinterpret_cast<T*>(malloc(sizeof(T)));
-    if (ptr == nullptr)
-        return nullptr;
-    new (ptr) T(args...);
-    return ptr;
-}
-
-template<typename T>
-static inline void destroy(T * ptr) {
-    if (ptr != nullptr) {
-        ptr[0].~T();
-    }
-    free(ptr);
-}
-
 void ZigLLVMInitializeLoopStrengthReducePass(LLVMPassRegistryRef R) {
     initializeLoopStrengthReducePass(*unwrap(R));
 }
@@ -116,7 +93,11 @@ bool ZigLLVMTargetMachineEmitToFile(LLVMTargetMachineRef targ_machine_ref, LLVMM
 
     Module* module = unwrap(module_ref);
 
-    PassManagerBuilder *PMBuilder = create<PassManagerBuilder>();
+    PassManagerBuilder *PMBuilder = new(std::nothrow) PassManagerBuilder();
+    if (PMBuilder == nullptr) {
+        *error_message = strdup("memory allocation failure");
+        return true;
+    }
     PMBuilder->OptLevel = target_machine->getOptLevel();
     PMBuilder->SizeLevel = 0;
 
@@ -150,7 +131,8 @@ bool ZigLLVMTargetMachineEmitToFile(LLVMTargetMachineRef targ_machine_ref, LLVMM
 
     // Set up the per-function pass manager.
     legacy::FunctionPassManager FPM = legacy::FunctionPassManager(module);
-    FPM.add(create<TargetLibraryInfoWrapperPass>(tlii));
+    auto tliwp = new(std::nothrow) TargetLibraryInfoWrapperPass(tlii);
+    FPM.add(tliwp);
     FPM.add(createTargetTransformInfoWrapperPass(target_machine->getTargetIRAnalysis()));
     if (assertions_on) {
         FPM.add(createVerifierPass());
@@ -446,10 +428,9 @@ unsigned ZigLLVMTag_DW_union_type(void) {
 }
 
 ZigLLVMDIBuilder *ZigLLVMCreateDIBuilder(LLVMModuleRef module, bool allow_unresolved) {
-    DIBuilder *di_builder = reinterpret_cast<DIBuilder*>(malloc(sizeof(DIBuilder)));
+    DIBuilder *di_builder = new(std::nothrow) DIBuilder(*unwrap(module), allow_unresolved);
     if (di_builder == nullptr)
         return nullptr;
-    new (di_builder) DIBuilder(*unwrap(module), allow_unresolved);
     return reinterpret_cast<ZigLLVMDIBuilder *>(di_builder);
 }
 

From c721354b73508ec53bf72d8e7fb304147676625d Mon Sep 17 00:00:00 2001
From: Andrew Kelley <superjoe30@gmail.com>
Date: Tue, 13 Feb 2018 11:17:26 -0500
Subject: [PATCH 4/4] correct doc comment in self hosted parser

---
 std/zig/parser.zig | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/std/zig/parser.zig b/std/zig/parser.zig
index 4dbca8501e..792cc2d834 100644
--- a/std/zig/parser.zig
+++ b/std/zig/parser.zig
@@ -95,7 +95,8 @@ pub const Parser = struct {
     };
 
     /// Returns an AST tree, allocated with the parser's allocator.
-    /// Result should be freed with `freeAst` when done.
+    /// Result should be freed with tree.deinit() when there are
+    /// no more references to any AST nodes of the tree.
     pub fn parse(self: &Parser) !Tree {
         var stack = self.initUtilityArrayList(State);
         defer self.deinitUtilityArrayList(stack);