// Mirror of https://github.com/ziglang/zig.git (synced 2024-11-26).
const std = @import("std");
const builtin = @import("builtin");
const io = std.io;
const fs = std.fs;
const print = std.debug.print;
const mem = std.mem;
const testing = std.testing;
const Allocator = std.mem.Allocator;
const max_doc_file_size = 10 * 1024 * 1024;
const fatal = std.zig.fatal;

pub fn main() !void {
    var arena_instance = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena_instance.deinit();
    const arena = arena_instance.allocator();

    const args = try std.process.argsAlloc(arena);
    if (args.len < 3) fatal("usage: {s} <input> <output>", .{args[0]});
    const input_file = args[1];
    const output_file = args[2];

    var in_file = try fs.cwd().openFile(input_file, .{ .mode = .read_only });
    defer in_file.close();

    var out_file = try fs.cwd().createFile(output_file, .{});
    defer out_file.close();

    var out_dir = try fs.cwd().openDir(fs.path.dirname(output_file).?, .{});
    defer out_dir.close();

    const input_file_bytes = try in_file.reader().readAllAlloc(arena, std.math.maxInt(u32));

    var buffered_writer = io.bufferedWriter(out_file.writer());

    var tokenizer = Tokenizer.init(input_file, input_file_bytes);

    try walk(arena, &tokenizer, out_dir, buffered_writer.writer());

    try buffered_writer.flush();
}

const Token = struct {
    id: Id,
    start: usize,
    end: usize,

    const Id = enum {
        invalid,
        content,
        bracket_open,
        tag_content,
        separator,
        bracket_close,
        eof,
    };
};

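// Doc sources are plain content interleaved with tags of the form
// {#tag_name|arg1|arg2#}. The tokenizer below emits .bracket_open for "{#",
// .tag_content for the name and each argument, .separator for "|", and
// .bracket_close for "#}".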
const Tokenizer = struct {
    buffer: []const u8,
    index: usize,
    state: State,
    source_file_name: []const u8,

    const State = enum {
        start,
        l_bracket,
        hash,
        tag_name,
        eof,
    };

    fn init(source_file_name: []const u8, buffer: []const u8) Tokenizer {
        return Tokenizer{
            .buffer = buffer,
            .index = 0,
            .state = .start,
            .source_file_name = source_file_name,
        };
    }

    fn next(self: *Tokenizer) Token {
        var result = Token{
            .id = .eof,
            .start = self.index,
            .end = undefined,
        };
        while (self.index < self.buffer.len) : (self.index += 1) {
            const c = self.buffer[self.index];
            switch (self.state) {
                .start => switch (c) {
                    '{' => {
                        self.state = .l_bracket;
                    },
                    else => {
                        result.id = .content;
                    },
                },
                .l_bracket => switch (c) {
                    '#' => {
                        if (result.id != .eof) {
                            self.index -= 1;
                            self.state = .start;
                            break;
                        } else {
                            result.id = .bracket_open;
                            self.index += 1;
                            self.state = .tag_name;
                            break;
                        }
                    },
                    else => {
                        result.id = .content;
                        self.state = .start;
                    },
                },
                .tag_name => switch (c) {
                    '|' => {
                        if (result.id != .eof) {
                            break;
                        } else {
                            result.id = .separator;
                            self.index += 1;
                            break;
                        }
                    },
                    '#' => {
                        self.state = .hash;
                    },
                    else => {
                        result.id = .tag_content;
                    },
                },
                .hash => switch (c) {
                    '}' => {
                        if (result.id != .eof) {
                            self.index -= 1;
                            self.state = .tag_name;
                            break;
                        } else {
                            result.id = .bracket_close;
                            self.index += 1;
                            self.state = .start;
                            break;
                        }
                    },
                    else => {
                        result.id = .tag_content;
                        self.state = .tag_name;
                    },
                },
                .eof => unreachable,
            }
        } else {
            switch (self.state) {
                .start, .l_bracket, .eof => {},
                else => {
                    result.id = .invalid;
                },
            }
            self.state = .eof;
        }
        result.end = self.index;
        return result;
    }

    const Location = struct {
        line: usize,
        column: usize,
        line_start: usize,
        line_end: usize,
    };

    fn getTokenLocation(self: *Tokenizer, token: Token) Location {
        var loc = Location{
            .line = 0,
            .column = 0,
            .line_start = 0,
            .line_end = 0,
        };
        for (self.buffer, 0..) |c, i| {
            if (i == token.start) {
                loc.line_end = i;
                while (loc.line_end < self.buffer.len and self.buffer[loc.line_end] != '\n') : (loc.line_end += 1) {}
                return loc;
            }
            if (c == '\n') {
                loc.line += 1;
                loc.column = 0;
                loc.line_start = i + 1;
            } else {
                loc.column += 1;
            }
        }
        return loc;
    }
};

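// A minimal sketch of the tokenizer in action. This test (including its file
// name and input string) is an illustrative addition, not part of the
// original tool.
test "Tokenizer splits content and tags" {
    var tokenizer = Tokenizer.init("example.html.in", "abc{#x#}");

    const content = tokenizer.next();
    try testing.expectEqual(Token.Id.content, content.id);
    try testing.expectEqualStrings("abc", tokenizer.buffer[content.start..content.end]);

    try testing.expectEqual(Token.Id.bracket_open, tokenizer.next().id);

    const tag = tokenizer.next();
    try testing.expectEqual(Token.Id.tag_content, tag.id);
    try testing.expectEqualStrings("x", tokenizer.buffer[tag.start..tag.end]);

    try testing.expectEqual(Token.Id.bracket_close, tokenizer.next().id);
    try testing.expectEqual(Token.Id.eof, tokenizer.next().id);
}
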
fn parseError(tokenizer: *Tokenizer, token: Token, comptime fmt: []const u8, args: anytype) anyerror {
    const loc = tokenizer.getTokenLocation(token);
    const args_prefix = .{ tokenizer.source_file_name, loc.line + 1, loc.column + 1 };
    print("{s}:{d}:{d}: error: " ++ fmt ++ "\n", args_prefix ++ args);
    if (loc.line_start <= loc.line_end) {
        print("{s}\n", .{tokenizer.buffer[loc.line_start..loc.line_end]});
        {
            var i: usize = 0;
            while (i < loc.column) : (i += 1) {
                print(" ", .{});
            }
        }
        {
            const caret_count = @min(token.end, loc.line_end) - token.start;
            var i: usize = 0;
            while (i < caret_count) : (i += 1) {
                print("~", .{});
            }
        }
        print("\n", .{});
    }
    return error.ParseError;
}

fn assertToken(tokenizer: *Tokenizer, token: Token, id: Token.Id) !void {
    if (token.id != id) {
        return parseError(tokenizer, token, "expected {s}, found {s}", .{ @tagName(id), @tagName(token.id) });
    }
}

fn eatToken(tokenizer: *Tokenizer, id: Token.Id) !Token {
    const token = tokenizer.next();
    try assertToken(tokenizer, token, id);
    return token;
}

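// The expected outcome of building and running an "exe" example; it is set
// from the "exe", "exe_err", and "exe_build_err" code kinds parsed in walk
// below.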
const ExpectedOutcome = enum {
    succeed,
    fail,
    build_fail,
};

const Code = struct {
    id: Id,
    name: []const u8,
    source_token: Token,
    just_check_syntax: bool,
    mode: std.builtin.OptimizeMode,
    link_objects: []const []const u8,
    target_str: ?[]const u8,
    link_libc: bool,
    link_mode: ?std.builtin.LinkMode,
    disable_cache: bool,
    verbose_cimport: bool,
    additional_options: []const []const u8,

    const Id = union(enum) {
        @"test",
        test_error: []const u8,
        test_safety: []const u8,
        exe: ExpectedOutcome,
        obj: ?[]const u8,
        lib,
    };
};

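// Copies content tokens through to `w` unchanged. A {#code_begin|kind|name#}
// ... {#code_end#} block is expanded instead: the guarded source is written
// to a standalone name.zig file in `out_dir`, with its build expectations
// encoded as trailing "// key=value" comments, and the block is replaced by
// a {#code|name.zig#} tag. Any other tag passes through verbatim.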
fn walk(arena: Allocator, tokenizer: *Tokenizer, out_dir: std.fs.Dir, w: anytype) !void {
    while (true) {
        const token = tokenizer.next();
        switch (token.id) {
            .eof => break,
            .content => {
                try w.writeAll(tokenizer.buffer[token.start..token.end]);
            },
            .bracket_open => {
                const tag_token = try eatToken(tokenizer, .tag_content);
                const tag_name = tokenizer.buffer[tag_token.start..tag_token.end];

                if (mem.eql(u8, tag_name, "code_begin")) {
                    _ = try eatToken(tokenizer, .separator);
                    const code_kind_tok = try eatToken(tokenizer, .tag_content);
                    _ = try eatToken(tokenizer, .separator);
                    const name_tok = try eatToken(tokenizer, .tag_content);
                    const name = tokenizer.buffer[name_tok.start..name_tok.end];
                    var error_str: []const u8 = "";
                    const maybe_sep = tokenizer.next();
                    switch (maybe_sep.id) {
                        .separator => {
                            const error_tok = try eatToken(tokenizer, .tag_content);
                            error_str = tokenizer.buffer[error_tok.start..error_tok.end];
                            _ = try eatToken(tokenizer, .bracket_close);
                        },
                        .bracket_close => {},
                        else => return parseError(tokenizer, maybe_sep, "invalid token", .{}),
                    }
                    const code_kind_str = tokenizer.buffer[code_kind_tok.start..code_kind_tok.end];
                    var code_kind_id: Code.Id = undefined;
                    var just_check_syntax = false;
                    if (mem.eql(u8, code_kind_str, "exe")) {
                        code_kind_id = Code.Id{ .exe = .succeed };
                    } else if (mem.eql(u8, code_kind_str, "exe_err")) {
                        code_kind_id = Code.Id{ .exe = .fail };
                    } else if (mem.eql(u8, code_kind_str, "exe_build_err")) {
                        code_kind_id = Code.Id{ .exe = .build_fail };
                    } else if (mem.eql(u8, code_kind_str, "test")) {
                        code_kind_id = .@"test";
                    } else if (mem.eql(u8, code_kind_str, "test_err")) {
                        code_kind_id = Code.Id{ .test_error = error_str };
                    } else if (mem.eql(u8, code_kind_str, "test_safety")) {
                        code_kind_id = Code.Id{ .test_safety = error_str };
                    } else if (mem.eql(u8, code_kind_str, "obj")) {
                        code_kind_id = Code.Id{ .obj = null };
                    } else if (mem.eql(u8, code_kind_str, "obj_err")) {
                        code_kind_id = Code.Id{ .obj = error_str };
                    } else if (mem.eql(u8, code_kind_str, "lib")) {
                        code_kind_id = Code.Id.lib;
                    } else if (mem.eql(u8, code_kind_str, "syntax")) {
                        code_kind_id = Code.Id{ .obj = null };
                        just_check_syntax = true;
                    } else {
                        return parseError(tokenizer, code_kind_tok, "unrecognized code kind: {s}", .{code_kind_str});
                    }

                    var mode: std.builtin.OptimizeMode = .Debug;
                    var link_objects = std.ArrayList([]const u8).init(arena);
                    var target_str: ?[]const u8 = null;
                    var link_libc = false;
                    var link_mode: ?std.builtin.LinkMode = null;
                    var disable_cache = false;
                    var verbose_cimport = false;
                    var additional_options = std.ArrayList([]const u8).init(arena);

                    const source_token = while (true) {
                        const content_tok = try eatToken(tokenizer, .content);
                        _ = try eatToken(tokenizer, .bracket_open);
                        const end_code_tag = try eatToken(tokenizer, .tag_content);
                        const end_tag_name = tokenizer.buffer[end_code_tag.start..end_code_tag.end];
                        if (mem.eql(u8, end_tag_name, "code_release_fast")) {
                            mode = .ReleaseFast;
                        } else if (mem.eql(u8, end_tag_name, "code_release_safe")) {
                            mode = .ReleaseSafe;
                        } else if (mem.eql(u8, end_tag_name, "code_disable_cache")) {
                            disable_cache = true;
                        } else if (mem.eql(u8, end_tag_name, "code_verbose_cimport")) {
                            verbose_cimport = true;
                        } else if (mem.eql(u8, end_tag_name, "code_link_object")) {
                            _ = try eatToken(tokenizer, .separator);
                            const obj_tok = try eatToken(tokenizer, .tag_content);
                            try link_objects.append(tokenizer.buffer[obj_tok.start..obj_tok.end]);
                        } else if (mem.eql(u8, end_tag_name, "target_windows")) {
                            target_str = "x86_64-windows";
                        } else if (mem.eql(u8, end_tag_name, "target_linux_x86_64")) {
                            target_str = "x86_64-linux";
                        } else if (mem.eql(u8, end_tag_name, "target_linux_riscv64")) {
                            target_str = "riscv64-linux";
                        } else if (mem.eql(u8, end_tag_name, "target_wasm")) {
                            target_str = "wasm32-freestanding";
                        } else if (mem.eql(u8, end_tag_name, "target_wasi")) {
                            target_str = "wasm32-wasi";
                        } else if (mem.eql(u8, end_tag_name, "link_libc")) {
                            link_libc = true;
                        } else if (mem.eql(u8, end_tag_name, "link_mode_dynamic")) {
                            link_mode = .dynamic;
                        } else if (mem.eql(u8, end_tag_name, "additonal_option")) { // (sic) matches the misspelled tag used by the doc sources
                            _ = try eatToken(tokenizer, .separator);
                            const option = try eatToken(tokenizer, .tag_content);
                            try additional_options.append(tokenizer.buffer[option.start..option.end]);
                        } else if (mem.eql(u8, end_tag_name, "code_end")) {
                            _ = try eatToken(tokenizer, .bracket_close);
                            break content_tok;
                        } else {
                            return parseError(
                                tokenizer,
                                end_code_tag,
                                "invalid token inside code_begin: {s}",
                                .{end_tag_name},
                            );
                        }
                        _ = try eatToken(tokenizer, .bracket_close);
                    };

                    const basename = try std.fmt.allocPrint(arena, "{s}.zig", .{name});

                    var file = out_dir.createFile(basename, .{ .exclusive = true }) catch |err| {
                        fatal("unable to create file '{s}': {s}", .{ basename, @errorName(err) });
                    };
                    defer file.close();

                    const source = tokenizer.buffer[source_token.start..source_token.end];
                    try file.writeAll(std.mem.trim(u8, source[1..], " \t\r\n"));
                    try file.writeAll("\n\n");

                    if (just_check_syntax) {
                        try file.writer().print("// syntax\n", .{});
                    } else switch (code_kind_id) {
                        .@"test" => try file.writer().print("// test\n", .{}),
                        .lib => try file.writer().print("// lib\n", .{}),
                        .test_error => |s| try file.writer().print("// test_error={s}\n", .{s}),
                        .test_safety => |s| try file.writer().print("// test_safety={s}\n", .{s}),
                        .exe => |s| try file.writer().print("// exe={s}\n", .{@tagName(s)}),
                        .obj => |opt| if (opt) |s| {
                            try file.writer().print("// obj={s}\n", .{s});
                        } else {
                            try file.writer().print("// obj\n", .{});
                        },
                    }

                    if (mode != .Debug)
                        try file.writer().print("// optimize={s}\n", .{@tagName(mode)});

                    for (link_objects.items) |link_object| {
                        try file.writer().print("// link_object={s}\n", .{link_object});
                    }

                    if (target_str) |s|
                        try file.writer().print("// target={s}\n", .{s});

                    if (link_libc) try file.writer().print("// link_libc\n", .{});
                    if (disable_cache) try file.writer().print("// disable_cache\n", .{});
                    if (verbose_cimport) try file.writer().print("// verbose_cimport\n", .{});

                    if (link_mode) |m|
                        try file.writer().print("// link_mode={s}\n", .{@tagName(m)});

                    for (additional_options.items) |o| {
                        try file.writer().print("// additional_option={s}\n", .{o});
                    }
                    try w.print("{{#code|{s}#}}\n", .{basename});
                } else {
                    const close_bracket = while (true) {
                        const next = tokenizer.next();
                        if (next.id == .bracket_close) break next;
                        // An unterminated tag would otherwise loop forever at
                        // end of file.
                        if (next.id == .eof) return parseError(tokenizer, token, "unexpected end of file inside tag", .{});
                    };
                    try w.writeAll(tokenizer.buffer[token.start..close_bracket.end]);
                }
            },
            else => return parseError(tokenizer, token, "invalid token", .{}),
        }
    }
}

fn urlize(allocator: Allocator, input: []const u8) ![]u8 {
    var buf = std.ArrayList(u8).init(allocator);
    defer buf.deinit();

    const out = buf.writer();
    for (input) |c| {
        switch (c) {
            'a'...'z', 'A'...'Z', '_', '-', '0'...'9' => {
                try out.writeByte(c);
            },
            ' ' => {
                try out.writeByte('-');
            },
            else => {},
        }
    }
    return try buf.toOwnedSlice();
}

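// Illustrative check of urlize, not part of the original file: letters,
// digits, '_' and '-' pass through, spaces become dashes, and everything
// else is dropped.
test "urlize replaces spaces and drops punctuation" {
    const slug = try urlize(testing.allocator, "Hello, World 42!");
    defer testing.allocator.free(slug);
    try testing.expectEqualStrings("Hello-World-42", slug);
}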