From 3c43bc9208701af151a0346744e3bfc7eae43042 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Wed, 6 Jan 2016 01:28:58 -0700 Subject: [PATCH] support unknown size arrays --- doc/langref.md | 2 +- example/arrays/arrays.zig | 13 ++++- example/rand/main.zig | 16 ++++-- src/analyze.cpp | 118 ++++++++++++++++++++++++++++---------- src/analyze.hpp | 6 +- src/codegen.cpp | 43 ++------------ src/parser.cpp | 14 ++++- std/std.zig | 8 ++- 8 files changed, 137 insertions(+), 83 deletions(-) diff --git a/doc/langref.md b/doc/langref.md index 7a83fea8f8..8de57e3872 100644 --- a/doc/langref.md +++ b/doc/langref.md @@ -72,7 +72,7 @@ PointerType : token(Ampersand) option(token(Const)) Type MaybeType : token(Question) Type -ArrayType : token(LBracket) option(Expression) token(RBracket) Type +ArrayType : token(LBracket) option(Expression) token(RBracket) option(token(Const)) Type Block : token(LBrace) list(option(Statement), token(Semicolon)) token(RBrace) diff --git a/example/arrays/arrays.zig b/example/arrays/arrays.zig index 519b658d2f..957025efdd 100644 --- a/example/arrays/arrays.zig +++ b/example/arrays/arrays.zig @@ -19,9 +19,18 @@ pub fn main(argc: isize, argv: &&u8, env: &&u8) -> i32 { i += 1; } - if (accumulator == 15) { - print_str("OK\n"); + if (accumulator != 15) { + print_str("BAD\n"); } + if (get_array_len(array) != 5) { + print_str("BAD\n"); + } + + print_str("OK\n"); return 0; } + +fn get_array_len(a: []u32) -> usize { + a.len +} diff --git a/example/rand/main.zig b/example/rand/main.zig index 394a0571d7..c4bb90cb75 100644 --- a/example/rand/main.zig +++ b/example/rand/main.zig @@ -7,8 +7,10 @@ const ARRAY_SIZE : u16 = 624; /// Use `rand_init` to initialize this state. struct Rand { - array: [u32; ARRAY_SIZE], - index: #typeof(ARRAY_SIZE), + // TODO use ARRAY_SIZE here + array: [624]u32, + // TODO use ARRAY_SIZE here + index: #typeof(624), /// Get 32 bits of randomness. pub fn get_u32(r: &Rand) -> u32 { @@ -31,10 +33,11 @@ struct Rand { pub fn get_bytes(r: &Rand, buf: []u8) { var bytes_left = r.get_bytes_aligned(buf); if (bytes_left > 0) { - var rand_val_array : [u8; #sizeof(u32)]; + var rand_val_array : [#sizeof(u32)]u8; *(rand_val_array.ptr as &u32) = r.get_u32(); while (bytes_left > 0) { - buf[buf.len - bytes_left] = rand_val_array[#sizeof(u32) - bytes_left]; + // TODO array index operator so we can remove the .ptr + buf.ptr[buf.len - bytes_left] = rand_val_array[#sizeof(u32) - bytes_left]; bytes_left -= 1; } } @@ -46,7 +49,7 @@ struct Rand { const range = end - start; const leftover = #max_value(u64) % range; const upper_bound = #max_value(u64) - leftover; - var rand_val_array : [u8; #sizeof(u64)]; + var rand_val_array : [#sizeof(u64)]u8; while (true) { r.get_bytes_aligned(rand_val_array); @@ -79,7 +82,8 @@ struct Rand { fn get_bytes_aligned(r: &Rand, buf: []u8) -> usize { var bytes_left = buf.len; while (bytes_left > 4) { - *(&buf[buf.len - bytes_left] as &u32) = r.get_u32(); + // TODO: array access so we can remove .ptr + *(&buf.ptr[buf.len - bytes_left] as &u32) = r.get_u32(); bytes_left -= #sizeof(u32); } return bytes_left; diff --git a/src/analyze.cpp b/src/analyze.cpp index d4cea049a8..51f53fa800 100644 --- a/src/analyze.cpp +++ b/src/analyze.cpp @@ -219,7 +219,7 @@ static TypeTableEntry *get_array_type(CodeGen *g, ImportTableEntry *import, TypeTableEntry *entry = new_type_table_entry(TypeTableEntryIdArray); entry->type_ref = LLVMArrayType(child_type->type_ref, array_size); buf_resize(&entry->name, 0); - buf_appendf(&entry->name, "[%s; %" PRIu64 "]", buf_ptr(&child_type->name), array_size); + buf_appendf(&entry->name, "[%" PRIu64 "]%s", array_size, buf_ptr(&child_type->name)); entry->size_in_bits = child_type->size_in_bits * array_size; entry->align_in_bits = child_type->align_in_bits; @@ -235,6 +235,55 @@ static TypeTableEntry *get_array_type(CodeGen *g, ImportTableEntry *import, } } +static TypeTableEntry *get_unknown_size_array_type(CodeGen *g, ImportTableEntry *import, + TypeTableEntry *child_type, bool is_const) +{ + TypeTableEntry **parent_pointer = is_const ? + &child_type->unknown_size_array_const_parent : + &child_type->unknown_size_array_mut_parent; + if (*parent_pointer) { + return *parent_pointer; + } else { + TypeTableEntry *entry = new_type_table_entry(TypeTableEntryIdStruct); + + buf_resize(&entry->name, 0); + buf_appendf(&entry->name, "[]%s", buf_ptr(&child_type->name)); + entry->type_ref = LLVMStructCreateNamed(LLVMGetGlobalContext(), buf_ptr(&entry->name)); + + TypeTableEntry *pointer_type = get_pointer_to_type(g, child_type, is_const); + + unsigned element_count = 2; + LLVMTypeRef element_types[] = { + pointer_type->type_ref, + g->builtin_types.entry_usize->type_ref, + }; + LLVMStructSetBody(entry->type_ref, element_types, element_count, false); + + entry->size_in_bits = g->pointer_size_bytes * 2 * 8; + entry->align_in_bits = g->pointer_size_bytes * 8; + entry->data.structure.is_packed = false; + entry->data.structure.is_unknown_size_array = true; + entry->data.structure.field_count = element_count; + entry->data.structure.fields = allocate(element_count); + entry->data.structure.fields[0].name = buf_create_from_str("ptr"); + entry->data.structure.fields[0].type_entry = pointer_type; + entry->data.structure.fields[1].name = buf_create_from_str("len"); + entry->data.structure.fields[1].type_entry = g->builtin_types.entry_usize; + + LLVMZigDIType *di_element_types[] = { + pointer_type->di_type, + g->builtin_types.entry_usize->di_type, + }; + LLVMZigDIScope *compile_unit_scope = LLVMZigCompileUnitToScope(g->compile_unit); + entry->di_type = LLVMZigCreateDebugStructType(g->dbuilder, compile_unit_scope, + buf_ptr(&entry->name), g->dummy_di_file, 0, entry->size_in_bits, entry->align_in_bits, 0, + nullptr, di_element_types, element_count, 0, nullptr, ""); + + *parent_pointer = entry; + return entry; + } +} + static TypeTableEntry *eval_const_expr(CodeGen *g, BlockContext *context, AstNode *node, AstNodeNumberLiteral *out_number_literal) { @@ -313,38 +362,47 @@ static TypeTableEntry *resolve_type(CodeGen *g, AstNode *node, ImportTableEntry } case AstNodeTypeTypeArray: { - resolve_type(g, node->data.type.child_type, import, context); - TypeTableEntry *child_type = node->data.type.child_type->codegen_node->data.type_node.entry; + TypeTableEntry *child_type = resolve_type(g, node->data.type.child_type, import, context); if (child_type->id == TypeTableEntryIdUnreachable) { add_node_error(g, node, buf_create_from_str("array of unreachable not allowed")); - } - - AstNode *size_node = node->data.type.array_size; - TypeTableEntry *size_type = analyze_expression(g, import, context, - g->builtin_types.entry_usize, size_node); - if (size_type->id == TypeTableEntryIdInvalid) { type_node->entry = g->builtin_types.entry_invalid; return type_node->entry; } - AstNodeNumberLiteral number_literal; - TypeTableEntry *resolved_type = eval_const_expr(g, context, size_node, &number_literal); + AstNode *size_node = node->data.type.array_size; - if (resolved_type->id == TypeTableEntryIdInt) { - if (resolved_type->data.integral.is_signed) { - add_node_error(g, size_node, - buf_create_from_str("array size must be unsigned integer")); + if (size_node) { + TypeTableEntry *size_type = analyze_expression(g, import, context, + g->builtin_types.entry_usize, size_node); + if (size_type->id == TypeTableEntryIdInvalid) { type_node->entry = g->builtin_types.entry_invalid; - } else { - type_node->entry = get_array_type(g, import, child_type, number_literal.data.x_uint); + return type_node->entry; } + + AstNodeNumberLiteral number_literal; + TypeTableEntry *resolved_type = eval_const_expr(g, context, size_node, &number_literal); + + if (resolved_type->id == TypeTableEntryIdInt) { + if (resolved_type->data.integral.is_signed) { + add_node_error(g, size_node, + buf_create_from_str("array size must be unsigned integer")); + type_node->entry = g->builtin_types.entry_invalid; + } else { + type_node->entry = get_array_type(g, import, child_type, number_literal.data.x_uint); + } + } else { + add_node_error(g, size_node, + buf_create_from_str("unable to resolve constant expression")); + type_node->entry = g->builtin_types.entry_invalid; + } + return type_node->entry; } else { - add_node_error(g, size_node, - buf_create_from_str("unable to resolve constant expression")); - type_node->entry = g->builtin_types.entry_invalid; + type_node->entry = get_unknown_size_array_type(g, import, child_type, + node->data.type.is_const); + return type_node->entry; } - return type_node->entry; + } case AstNodeTypeTypeMaybe: { @@ -1016,13 +1074,14 @@ static TypeTableEntry *resolve_type_compatibility(CodeGen *g, BlockContext *cont return expected_type; } - // implicit constant sized array to string conversion - if (expected_type == g->builtin_types.entry_string && + // implicit constant sized array to unknown size array conversion + if (expected_type->id == TypeTableEntryIdStruct && + expected_type->data.structure.is_unknown_size_array && actual_type->id == TypeTableEntryIdArray && - actual_type->data.array.child_type == g->builtin_types.entry_u8) + actual_type->data.array.child_type == expected_type->data.structure.fields[0].type_entry->data.pointer.child_type) { node->codegen_node->expr_node.implicit_cast.after_type = expected_type; - node->codegen_node->expr_node.implicit_cast.op = CastOpArrayToString; + node->codegen_node->expr_node.implicit_cast.op = CastOpToUnknownSizeArray; node->codegen_node->expr_node.implicit_cast.source_node = node; context->cast_expr_alloca_list.append(&node->codegen_node->expr_node.implicit_cast); return expected_type; @@ -1292,11 +1351,12 @@ static TypeTableEntry *analyze_cast_expr(CodeGen *g, ImportTableEntry *import, B { cast_node->op = CastOpIntWidenOrShorten; return wanted_type; - } else if (wanted_type == g->builtin_types.entry_string && - actual_type->id == TypeTableEntryIdArray && - actual_type->data.array.child_type == g->builtin_types.entry_u8) + } else if (wanted_type->id == TypeTableEntryIdStruct && + wanted_type->data.structure.is_unknown_size_array && + actual_type->id == TypeTableEntryIdArray && + actual_type->data.array.child_type == wanted_type->data.structure.fields[0].type_entry) { - cast_node->op = CastOpArrayToString; + cast_node->op = CastOpToUnknownSizeArray; context->cast_expr_alloca_list.append(cast_node); return wanted_type; } else if (actual_type->id == TypeTableEntryIdNumberLiteral && diff --git a/src/analyze.hpp b/src/analyze.hpp index dcb85c94a1..8e3f569302 100644 --- a/src/analyze.hpp +++ b/src/analyze.hpp @@ -46,6 +46,7 @@ struct TypeTableEntryStruct { TypeStructField *fields; uint64_t size_bytes; bool is_invalid; // true if any fields are invalid + bool is_unknown_size_array; // reminder: hash tables must be initialized before use HashMap fn_table; @@ -100,6 +101,8 @@ struct TypeTableEntry { TypeTableEntry *pointer_mut_parent; HashMap arrays_by_size; TypeTableEntry *maybe_parent; + TypeTableEntry *unknown_size_array_const_parent; + TypeTableEntry *unknown_size_array_mut_parent; }; @@ -175,7 +178,6 @@ struct CodeGen { TypeTableEntry *entry_f32; TypeTableEntry *entry_f64; TypeTableEntry *entry_c_string_literal; - TypeTableEntry *entry_string; TypeTableEntry *entry_void; TypeTableEntry *entry_unreachable; TypeTableEntry *entry_invalid; @@ -283,7 +285,7 @@ enum CastOp { CastOpNothing, CastOpPtrToInt, CastOpIntWidenOrShorten, - CastOpArrayToString, + CastOpToUnknownSizeArray, CastOpMaybeWrap, CastOpPointerReinterpret, }; diff --git a/src/codegen.cpp b/src/codegen.cpp index 9c2cf3c88f..403261acc4 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -462,14 +462,17 @@ static LLVMValueRef gen_bare_cast(CodeGen *g, AstNode *node, LLVMValueRef expr_v add_debug_source_node(g, node); return LLVMBuildTrunc(g->builder, expr_val, wanted_type->type_ref, ""); } - case CastOpArrayToString: + case CastOpToUnknownSizeArray: { assert(cast_node->ptr); + TypeTableEntry *pointer_type = wanted_type->data.structure.fields[0].type_entry; + add_debug_source_node(g, node); LLVMValueRef ptr_ptr = LLVMBuildStructGEP(g->builder, cast_node->ptr, 0, ""); - LLVMBuildStore(g->builder, expr_val, ptr_ptr); + LLVMValueRef expr_bitcast = LLVMBuildBitCast(g->builder, expr_val, pointer_type->type_ref, ""); + LLVMBuildStore(g->builder, expr_bitcast, ptr_ptr); LLVMValueRef len_ptr = LLVMBuildStructGEP(g->builder, cast_node->ptr, 1, ""); LLVMValueRef len_val = LLVMConstInt(g->builtin_types.entry_usize->type_ref, @@ -1925,41 +1928,6 @@ static void define_builtin_types(CodeGen *g) { entry->di_type = g->builtin_types.entry_void->di_type; g->builtin_types.entry_unreachable = entry; } - { - TypeTableEntry *entry = new_type_table_entry(TypeTableEntryIdStruct); - - TypeTableEntry *const_pointer_to_u8 = get_pointer_to_type(g, g->builtin_types.entry_u8, true); - - unsigned element_count = 2; - LLVMTypeRef element_types[] = { - const_pointer_to_u8->type_ref, - g->builtin_types.entry_usize->type_ref - }; - entry->type_ref = LLVMStructCreateNamed(LLVMGetGlobalContext(), "string"); - LLVMStructSetBody(entry->type_ref, element_types, element_count, false); - - buf_init_from_str(&entry->name, "string"); - entry->size_in_bits = g->pointer_size_bytes * 2 * 8; - entry->align_in_bits = g->pointer_size_bytes; - entry->data.structure.is_packed = false; - entry->data.structure.field_count = element_count; - entry->data.structure.fields = allocate(element_count); - entry->data.structure.fields[0].name = buf_create_from_str("ptr"); - entry->data.structure.fields[0].type_entry = const_pointer_to_u8; - entry->data.structure.fields[1].name = buf_create_from_str("len"); - entry->data.structure.fields[1].type_entry = g->builtin_types.entry_usize; - - LLVMZigDIType *di_element_types[] = { - const_pointer_to_u8->di_type, - g->builtin_types.entry_usize->di_type - }; - LLVMZigDIScope *compile_unit_scope = LLVMZigCompileUnitToScope(g->compile_unit); - entry->di_type = LLVMZigCreateDebugStructType(g->dbuilder, compile_unit_scope, - "string", g->dummy_di_file, 0, entry->size_in_bits, entry->align_in_bits, 0, - nullptr, di_element_types, element_count, 0, nullptr, ""); - - g->builtin_types.entry_string = entry; - } } @@ -2103,7 +2071,6 @@ static ImportTableEntry *codegen_add_code(CodeGen *g, Buf *abs_full_path, import_entry->type_table.put(&g->builtin_types.entry_f64->name, g->builtin_types.entry_f64); import_entry->type_table.put(&g->builtin_types.entry_void->name, g->builtin_types.entry_void); import_entry->type_table.put(&g->builtin_types.entry_unreachable->name, g->builtin_types.entry_unreachable); - import_entry->type_table.put(&g->builtin_types.entry_string->name, g->builtin_types.entry_string); import_entry->root = ast_parse(source_code, tokenization.tokens, import_entry, g->err_color); assert(import_entry->root); diff --git a/src/parser.cpp b/src/parser.cpp index 36038a3bc2..24daca8864 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -219,7 +219,7 @@ void ast_print(AstNode *node, int indent) { } case AstNodeTypeTypePointer: { - const char *const_or_mut_str = node->data.type.is_const ? "const" : "mut"; + const char *const_or_mut_str = node->data.type.is_const ? "const" : "var"; fprintf(stderr, "'%s' PointerType\n", const_or_mut_str); ast_print(node->data.type.child_type, indent + 2); @@ -227,9 +227,11 @@ void ast_print(AstNode *node, int indent) { } case AstNodeTypeTypeArray: { - fprintf(stderr, "ArrayType\n"); + const char *const_or_mut_str = node->data.type.is_const ? "const" : "var"; + fprintf(stderr, "'%s' ArrayType\n", const_or_mut_str); + if (node->data.type.array_size) + ast_print(node->data.type.array_size, indent + 2); ast_print(node->data.type.child_type, indent + 2); - ast_print(node->data.type.array_size, indent + 2); break; } case AstNodeTypeTypeMaybe: @@ -1107,6 +1109,12 @@ static AstNode *ast_parse_type(ParseContext *pc, int *token_index) { ast_eat_token(pc, token_index, TokenIdRBracket); + Token *const_tok = &pc->tokens->at(*token_index); + if (const_tok->id == TokenIdKeywordConst) { + *token_index += 1; + node->data.type.is_const = true; + } + node->data.type.child_type = ast_parse_type(pc, token_index); } else { ast_invalid_token_error(pc, token); diff --git a/std/std.zig b/std/std.zig index 4b47100b59..ad6acea51d 100644 --- a/std/std.zig +++ b/std/std.zig @@ -39,13 +39,13 @@ pub fn os_get_random_bytes(buf: &u8, count: usize) -> isize { // TODO error handling // TODO handle buffering and flushing (mutex protected) -pub fn print_str(str: string) -> isize { +pub fn print_str(str: []const u8) -> isize { fprint_str(stdout_fileno, str) } // TODO error handling // TODO handle buffering and flushing (mutex protected) -pub fn fprint_str(fd: isize, str: string) -> isize { +pub fn fprint_str(fd: isize, str: []const u8) -> isize { write(fd, str.ptr, str.len) } @@ -73,6 +73,9 @@ fn digit_to_char(digit: u64) -> u8 { const max_u64_base10_digits: usize = 20; +// TODO use an array for out_buf instead of pointer. this should give bounds checking in +// debug mode and length can get optimized out in release mode. requires array slicing syntax +// for the buf_print_u64 call. fn buf_print_i64(out_buf: &u8, x: i64) -> usize { if (x < 0) { out_buf[0] = '-'; @@ -82,6 +85,7 @@ fn buf_print_i64(out_buf: &u8, x: i64) -> usize { } } +// TODO use an array for out_buf instead of pointer. fn buf_print_u64(out_buf: &u8, x: u64) -> usize { var buf: [max_u64_base10_digits]u8; var a = x;