From b54514d9dd15225ef2578b33c4c384db4680b90b Mon Sep 17 00:00:00 2001 From: Evan Haas Date: Thu, 18 Mar 2021 05:41:04 -0700 Subject: [PATCH] translate-c: Use [N:0] arrays when initializer is a string literal (#8264) * translate-c: Use [N:0] arrays when initializer is a string literal Translate incomplete arrays as [N:0] when initialized by a string literal. This preserves a bit more of the type information from the original C program. Fixes #8215 --- src/translate_c.zig | 126 ++++++++++++++++++++++++---------------- src/translate_c/ast.zig | 86 ++++++++++++++++++++++++++- test/translate_c.zig | 81 ++++++++++++++++---------- 3 files changed, 209 insertions(+), 84 deletions(-) diff --git a/src/translate_c.zig b/src/translate_c.zig index e63ced58e9..16881eef5d 100644 --- a/src/translate_c.zig +++ b/src/translate_c.zig @@ -636,7 +636,7 @@ fn visitVarDecl(c: *Context, var_decl: *const clang.VarDecl, mangled_name: ?[]co if (has_init) trans_init: { if (decl_init) |expr| { const node_or_error = if (expr.getStmtClass() == .StringLiteralClass) - transStringLiteralAsArray(c, scope, @ptrCast(*const clang.StringLiteral, expr), zigArraySize(c, type_node) catch 0) + transStringLiteralInitializer(c, scope, @ptrCast(*const clang.StringLiteral, expr), type_node) else transExprCoercing(c, scope, expr, .used); init_node = node_or_error catch |err| switch (err) { @@ -1412,7 +1412,7 @@ fn transDeclStmtOne( var init_node = if (decl_init) |expr| if (expr.getStmtClass() == .StringLiteralClass) - try transStringLiteralAsArray(c, scope, @ptrCast(*const clang.StringLiteral, expr), try zigArraySize(c, type_node)) + try transStringLiteralInitializer(c, scope, @ptrCast(*const clang.StringLiteral, expr), type_node) else try transExprCoercing(c, scope, expr, .used) else @@ -1758,6 +1758,20 @@ fn transReturnStmt( return Tag.@"return".create(c.arena, rhs); } +fn transNarrowStringLiteral( + c: *Context, + scope: *Scope, + stmt: *const clang.StringLiteral, + result_used: ResultUsed, +) 
TransError!Node { + var len: usize = undefined; + const bytes_ptr = stmt.getString_bytes_begin_size(&len); + + const str = try std.fmt.allocPrint(c.arena, "\"{}\"", .{std.zig.fmtEscapes(bytes_ptr[0..len])}); + const node = try Tag.string_literal.create(c.arena, str); + return maybeSuppressResult(c, scope, result_used, node); +} + fn transStringLiteral( c: *Context, scope: *Scope, @@ -1766,19 +1780,14 @@ fn transStringLiteral( ) TransError!Node { const kind = stmt.getKind(); switch (kind) { - .Ascii, .UTF8 => { - var len: usize = undefined; - const bytes_ptr = stmt.getString_bytes_begin_size(&len); - - const str = try std.fmt.allocPrint(c.arena, "\"{}\"", .{std.zig.fmtEscapes(bytes_ptr[0..len])}); - const node = try Tag.string_literal.create(c.arena, str); - return maybeSuppressResult(c, scope, result_used, node); - }, + .Ascii, .UTF8 => return transNarrowStringLiteral(c, scope, stmt, result_used), .UTF16, .UTF32, .Wide => { const str_type = @tagName(stmt.getKind()); const name = try std.fmt.allocPrint(c.arena, "zig.{s}_string_{d}", .{ str_type, c.getMangle() }); - const lit_array = try transStringLiteralAsArray(c, scope, stmt, stmt.getLength() + 1); + const expr_base = @ptrCast(*const clang.Expr, stmt); + const array_type = try transQualTypeInitialized(c, scope, expr_base.getType(), expr_base, expr_base.getBeginLoc()); + const lit_array = try transStringLiteralInitializer(c, scope, stmt, array_type); const decl = try Tag.var_simple.create(c.arena, .{ .name = name, .init = lit_array }); try scope.appendNode(decl); const node = try Tag.identifier.create(c.arena, name); @@ -1787,52 +1796,67 @@ fn transStringLiteral( } } -/// Parse the size of an array back out from an ast Node. 
-fn zigArraySize(c: *Context, node: Node) TransError!usize { - if (node.castTag(.array_type)) |array| { - return array.data.len; - } - return error.UnsupportedTranslation; +fn getArrayPayload(array_type: Node) ast.Payload.Array.ArrayTypeInfo { + return (array_type.castTag(.array_type) orelse array_type.castTag(.null_sentinel_array_type).?).data; } -/// Translate a string literal to an array of integers. Used when an -/// array is initialized from a string literal. `array_size` is the -/// size of the array being initialized. If the string literal is larger -/// than the array, truncate the string. If the array is larger than the -/// string literal, pad the array with 0's -fn transStringLiteralAsArray( +/// Translate a string literal that is initializing an array. In general narrow string +/// literals become `"<string>".*` or `"<string>"[0..<size>].*` if they need truncation. +/// Wide string literals become an array of integers. zero-fillers pad out the array to +/// the appropriate length, if necessary. +fn transStringLiteralInitializer( c: *Context, scope: *Scope, stmt: *const clang.StringLiteral, - array_size: usize, + array_type: Node, ) TransError!Node { - if (array_size == 0) return error.UnsupportedType; + assert(array_type.tag() == .array_type or array_type.tag() == .null_sentinel_array_type); + + const is_narrow = stmt.getKind() == .Ascii or stmt.getKind() == .UTF8; const str_length = stmt.getLength(); + const payload = getArrayPayload(array_type); + const array_size = payload.len; + const elem_type = payload.elem_type; - const expr_base = @ptrCast(*const clang.Expr, stmt); - const ty = expr_base.getType().getTypePtr(); - const const_arr_ty = @ptrCast(*const clang.ConstantArrayType, ty); + if (array_size == 0) return Tag.empty_array.create(c.arena, elem_type); - const elem_type = try transQualType(c, scope, const_arr_ty.getElementType(), expr_base.getBeginLoc()); - const arr_type = try Tag.array_type.create(c.arena, .{ .len = array_size, .elem_type = elem_type }); - const 
init_list = try c.arena.alloc(Node, array_size); + const num_inits = math.min(str_length, array_size); + const init_node = if (num_inits > 0) blk: { + if (is_narrow) { + // "string literal".* or "string literal"[0..num_inits].* + var str = try transNarrowStringLiteral(c, scope, stmt, .used); + if (str_length != array_size) str = try Tag.string_slice.create(c.arena, .{ .string = str, .end = num_inits }); + break :blk try Tag.deref.create(c.arena, str); + } else { + const init_list = try c.arena.alloc(Node, num_inits); + var i: c_uint = 0; + while (i < num_inits) : (i += 1) { + init_list[i] = try transCreateCharLitNode(c, false, stmt.getCodeUnit(i)); + } + const init_args = .{ .len = num_inits, .elem_type = elem_type }; + const init_array_type = try if (array_type.tag() == .array_type) Tag.array_type.create(c.arena, init_args) else Tag.null_sentinel_array_type.create(c.arena, init_args); + break :blk try Tag.array_init.create(c.arena, .{ + .cond = init_array_type, + .cases = init_list, + }); + } + } else null; - var i: c_uint = 0; - const kind = stmt.getKind(); - const narrow = kind == .Ascii or kind == .UTF8; - while (i < str_length and i < array_size) : (i += 1) { - const code_unit = stmt.getCodeUnit(i); - init_list[i] = try transCreateCharLitNode(c, narrow, code_unit); - } - while (i < array_size) : (i += 1) { - init_list[i] = try transCreateNodeNumber(c, 0, .int); - } + if (num_inits == array_size) return init_node.?; // init_node is only null if num_inits == 0; but if num_inits == array_size == 0 we've already returned + assert(array_size > str_length); // If array_size <= str_length, `num_inits == array_size` and we've already returned. 
- return Tag.array_init.create(c.arena, .{ - .cond = arr_type, - .cases = init_list, + const filler_node = try Tag.array_filler.create(c.arena, .{ + .type = elem_type, + .filler = Tag.zero_literal.init(), + .count = array_size - str_length, }); + + if (init_node) |some| { + return Tag.array_cat.create(c.arena, .{ .lhs = some, .rhs = filler_node }); + } else { + return filler_node; + } } /// determine whether `stmt` is a "pointer subtraction expression" - a subtraction where @@ -3342,9 +3366,8 @@ fn addTopLevelDecl(c: *Context, name: []const u8, decl_node: Node) !void { try c.global_scope.nodes.append(decl_node); } -/// Translate a qual type for a variable with an initializer. The initializer -/// only matters for incomplete arrays, since the size of the array is determined -/// by the size of the initializer +/// Translate a qualtype for a variable with an initializer. This only matters +/// for incomplete arrays, since the initializer determines the size of the array. fn transQualTypeInitialized( c: *Context, scope: *Scope, @@ -3360,9 +3383,14 @@ fn transQualTypeInitialized( switch (decl_init.getStmtClass()) { .StringLiteralClass => { const string_lit = @ptrCast(*const clang.StringLiteral, decl_init); - const string_lit_size = string_lit.getLength() + 1; // +1 for null terminator + const string_lit_size = string_lit.getLength(); const array_size = @intCast(usize, string_lit_size); - return Tag.array_type.create(c.arena, .{ .len = array_size, .elem_type = elem_ty }); + + // incomplete array initialized with empty string, will be translated as [1]T{0} + // see https://github.com/ziglang/zig/issues/8256 + if (array_size == 0) return Tag.array_type.create(c.arena, .{ .len = 1, .elem_type = elem_ty }); + + return Tag.null_sentinel_array_type.create(c.arena, .{ .len = array_size, .elem_type = elem_ty }); }, .InitListExprClass => { const init_expr = @ptrCast(*const clang.InitListExpr, decl_init); diff --git a/src/translate_c/ast.zig b/src/translate_c/ast.zig index 
e5f76cc1de..4b595a7940 100644 --- a/src/translate_c/ast.zig +++ b/src/translate_c/ast.zig @@ -40,6 +40,8 @@ pub const Node = extern union { string_literal, char_literal, enum_literal, + /// "string"[0..end] + string_slice, identifier, @"if", /// if (!operand) break; @@ -176,6 +178,7 @@ pub const Node = extern union { c_pointer, single_pointer, array_type, + null_sentinel_array_type, /// @import("std").meta.sizeof(operand) std_meta_sizeof, @@ -334,7 +337,7 @@ pub const Node = extern union { .std_meta_promoteIntLiteral => Payload.PromoteIntLiteral, .block => Payload.Block, .c_pointer, .single_pointer => Payload.Pointer, - .array_type => Payload.Array, + .array_type, .null_sentinel_array_type => Payload.Array, .arg_redecl, .alias, .fail_decl => Payload.ArgRedecl, .log2_int_type => Payload.Log2IntType, .var_simple, .pub_var_simple => Payload.SimpleVarDecl, @@ -342,6 +345,7 @@ pub const Node = extern union { .array_filler => Payload.ArrayFiller, .pub_inline_fn => Payload.PubInlineFn, .field_access => Payload.FieldAccess, + .string_slice => Payload.StringSlice, }; } @@ -584,10 +588,12 @@ pub const Payload = struct { pub const Array = struct { base: Payload, - data: struct { + data: ArrayTypeInfo, + + pub const ArrayTypeInfo = struct { elem_type: Node, len: usize, - }, + }; }; pub const Pointer = struct { @@ -664,6 +670,14 @@ pub const Payload = struct { radix: Node, }, }; + + pub const StringSlice = struct { + base: Payload, + data: struct { + string: Node, + end: usize, + }, + }; }; /// Converts the nodes into a Zig ast. 
@@ -1015,6 +1029,36 @@ fn renderNode(c: *Context, node: Node) Allocator.Error!NodeIndex { .data = undefined, }); }, + .string_slice => { + const payload = node.castTag(.string_slice).?.data; + + const string = try renderNode(c, payload.string); + const l_bracket = try c.addToken(.l_bracket, "["); + const start = try c.addNode(.{ + .tag = .integer_literal, + .main_token = try c.addToken(.integer_literal, "0"), + .data = undefined, + }); + _ = try c.addToken(.ellipsis2, ".."); + const end = try c.addNode(.{ + .tag = .integer_literal, + .main_token = try c.addTokenFmt(.integer_literal, "{d}", .{payload.end}), + .data = undefined, + }); + _ = try c.addToken(.r_bracket, "]"); + + return c.addNode(.{ + .tag = .slice, + .main_token = l_bracket, + .data = .{ + .lhs = string, + .rhs = try c.addExtra(std.zig.ast.Node.Slice{ + .start = start, + .end = end, + }), + }, + }); + }, .fail_decl => { const payload = node.castTag(.fail_decl).?.data; // pub const name = @compileError(msg); @@ -1581,6 +1625,10 @@ fn renderNode(c: *Context, node: Node) Allocator.Error!NodeIndex { const payload = node.castTag(.array_type).?.data; return renderArrayType(c, payload.len, payload.elem_type); }, + .null_sentinel_array_type => { + const payload = node.castTag(.null_sentinel_array_type).?.data; + return renderNullSentinelArrayType(c, payload.len, payload.elem_type); + }, .array_filler => { const payload = node.castTag(.array_filler).?.data; @@ -1946,6 +1994,36 @@ fn renderArrayType(c: *Context, len: usize, elem_type: Node) !NodeIndex { }); } +fn renderNullSentinelArrayType(c: *Context, len: usize, elem_type: Node) !NodeIndex { + const l_bracket = try c.addToken(.l_bracket, "["); + const len_expr = try c.addNode(.{ + .tag = .integer_literal, + .main_token = try c.addTokenFmt(.integer_literal, "{d}", .{len}), + .data = undefined, + }); + _ = try c.addToken(.colon, ":"); + + const sentinel_expr = try c.addNode(.{ + .tag = .integer_literal, + .main_token = try c.addToken(.integer_literal, "0"), + 
.data = undefined, + }); + + _ = try c.addToken(.r_bracket, "]"); + const elem_type_expr = try renderNode(c, elem_type); + return c.addNode(.{ + .tag = .array_type_sentinel, + .main_token = l_bracket, + .data = .{ + .lhs = len_expr, + .rhs = try c.addExtra(std.zig.ast.Node.ArrayTypeSentinel { + .sentinel = sentinel_expr, + .elem_type = elem_type_expr, + }), + }, + }); +} + fn addSemicolonIfNeeded(c: *Context, node: Node) !void { switch (node.tag()) { .warning => unreachable, @@ -2014,6 +2092,7 @@ fn renderNodeGrouped(c: *Context, node: Node) !NodeIndex { .integer_literal, .float_literal, .string_literal, + .string_slice, .char_literal, .enum_literal, .identifier, @@ -2035,6 +2114,7 @@ fn renderNodeGrouped(c: *Context, node: Node) !NodeIndex { .func, .call, .array_type, + .null_sentinel_array_type, .bool_to_int, .div_exact, .byte_offset_of, diff --git a/test/translate_c.zig b/test/translate_c.zig index 6aab4736a3..1e89de78a1 100644 --- a/test/translate_c.zig +++ b/test/translate_c.zig @@ -745,14 +745,7 @@ pub fn addCases(cases: *tests.TranslateCContext) void { \\ static const char v2[] = "2.2.2"; \\} , &[_][]const u8{ - \\const v2: [6]u8 = [6]u8{ - \\ '2', - \\ '.', - \\ '2', - \\ '.', - \\ '2', - \\ 0, - \\}; + \\const v2: [5:0]u8 = "2.2.2".*; \\pub export fn foo() void {} }); @@ -1600,30 +1593,9 @@ pub fn addCases(cases: *tests.TranslateCContext) void { \\static char arr1[] = "hello"; \\char arr2[] = "hello"; , &[_][]const u8{ - \\pub export var arr0: [6]u8 = [6]u8{ - \\ 'h', - \\ 'e', - \\ 'l', - \\ 'l', - \\ 'o', - \\ 0, - \\}; - \\pub var arr1: [6]u8 = [6]u8{ - \\ 'h', - \\ 'e', - \\ 'l', - \\ 'l', - \\ 'o', - \\ 0, - \\}; - \\pub export var arr2: [6]u8 = [6]u8{ - \\ 'h', - \\ 'e', - \\ 'l', - \\ 'l', - \\ 'o', - \\ 0, - \\}; + \\pub export var arr0: [5:0]u8 = "hello".*; + \\pub var arr1: [5:0]u8 = "hello".*; + \\pub export var arr2: [5:0]u8 = "hello".*; }); cases.add("array initializer expr", @@ -3425,4 +3397,49 @@ pub fn addCases(cases: 
*tests.TranslateCContext) void { , &[_][]const u8{ \\pub const FOO = @compileError("TODO implement function '__builtin_alloca_with_align' in std.c.builtins"); }); + + cases.add("null sentinel arrays when initialized from string literal. Issue #8256", + \\#include <stdint.h> + \\char zero[0] = "abc"; + \\uint32_t zero_w[0] = U"💯💯💯"; + \\char empty_incomplete[] = ""; + \\uint32_t empty_incomplete_w[] = U""; + \\char empty_constant[100] = ""; + \\uint32_t empty_constant_w[100] = U""; + \\char incomplete[] = "abc"; + \\uint32_t incomplete_w[] = U"💯💯💯"; + \\char truncated[1] = "abc"; + \\uint32_t truncated_w[1] = U"💯💯💯"; + \\char extend[5] = "a"; + \\uint32_t extend_w[5] = U"💯"; + \\char no_null[3] = "abc"; + \\uint32_t no_null_w[3] = U"💯💯💯"; + , &[_][]const u8{ + \\pub export var zero: [0]u8 = [0]u8{}; + \\pub export var zero_w: [0]u32 = [0]u32{}; + \\pub export var empty_incomplete: [1]u8 = [1]u8{0} ** 1; + \\pub export var empty_incomplete_w: [1]u32 = [1]u32{0} ** 1; + \\pub export var empty_constant: [100]u8 = [1]u8{0} ** 100; + \\pub export var empty_constant_w: [100]u32 = [1]u32{0} ** 100; + \\pub export var incomplete: [3:0]u8 = "abc".*; + \\pub export var incomplete_w: [3:0]u32 = [3:0]u32{ + \\ '\u{1f4af}', + \\ '\u{1f4af}', + \\ '\u{1f4af}', + \\}; + \\pub export var truncated: [1]u8 = "abc"[0..1].*; + \\pub export var truncated_w: [1]u32 = [1]u32{ + \\ '\u{1f4af}', + \\}; + \\pub export var extend: [5]u8 = "a"[0..1].* ++ [1]u8{0} ** 4; + \\pub export var extend_w: [5]u32 = [1]u32{ + \\ '\u{1f4af}', + \\} ++ [1]u32{0} ** 4; + \\pub export var no_null: [3]u8 = "abc".*; + \\pub export var no_null_w: [3]u32 = [3]u32{ + \\ '\u{1f4af}', + \\ '\u{1f4af}', + \\ '\u{1f4af}', + \\}; + }); }