translate-c: Use [N:0] arrays when initializer is a string literal (#8264)

* translate-c: Use [N:0] arrays when initializer is a string literal

Translate incomplete arrays as [N:0] when initialized by a string literal.
This preserves a bit more of the type information from the original C program.

Fixes #8215
This commit is contained in:
Evan Haas 2021-03-18 05:41:04 -07:00 committed by GitHub
parent 75a7abb0c4
commit b54514d9dd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 209 additions and 84 deletions

View File

@ -636,7 +636,7 @@ fn visitVarDecl(c: *Context, var_decl: *const clang.VarDecl, mangled_name: ?[]co
if (has_init) trans_init: {
if (decl_init) |expr| {
const node_or_error = if (expr.getStmtClass() == .StringLiteralClass)
transStringLiteralAsArray(c, scope, @ptrCast(*const clang.StringLiteral, expr), zigArraySize(c, type_node) catch 0)
transStringLiteralInitializer(c, scope, @ptrCast(*const clang.StringLiteral, expr), type_node)
else
transExprCoercing(c, scope, expr, .used);
init_node = node_or_error catch |err| switch (err) {
@ -1412,7 +1412,7 @@ fn transDeclStmtOne(
var init_node = if (decl_init) |expr|
if (expr.getStmtClass() == .StringLiteralClass)
try transStringLiteralAsArray(c, scope, @ptrCast(*const clang.StringLiteral, expr), try zigArraySize(c, type_node))
try transStringLiteralInitializer(c, scope, @ptrCast(*const clang.StringLiteral, expr), type_node)
else
try transExprCoercing(c, scope, expr, .used)
else
@ -1758,6 +1758,20 @@ fn transReturnStmt(
return Tag.@"return".create(c.arena, rhs);
}
/// Translate a C string literal into a Zig string literal node.
/// The literal's bytes are escaped with `std.zig.fmtEscapes` and wrapped in
/// double quotes, and the resulting node is handed to `maybeSuppressResult`
/// together with `result_used`.
/// In this file it is called for the `.Ascii` and `.UTF8` literal kinds.
fn transNarrowStringLiteral(
    c: *Context,
    scope: *Scope,
    stmt: *const clang.StringLiteral,
    result_used: ResultUsed,
) TransError!Node {
    // `byte_count` is passed by pointer to the getter below
    // (presumably filled in by it -- confirm against the clang bindings).
    var byte_count: usize = undefined;
    const first_byte = stmt.getString_bytes_begin_size(&byte_count);
    const escaped = std.zig.fmtEscapes(first_byte[0..byte_count]);
    const quoted = try std.fmt.allocPrint(c.arena, "\"{}\"", .{escaped});
    return maybeSuppressResult(c, scope, result_used, try Tag.string_literal.create(c.arena, quoted));
}
fn transStringLiteral(
c: *Context,
scope: *Scope,
@ -1766,19 +1780,14 @@ fn transStringLiteral(
) TransError!Node {
const kind = stmt.getKind();
switch (kind) {
.Ascii, .UTF8 => {
var len: usize = undefined;
const bytes_ptr = stmt.getString_bytes_begin_size(&len);
const str = try std.fmt.allocPrint(c.arena, "\"{}\"", .{std.zig.fmtEscapes(bytes_ptr[0..len])});
const node = try Tag.string_literal.create(c.arena, str);
return maybeSuppressResult(c, scope, result_used, node);
},
.Ascii, .UTF8 => return transNarrowStringLiteral(c, scope, stmt, result_used),
.UTF16, .UTF32, .Wide => {
const str_type = @tagName(stmt.getKind());
const name = try std.fmt.allocPrint(c.arena, "zig.{s}_string_{d}", .{ str_type, c.getMangle() });
const lit_array = try transStringLiteralAsArray(c, scope, stmt, stmt.getLength() + 1);
const expr_base = @ptrCast(*const clang.Expr, stmt);
const array_type = try transQualTypeInitialized(c, scope, expr_base.getType(), expr_base, expr_base.getBeginLoc());
const lit_array = try transStringLiteralInitializer(c, scope, stmt, array_type);
const decl = try Tag.var_simple.create(c.arena, .{ .name = name, .init = lit_array });
try scope.appendNode(decl);
const node = try Tag.identifier.create(c.arena, name);
@ -1787,52 +1796,67 @@ fn transStringLiteral(
}
}
/// Parse the size of an array back out from an ast Node.
fn zigArraySize(c: *Context, node: Node) TransError!usize {
if (node.castTag(.array_type)) |array| {
return array.data.len;
}
return error.UnsupportedTranslation;
/// Return the length/element-type payload of an array type node.
/// `array_type` must be tagged `.array_type` or `.null_sentinel_array_type`;
/// any other tag trips the `.?` unwrap.
fn getArrayPayload(array_type: Node) ast.Payload.Array.ArrayTypeInfo {
    if (array_type.castTag(.array_type)) |plain| return plain.data;
    return array_type.castTag(.null_sentinel_array_type).?.data;
}
/// Translate a string literal to an array of integers. Used when an
/// array is initialized from a string literal. `array_size` is the
/// size of the array being initialized. If the string literal is larger
/// than the array, truncate the string. If the array is larger than the
/// string literal, pad the array with 0's
fn transStringLiteralAsArray(
/// Translate a string literal that is initializing an array. In general narrow string
/// literals become `"<string>".*` or `"<string>"[0..<size>].*` if they need truncation.
/// Wide string literals become an array of integers. Zero-fillers pad out the array to
/// the appropriate length, if necessary.
fn transStringLiteralInitializer(
c: *Context,
scope: *Scope,
stmt: *const clang.StringLiteral,
array_size: usize,
array_type: Node,
) TransError!Node {
if (array_size == 0) return error.UnsupportedType;
assert(array_type.tag() == .array_type or array_type.tag() == .null_sentinel_array_type);
const is_narrow = stmt.getKind() == .Ascii or stmt.getKind() == .UTF8;
const str_length = stmt.getLength();
const payload = getArrayPayload(array_type);
const array_size = payload.len;
const elem_type = payload.elem_type;
const expr_base = @ptrCast(*const clang.Expr, stmt);
const ty = expr_base.getType().getTypePtr();
const const_arr_ty = @ptrCast(*const clang.ConstantArrayType, ty);
if (array_size == 0) return Tag.empty_array.create(c.arena, elem_type);
const elem_type = try transQualType(c, scope, const_arr_ty.getElementType(), expr_base.getBeginLoc());
const arr_type = try Tag.array_type.create(c.arena, .{ .len = array_size, .elem_type = elem_type });
const init_list = try c.arena.alloc(Node, array_size);
const num_inits = math.min(str_length, array_size);
const init_node = if (num_inits > 0) blk: {
if (is_narrow) {
// "string literal".* or "string literal"[0..num_inits].*
var str = try transNarrowStringLiteral(c, scope, stmt, .used);
if (str_length != array_size) str = try Tag.string_slice.create(c.arena, .{ .string = str, .end = num_inits });
break :blk try Tag.deref.create(c.arena, str);
} else {
const init_list = try c.arena.alloc(Node, num_inits);
var i: c_uint = 0;
while (i < num_inits) : (i += 1) {
init_list[i] = try transCreateCharLitNode(c, false, stmt.getCodeUnit(i));
}
const init_args = .{ .len = num_inits, .elem_type = elem_type };
const init_array_type = try if (array_type.tag() == .array_type) Tag.array_type.create(c.arena, init_args) else Tag.null_sentinel_array_type.create(c.arena, init_args);
break :blk try Tag.array_init.create(c.arena, .{
.cond = init_array_type,
.cases = init_list,
});
}
} else null;
var i: c_uint = 0;
const kind = stmt.getKind();
const narrow = kind == .Ascii or kind == .UTF8;
while (i < str_length and i < array_size) : (i += 1) {
const code_unit = stmt.getCodeUnit(i);
init_list[i] = try transCreateCharLitNode(c, narrow, code_unit);
}
while (i < array_size) : (i += 1) {
init_list[i] = try transCreateNodeNumber(c, 0, .int);
}
if (num_inits == array_size) return init_node.?; // init_node is only null if num_inits == 0; but if num_inits == array_size == 0 we've already returned
assert(array_size > str_length); // If array_size <= str_length, `num_inits == array_size` and we've already returned.
return Tag.array_init.create(c.arena, .{
.cond = arr_type,
.cases = init_list,
const filler_node = try Tag.array_filler.create(c.arena, .{
.type = elem_type,
.filler = Tag.zero_literal.init(),
.count = array_size - str_length,
});
if (init_node) |some| {
return Tag.array_cat.create(c.arena, .{ .lhs = some, .rhs = filler_node });
} else {
return filler_node;
}
}
/// determine whether `stmt` is a "pointer subtraction expression" - a subtraction where
@ -3342,9 +3366,8 @@ fn addTopLevelDecl(c: *Context, name: []const u8, decl_node: Node) !void {
try c.global_scope.nodes.append(decl_node);
}
/// Translate a qual type for a variable with an initializer. The initializer
/// only matters for incomplete arrays, since the size of the array is determined
/// by the size of the initializer
/// Translate a qualtype for a variable with an initializer. This only matters
/// for incomplete arrays, since the initializer determines the size of the array.
fn transQualTypeInitialized(
c: *Context,
scope: *Scope,
@ -3360,9 +3383,14 @@ fn transQualTypeInitialized(
switch (decl_init.getStmtClass()) {
.StringLiteralClass => {
const string_lit = @ptrCast(*const clang.StringLiteral, decl_init);
const string_lit_size = string_lit.getLength() + 1; // +1 for null terminator
const string_lit_size = string_lit.getLength();
const array_size = @intCast(usize, string_lit_size);
return Tag.array_type.create(c.arena, .{ .len = array_size, .elem_type = elem_ty });
// incomplete array initialized with empty string, will be translated as [1]T{0}
// see https://github.com/ziglang/zig/issues/8256
if (array_size == 0) return Tag.array_type.create(c.arena, .{ .len = 1, .elem_type = elem_ty });
return Tag.null_sentinel_array_type.create(c.arena, .{ .len = array_size, .elem_type = elem_ty });
},
.InitListExprClass => {
const init_expr = @ptrCast(*const clang.InitListExpr, decl_init);

View File

@ -40,6 +40,8 @@ pub const Node = extern union {
string_literal,
char_literal,
enum_literal,
/// "string"[0..end]
string_slice,
identifier,
@"if",
/// if (!operand) break;
@ -176,6 +178,7 @@ pub const Node = extern union {
c_pointer,
single_pointer,
array_type,
null_sentinel_array_type,
/// @import("std").meta.sizeof(operand)
std_meta_sizeof,
@ -334,7 +337,7 @@ pub const Node = extern union {
.std_meta_promoteIntLiteral => Payload.PromoteIntLiteral,
.block => Payload.Block,
.c_pointer, .single_pointer => Payload.Pointer,
.array_type => Payload.Array,
.array_type, .null_sentinel_array_type => Payload.Array,
.arg_redecl, .alias, .fail_decl => Payload.ArgRedecl,
.log2_int_type => Payload.Log2IntType,
.var_simple, .pub_var_simple => Payload.SimpleVarDecl,
@ -342,6 +345,7 @@ pub const Node = extern union {
.array_filler => Payload.ArrayFiller,
.pub_inline_fn => Payload.PubInlineFn,
.field_access => Payload.FieldAccess,
.string_slice => Payload.StringSlice,
};
}
@ -584,10 +588,12 @@ pub const Payload = struct {
pub const Array = struct {
base: Payload,
data: struct {
data: ArrayTypeInfo,
pub const ArrayTypeInfo = struct {
elem_type: Node,
len: usize,
},
};
};
pub const Pointer = struct {
@ -664,6 +670,14 @@ pub const Payload = struct {
radix: Node,
},
};
/// Payload for the `string_slice` tag: a string node sliced from index 0,
/// rendered as `string[0..end]`.
pub const StringSlice = struct {
base: Payload,
data: struct {
/// Node rendered as the operand that is being sliced.
string: Node,
/// Upper bound of the slice; rendered as a decimal integer literal after `0..`.
end: usize,
},
};
};
/// Converts the nodes into a Zig ast.
@ -1015,6 +1029,36 @@ fn renderNode(c: *Context, node: Node) Allocator.Error!NodeIndex {
.data = undefined,
});
},
.string_slice => {
const payload = node.castTag(.string_slice).?.data;
const string = try renderNode(c, payload.string);
const l_bracket = try c.addToken(.l_bracket, "[");
const start = try c.addNode(.{
.tag = .integer_literal,
.main_token = try c.addToken(.integer_literal, "0"),
.data = undefined,
});
_ = try c.addToken(.ellipsis2, "..");
const end = try c.addNode(.{
.tag = .integer_literal,
.main_token = try c.addTokenFmt(.integer_literal, "{d}", .{payload.end}),
.data = undefined,
});
_ = try c.addToken(.r_bracket, "]");
return c.addNode(.{
.tag = .slice,
.main_token = l_bracket,
.data = .{
.lhs = string,
.rhs = try c.addExtra(std.zig.ast.Node.Slice{
.start = start,
.end = end,
}),
},
});
},
.fail_decl => {
const payload = node.castTag(.fail_decl).?.data;
// pub const name = @compileError(msg);
@ -1581,6 +1625,10 @@ fn renderNode(c: *Context, node: Node) Allocator.Error!NodeIndex {
const payload = node.castTag(.array_type).?.data;
return renderArrayType(c, payload.len, payload.elem_type);
},
.null_sentinel_array_type => {
const payload = node.castTag(.null_sentinel_array_type).?.data;
return renderNullSentinelArrayType(c, payload.len, payload.elem_type);
},
.array_filler => {
const payload = node.castTag(.array_filler).?.data;
@ -1946,6 +1994,36 @@ fn renderArrayType(c: *Context, len: usize, elem_type: Node) !NodeIndex {
});
}
/// Render the zero-sentinel array type `[len:0]elem_type`.
/// The token and node calls below are kept in the textual order of the type:
/// `[`, length, `:`, `0`, `]`, then the element type.
fn renderNullSentinelArrayType(c: *Context, len: usize, elem_type: Node) !NodeIndex {
    const open_tok = try c.addToken(.l_bracket, "[");

    // Array length as a decimal integer literal.
    const len_tok = try c.addTokenFmt(.integer_literal, "{d}", .{len});
    const len_node = try c.addNode(.{
        .tag = .integer_literal,
        .main_token = len_tok,
        .data = undefined,
    });

    _ = try c.addToken(.colon, ":");

    // The sentinel is always the literal `0`.
    const zero_tok = try c.addToken(.integer_literal, "0");
    const sentinel_node = try c.addNode(.{
        .tag = .integer_literal,
        .main_token = zero_tok,
        .data = undefined,
    });

    _ = try c.addToken(.r_bracket, "]");
    const child_type = try renderNode(c, elem_type);

    const extra = try c.addExtra(std.zig.ast.Node.ArrayTypeSentinel{
        .sentinel = sentinel_node,
        .elem_type = child_type,
    });
    return c.addNode(.{
        .tag = .array_type_sentinel,
        .main_token = open_tok,
        .data = .{
            .lhs = len_node,
            .rhs = extra,
        },
    });
}
fn addSemicolonIfNeeded(c: *Context, node: Node) !void {
switch (node.tag()) {
.warning => unreachable,
@ -2014,6 +2092,7 @@ fn renderNodeGrouped(c: *Context, node: Node) !NodeIndex {
.integer_literal,
.float_literal,
.string_literal,
.string_slice,
.char_literal,
.enum_literal,
.identifier,
@ -2035,6 +2114,7 @@ fn renderNodeGrouped(c: *Context, node: Node) !NodeIndex {
.func,
.call,
.array_type,
.null_sentinel_array_type,
.bool_to_int,
.div_exact,
.byte_offset_of,

View File

@ -745,14 +745,7 @@ pub fn addCases(cases: *tests.TranslateCContext) void {
\\ static const char v2[] = "2.2.2";
\\}
, &[_][]const u8{
\\const v2: [6]u8 = [6]u8{
\\ '2',
\\ '.',
\\ '2',
\\ '.',
\\ '2',
\\ 0,
\\};
\\const v2: [5:0]u8 = "2.2.2".*;
\\pub export fn foo() void {}
});
@ -1600,30 +1593,9 @@ pub fn addCases(cases: *tests.TranslateCContext) void {
\\static char arr1[] = "hello";
\\char arr2[] = "hello";
, &[_][]const u8{
\\pub export var arr0: [6]u8 = [6]u8{
\\ 'h',
\\ 'e',
\\ 'l',
\\ 'l',
\\ 'o',
\\ 0,
\\};
\\pub var arr1: [6]u8 = [6]u8{
\\ 'h',
\\ 'e',
\\ 'l',
\\ 'l',
\\ 'o',
\\ 0,
\\};
\\pub export var arr2: [6]u8 = [6]u8{
\\ 'h',
\\ 'e',
\\ 'l',
\\ 'l',
\\ 'o',
\\ 0,
\\};
\\pub export var arr0: [5:0]u8 = "hello".*;
\\pub var arr1: [5:0]u8 = "hello".*;
\\pub export var arr2: [5:0]u8 = "hello".*;
});
cases.add("array initializer expr",
@ -3425,4 +3397,49 @@ pub fn addCases(cases: *tests.TranslateCContext) void {
, &[_][]const u8{
\\pub const FOO = @compileError("TODO implement function '__builtin_alloca_with_align' in std.c.builtins");
});
cases.add("null sentinel arrays when initialized from string literal. Issue #8256",
\\#include <stdint.h>
\\char zero[0] = "abc";
\\uint32_t zero_w[0] = U"💯💯💯";
\\char empty_incomplete[] = "";
\\uint32_t empty_incomplete_w[] = U"";
\\char empty_constant[100] = "";
\\uint32_t empty_constant_w[100] = U"";
\\char incomplete[] = "abc";
\\uint32_t incomplete_w[] = U"💯💯💯";
\\char truncated[1] = "abc";
\\uint32_t truncated_w[1] = U"💯💯💯";
\\char extend[5] = "a";
\\uint32_t extend_w[5] = U"💯";
\\char no_null[3] = "abc";
\\uint32_t no_null_w[3] = U"💯💯💯";
, &[_][]const u8{
\\pub export var zero: [0]u8 = [0]u8{};
\\pub export var zero_w: [0]u32 = [0]u32{};
\\pub export var empty_incomplete: [1]u8 = [1]u8{0} ** 1;
\\pub export var empty_incomplete_w: [1]u32 = [1]u32{0} ** 1;
\\pub export var empty_constant: [100]u8 = [1]u8{0} ** 100;
\\pub export var empty_constant_w: [100]u32 = [1]u32{0} ** 100;
\\pub export var incomplete: [3:0]u8 = "abc".*;
\\pub export var incomplete_w: [3:0]u32 = [3:0]u32{
\\ '\u{1f4af}',
\\ '\u{1f4af}',
\\ '\u{1f4af}',
\\};
\\pub export var truncated: [1]u8 = "abc"[0..1].*;
\\pub export var truncated_w: [1]u32 = [1]u32{
\\ '\u{1f4af}',
\\};
\\pub export var extend: [5]u8 = "a"[0..1].* ++ [1]u8{0} ** 4;
\\pub export var extend_w: [5]u32 = [1]u32{
\\ '\u{1f4af}',
\\} ++ [1]u32{0} ** 4;
\\pub export var no_null: [3]u8 = "abc".*;
\\pub export var no_null_w: [3]u32 = [3]u32{
\\ '\u{1f4af}',
\\ '\u{1f4af}',
\\ '\u{1f4af}',
\\};
});
}