mirror of
https://github.com/ziglang/zig.git
synced 2024-11-27 07:32:44 +00:00
translate-c: Use [N:0] arrays when initializer is a string literal (#8264)
* translate-c: Use [N:0] arrays when initializer is a string literal. Translate incomplete arrays as [N:0] when initialized by a string literal. This preserves a bit more of the type information from the original C program. Fixes #8215
This commit is contained in:
parent
75a7abb0c4
commit
b54514d9dd
@ -636,7 +636,7 @@ fn visitVarDecl(c: *Context, var_decl: *const clang.VarDecl, mangled_name: ?[]co
|
||||
if (has_init) trans_init: {
|
||||
if (decl_init) |expr| {
|
||||
const node_or_error = if (expr.getStmtClass() == .StringLiteralClass)
|
||||
transStringLiteralAsArray(c, scope, @ptrCast(*const clang.StringLiteral, expr), zigArraySize(c, type_node) catch 0)
|
||||
transStringLiteralInitializer(c, scope, @ptrCast(*const clang.StringLiteral, expr), type_node)
|
||||
else
|
||||
transExprCoercing(c, scope, expr, .used);
|
||||
init_node = node_or_error catch |err| switch (err) {
|
||||
@ -1412,7 +1412,7 @@ fn transDeclStmtOne(
|
||||
|
||||
var init_node = if (decl_init) |expr|
|
||||
if (expr.getStmtClass() == .StringLiteralClass)
|
||||
try transStringLiteralAsArray(c, scope, @ptrCast(*const clang.StringLiteral, expr), try zigArraySize(c, type_node))
|
||||
try transStringLiteralInitializer(c, scope, @ptrCast(*const clang.StringLiteral, expr), type_node)
|
||||
else
|
||||
try transExprCoercing(c, scope, expr, .used)
|
||||
else
|
||||
@ -1758,6 +1758,20 @@ fn transReturnStmt(
|
||||
return Tag.@"return".create(c.arena, rhs);
|
||||
}
|
||||
|
||||
/// Translate a narrow C string literal into a Zig string literal node.
/// The literal's raw bytes are escaped with `std.zig.fmtEscapes`, wrapped in
/// double quotes, and the resulting node is handed to `maybeSuppressResult`
/// together with `result_used`. The formatted text is allocated from `c.arena`.
fn transNarrowStringLiteral(
    c: *Context,
    scope: *Scope,
    stmt: *const clang.StringLiteral,
    result_used: ResultUsed,
) TransError!Node {
    // The byte count is reported through an out-parameter; it is written by the call below.
    var byte_count: usize = undefined;
    const raw_ptr = stmt.getString_bytes_begin_size(&byte_count);
    const raw_bytes = raw_ptr[0..byte_count];

    const quoted = try std.fmt.allocPrint(c.arena, "\"{}\"", .{std.zig.fmtEscapes(raw_bytes)});
    const literal = try Tag.string_literal.create(c.arena, quoted);
    return maybeSuppressResult(c, scope, result_used, literal);
}
|
||||
|
||||
fn transStringLiteral(
|
||||
c: *Context,
|
||||
scope: *Scope,
|
||||
@ -1766,19 +1780,14 @@ fn transStringLiteral(
|
||||
) TransError!Node {
|
||||
const kind = stmt.getKind();
|
||||
switch (kind) {
|
||||
.Ascii, .UTF8 => {
|
||||
var len: usize = undefined;
|
||||
const bytes_ptr = stmt.getString_bytes_begin_size(&len);
|
||||
|
||||
const str = try std.fmt.allocPrint(c.arena, "\"{}\"", .{std.zig.fmtEscapes(bytes_ptr[0..len])});
|
||||
const node = try Tag.string_literal.create(c.arena, str);
|
||||
return maybeSuppressResult(c, scope, result_used, node);
|
||||
},
|
||||
.Ascii, .UTF8 => return transNarrowStringLiteral(c, scope, stmt, result_used),
|
||||
.UTF16, .UTF32, .Wide => {
|
||||
const str_type = @tagName(stmt.getKind());
|
||||
const name = try std.fmt.allocPrint(c.arena, "zig.{s}_string_{d}", .{ str_type, c.getMangle() });
|
||||
const lit_array = try transStringLiteralAsArray(c, scope, stmt, stmt.getLength() + 1);
|
||||
|
||||
const expr_base = @ptrCast(*const clang.Expr, stmt);
|
||||
const array_type = try transQualTypeInitialized(c, scope, expr_base.getType(), expr_base, expr_base.getBeginLoc());
|
||||
const lit_array = try transStringLiteralInitializer(c, scope, stmt, array_type);
|
||||
const decl = try Tag.var_simple.create(c.arena, .{ .name = name, .init = lit_array });
|
||||
try scope.appendNode(decl);
|
||||
const node = try Tag.identifier.create(c.arena, name);
|
||||
@ -1787,52 +1796,67 @@ fn transStringLiteral(
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse the size of an array back out from an ast Node.
|
||||
fn zigArraySize(c: *Context, node: Node) TransError!usize {
|
||||
if (node.castTag(.array_type)) |array| {
|
||||
return array.data.len;
|
||||
}
|
||||
return error.UnsupportedTranslation;
|
||||
/// Return the `len` / `elem_type` payload of an array type node.
/// `array_type` must be tagged `.array_type` or `.null_sentinel_array_type`;
/// any other tag fails the `.?` unwrap below.
fn getArrayPayload(array_type: Node) ast.Payload.Array.ArrayTypeInfo {
    if (array_type.castTag(.array_type)) |plain| return plain.data;
    return array_type.castTag(.null_sentinel_array_type).?.data;
}
|
||||
|
||||
/// Translate a string literal to an array of integers. Used when an
|
||||
/// array is initialized from a string literal. `array_size` is the
|
||||
/// size of the array being initialized. If the string literal is larger
|
||||
/// than the array, truncate the string. If the array is larger than the
|
||||
/// string literal, pad the array with 0's
|
||||
fn transStringLiteralAsArray(
|
||||
/// Translate a string literal that is initializing an array. In general narrow string
|
||||
/// literals become `"<string>".*` or `"<string>"[0..<size>].*` if they need truncation.
|
||||
/// Wide string literals become an array of integers. zero-fillers pad out the array to
|
||||
/// the appropriate length, if necessary.
|
||||
fn transStringLiteralInitializer(
|
||||
c: *Context,
|
||||
scope: *Scope,
|
||||
stmt: *const clang.StringLiteral,
|
||||
array_size: usize,
|
||||
array_type: Node,
|
||||
) TransError!Node {
|
||||
if (array_size == 0) return error.UnsupportedType;
|
||||
assert(array_type.tag() == .array_type or array_type.tag() == .null_sentinel_array_type);
|
||||
|
||||
const is_narrow = stmt.getKind() == .Ascii or stmt.getKind() == .UTF8;
|
||||
|
||||
const str_length = stmt.getLength();
|
||||
const payload = getArrayPayload(array_type);
|
||||
const array_size = payload.len;
|
||||
const elem_type = payload.elem_type;
|
||||
|
||||
const expr_base = @ptrCast(*const clang.Expr, stmt);
|
||||
const ty = expr_base.getType().getTypePtr();
|
||||
const const_arr_ty = @ptrCast(*const clang.ConstantArrayType, ty);
|
||||
if (array_size == 0) return Tag.empty_array.create(c.arena, elem_type);
|
||||
|
||||
const elem_type = try transQualType(c, scope, const_arr_ty.getElementType(), expr_base.getBeginLoc());
|
||||
const arr_type = try Tag.array_type.create(c.arena, .{ .len = array_size, .elem_type = elem_type });
|
||||
const init_list = try c.arena.alloc(Node, array_size);
|
||||
const num_inits = math.min(str_length, array_size);
|
||||
const init_node = if (num_inits > 0) blk: {
|
||||
if (is_narrow) {
|
||||
// "string literal".* or "string literal"[0..num_inits].*
|
||||
var str = try transNarrowStringLiteral(c, scope, stmt, .used);
|
||||
if (str_length != array_size) str = try Tag.string_slice.create(c.arena, .{ .string = str, .end = num_inits });
|
||||
break :blk try Tag.deref.create(c.arena, str);
|
||||
} else {
|
||||
const init_list = try c.arena.alloc(Node, num_inits);
|
||||
var i: c_uint = 0;
|
||||
while (i < num_inits) : (i += 1) {
|
||||
init_list[i] = try transCreateCharLitNode(c, false, stmt.getCodeUnit(i));
|
||||
}
|
||||
const init_args = .{ .len = num_inits, .elem_type = elem_type };
|
||||
const init_array_type = try if (array_type.tag() == .array_type) Tag.array_type.create(c.arena, init_args) else Tag.null_sentinel_array_type.create(c.arena, init_args);
|
||||
break :blk try Tag.array_init.create(c.arena, .{
|
||||
.cond = init_array_type,
|
||||
.cases = init_list,
|
||||
});
|
||||
}
|
||||
} else null;
|
||||
|
||||
var i: c_uint = 0;
|
||||
const kind = stmt.getKind();
|
||||
const narrow = kind == .Ascii or kind == .UTF8;
|
||||
while (i < str_length and i < array_size) : (i += 1) {
|
||||
const code_unit = stmt.getCodeUnit(i);
|
||||
init_list[i] = try transCreateCharLitNode(c, narrow, code_unit);
|
||||
}
|
||||
while (i < array_size) : (i += 1) {
|
||||
init_list[i] = try transCreateNodeNumber(c, 0, .int);
|
||||
}
|
||||
if (num_inits == array_size) return init_node.?; // init_node is only null if num_inits == 0; but if num_inits == array_size == 0 we've already returned
|
||||
assert(array_size > str_length); // If array_size <= str_length, `num_inits == array_size` and we've already returned.
|
||||
|
||||
return Tag.array_init.create(c.arena, .{
|
||||
.cond = arr_type,
|
||||
.cases = init_list,
|
||||
const filler_node = try Tag.array_filler.create(c.arena, .{
|
||||
.type = elem_type,
|
||||
.filler = Tag.zero_literal.init(),
|
||||
.count = array_size - str_length,
|
||||
});
|
||||
|
||||
if (init_node) |some| {
|
||||
return Tag.array_cat.create(c.arena, .{ .lhs = some, .rhs = filler_node });
|
||||
} else {
|
||||
return filler_node;
|
||||
}
|
||||
}
|
||||
|
||||
/// determine whether `stmt` is a "pointer subtraction expression" - a subtraction where
|
||||
@ -3342,9 +3366,8 @@ fn addTopLevelDecl(c: *Context, name: []const u8, decl_node: Node) !void {
|
||||
try c.global_scope.nodes.append(decl_node);
|
||||
}
|
||||
|
||||
/// Translate a qual type for a variable with an initializer. The initializer
|
||||
/// only matters for incomplete arrays, since the size of the array is determined
|
||||
/// by the size of the initializer
|
||||
/// Translate a qualtype for a variable with an initializer. This only matters
|
||||
/// for incomplete arrays, since the initializer determines the size of the array.
|
||||
fn transQualTypeInitialized(
|
||||
c: *Context,
|
||||
scope: *Scope,
|
||||
@ -3360,9 +3383,14 @@ fn transQualTypeInitialized(
|
||||
switch (decl_init.getStmtClass()) {
|
||||
.StringLiteralClass => {
|
||||
const string_lit = @ptrCast(*const clang.StringLiteral, decl_init);
|
||||
const string_lit_size = string_lit.getLength() + 1; // +1 for null terminator
|
||||
const string_lit_size = string_lit.getLength();
|
||||
const array_size = @intCast(usize, string_lit_size);
|
||||
return Tag.array_type.create(c.arena, .{ .len = array_size, .elem_type = elem_ty });
|
||||
|
||||
// incomplete array initialized with empty string, will be translated as [1]T{0}
|
||||
// see https://github.com/ziglang/zig/issues/8256
|
||||
if (array_size == 0) return Tag.array_type.create(c.arena, .{ .len = 1, .elem_type = elem_ty });
|
||||
|
||||
return Tag.null_sentinel_array_type.create(c.arena, .{ .len = array_size, .elem_type = elem_ty });
|
||||
},
|
||||
.InitListExprClass => {
|
||||
const init_expr = @ptrCast(*const clang.InitListExpr, decl_init);
|
||||
|
@ -40,6 +40,8 @@ pub const Node = extern union {
|
||||
string_literal,
|
||||
char_literal,
|
||||
enum_literal,
|
||||
/// "string"[0..end]
|
||||
string_slice,
|
||||
identifier,
|
||||
@"if",
|
||||
/// if (!operand) break;
|
||||
@ -176,6 +178,7 @@ pub const Node = extern union {
|
||||
c_pointer,
|
||||
single_pointer,
|
||||
array_type,
|
||||
null_sentinel_array_type,
|
||||
|
||||
/// @import("std").meta.sizeof(operand)
|
||||
std_meta_sizeof,
|
||||
@ -334,7 +337,7 @@ pub const Node = extern union {
|
||||
.std_meta_promoteIntLiteral => Payload.PromoteIntLiteral,
|
||||
.block => Payload.Block,
|
||||
.c_pointer, .single_pointer => Payload.Pointer,
|
||||
.array_type => Payload.Array,
|
||||
.array_type, .null_sentinel_array_type => Payload.Array,
|
||||
.arg_redecl, .alias, .fail_decl => Payload.ArgRedecl,
|
||||
.log2_int_type => Payload.Log2IntType,
|
||||
.var_simple, .pub_var_simple => Payload.SimpleVarDecl,
|
||||
@ -342,6 +345,7 @@ pub const Node = extern union {
|
||||
.array_filler => Payload.ArrayFiller,
|
||||
.pub_inline_fn => Payload.PubInlineFn,
|
||||
.field_access => Payload.FieldAccess,
|
||||
.string_slice => Payload.StringSlice,
|
||||
};
|
||||
}
|
||||
|
||||
@ -584,10 +588,12 @@ pub const Payload = struct {
|
||||
|
||||
pub const Array = struct {
|
||||
base: Payload,
|
||||
data: struct {
|
||||
data: ArrayTypeInfo,
|
||||
|
||||
pub const ArrayTypeInfo = struct {
|
||||
elem_type: Node,
|
||||
len: usize,
|
||||
},
|
||||
};
|
||||
};
|
||||
|
||||
pub const Pointer = struct {
|
||||
@ -664,6 +670,14 @@ pub const Payload = struct {
|
||||
radix: Node,
|
||||
},
|
||||
};
|
||||
|
||||
/// Payload of a `string_slice` node, rendered as `"string"[0..end]`.
pub const StringSlice = struct {
    base: Payload,
    data: struct {
        /// Node for the string being sliced.
        string: Node,
        /// End index of the slice; the start index is always rendered as 0.
        end: usize,
    },
};
|
||||
};
|
||||
|
||||
/// Converts the nodes into a Zig ast.
|
||||
@ -1015,6 +1029,36 @@ fn renderNode(c: *Context, node: Node) Allocator.Error!NodeIndex {
|
||||
.data = undefined,
|
||||
});
|
||||
},
|
||||
.string_slice => {
|
||||
const payload = node.castTag(.string_slice).?.data;
|
||||
|
||||
const string = try renderNode(c, payload.string);
|
||||
const l_bracket = try c.addToken(.l_bracket, "[");
|
||||
const start = try c.addNode(.{
|
||||
.tag = .integer_literal,
|
||||
.main_token = try c.addToken(.integer_literal, "0"),
|
||||
.data = undefined,
|
||||
});
|
||||
_ = try c.addToken(.ellipsis2, "..");
|
||||
const end = try c.addNode(.{
|
||||
.tag = .integer_literal,
|
||||
.main_token = try c.addTokenFmt(.integer_literal, "{d}", .{payload.end}),
|
||||
.data = undefined,
|
||||
});
|
||||
_ = try c.addToken(.r_bracket, "]");
|
||||
|
||||
return c.addNode(.{
|
||||
.tag = .slice,
|
||||
.main_token = l_bracket,
|
||||
.data = .{
|
||||
.lhs = string,
|
||||
.rhs = try c.addExtra(std.zig.ast.Node.Slice{
|
||||
.start = start,
|
||||
.end = end,
|
||||
}),
|
||||
},
|
||||
});
|
||||
},
|
||||
.fail_decl => {
|
||||
const payload = node.castTag(.fail_decl).?.data;
|
||||
// pub const name = @compileError(msg);
|
||||
@ -1581,6 +1625,10 @@ fn renderNode(c: *Context, node: Node) Allocator.Error!NodeIndex {
|
||||
const payload = node.castTag(.array_type).?.data;
|
||||
return renderArrayType(c, payload.len, payload.elem_type);
|
||||
},
|
||||
.null_sentinel_array_type => {
|
||||
const payload = node.castTag(.null_sentinel_array_type).?.data;
|
||||
return renderNullSentinelArrayType(c, payload.len, payload.elem_type);
|
||||
},
|
||||
.array_filler => {
|
||||
const payload = node.castTag(.array_filler).?.data;
|
||||
|
||||
@ -1946,6 +1994,36 @@ fn renderArrayType(c: *Context, len: usize, elem_type: Node) !NodeIndex {
|
||||
});
|
||||
}
|
||||
|
||||
/// Render the type expression `[len:0]elem_type` and return the index of the
/// resulting `array_type_sentinel` node.
/// Tokens are emitted in source order (`[`, length, `:`, `0`, `]`, element type),
/// so the statements below must not be reordered.
fn renderNullSentinelArrayType(c: *Context, len: usize, elem_type: Node) !NodeIndex {
    const open_bracket = try c.addToken(.l_bracket, "[");

    // Array length.
    const len_token = try c.addTokenFmt(.integer_literal, "{d}", .{len});
    const len_node = try c.addNode(.{
        .tag = .integer_literal,
        .main_token = len_token,
        .data = undefined,
    });

    _ = try c.addToken(.colon, ":");

    // Sentinel value, always the literal 0.
    const zero_token = try c.addToken(.integer_literal, "0");
    const sentinel_node = try c.addNode(.{
        .tag = .integer_literal,
        .main_token = zero_token,
        .data = undefined,
    });

    _ = try c.addToken(.r_bracket, "]");

    const elem_node = try renderNode(c, elem_type);

    const sentinel_info = try c.addExtra(std.zig.ast.Node.ArrayTypeSentinel{
        .sentinel = sentinel_node,
        .elem_type = elem_node,
    });
    return c.addNode(.{
        .tag = .array_type_sentinel,
        .main_token = open_bracket,
        .data = .{
            .lhs = len_node,
            .rhs = sentinel_info,
        },
    });
}
|
||||
|
||||
fn addSemicolonIfNeeded(c: *Context, node: Node) !void {
|
||||
switch (node.tag()) {
|
||||
.warning => unreachable,
|
||||
@ -2014,6 +2092,7 @@ fn renderNodeGrouped(c: *Context, node: Node) !NodeIndex {
|
||||
.integer_literal,
|
||||
.float_literal,
|
||||
.string_literal,
|
||||
.string_slice,
|
||||
.char_literal,
|
||||
.enum_literal,
|
||||
.identifier,
|
||||
@ -2035,6 +2114,7 @@ fn renderNodeGrouped(c: *Context, node: Node) !NodeIndex {
|
||||
.func,
|
||||
.call,
|
||||
.array_type,
|
||||
.null_sentinel_array_type,
|
||||
.bool_to_int,
|
||||
.div_exact,
|
||||
.byte_offset_of,
|
||||
|
@ -745,14 +745,7 @@ pub fn addCases(cases: *tests.TranslateCContext) void {
|
||||
\\ static const char v2[] = "2.2.2";
|
||||
\\}
|
||||
, &[_][]const u8{
|
||||
\\const v2: [6]u8 = [6]u8{
|
||||
\\ '2',
|
||||
\\ '.',
|
||||
\\ '2',
|
||||
\\ '.',
|
||||
\\ '2',
|
||||
\\ 0,
|
||||
\\};
|
||||
\\const v2: [5:0]u8 = "2.2.2".*;
|
||||
\\pub export fn foo() void {}
|
||||
});
|
||||
|
||||
@ -1600,30 +1593,9 @@ pub fn addCases(cases: *tests.TranslateCContext) void {
|
||||
\\static char arr1[] = "hello";
|
||||
\\char arr2[] = "hello";
|
||||
, &[_][]const u8{
|
||||
\\pub export var arr0: [6]u8 = [6]u8{
|
||||
\\ 'h',
|
||||
\\ 'e',
|
||||
\\ 'l',
|
||||
\\ 'l',
|
||||
\\ 'o',
|
||||
\\ 0,
|
||||
\\};
|
||||
\\pub var arr1: [6]u8 = [6]u8{
|
||||
\\ 'h',
|
||||
\\ 'e',
|
||||
\\ 'l',
|
||||
\\ 'l',
|
||||
\\ 'o',
|
||||
\\ 0,
|
||||
\\};
|
||||
\\pub export var arr2: [6]u8 = [6]u8{
|
||||
\\ 'h',
|
||||
\\ 'e',
|
||||
\\ 'l',
|
||||
\\ 'l',
|
||||
\\ 'o',
|
||||
\\ 0,
|
||||
\\};
|
||||
\\pub export var arr0: [5:0]u8 = "hello".*;
|
||||
\\pub var arr1: [5:0]u8 = "hello".*;
|
||||
\\pub export var arr2: [5:0]u8 = "hello".*;
|
||||
});
|
||||
|
||||
cases.add("array initializer expr",
|
||||
@ -3425,4 +3397,49 @@ pub fn addCases(cases: *tests.TranslateCContext) void {
|
||||
, &[_][]const u8{
|
||||
\\pub const FOO = @compileError("TODO implement function '__builtin_alloca_with_align' in std.c.builtins");
|
||||
});
|
||||
|
||||
cases.add("null sentinel arrays when initialized from string literal. Issue #8256",
|
||||
\\#include <stdint.h>
|
||||
\\char zero[0] = "abc";
|
||||
\\uint32_t zero_w[0] = U"💯💯💯";
|
||||
\\char empty_incomplete[] = "";
|
||||
\\uint32_t empty_incomplete_w[] = U"";
|
||||
\\char empty_constant[100] = "";
|
||||
\\uint32_t empty_constant_w[100] = U"";
|
||||
\\char incomplete[] = "abc";
|
||||
\\uint32_t incomplete_w[] = U"💯💯💯";
|
||||
\\char truncated[1] = "abc";
|
||||
\\uint32_t truncated_w[1] = U"💯💯💯";
|
||||
\\char extend[5] = "a";
|
||||
\\uint32_t extend_w[5] = U"💯";
|
||||
\\char no_null[3] = "abc";
|
||||
\\uint32_t no_null_w[3] = U"💯💯💯";
|
||||
, &[_][]const u8{
|
||||
\\pub export var zero: [0]u8 = [0]u8{};
|
||||
\\pub export var zero_w: [0]u32 = [0]u32{};
|
||||
\\pub export var empty_incomplete: [1]u8 = [1]u8{0} ** 1;
|
||||
\\pub export var empty_incomplete_w: [1]u32 = [1]u32{0} ** 1;
|
||||
\\pub export var empty_constant: [100]u8 = [1]u8{0} ** 100;
|
||||
\\pub export var empty_constant_w: [100]u32 = [1]u32{0} ** 100;
|
||||
\\pub export var incomplete: [3:0]u8 = "abc".*;
|
||||
\\pub export var incomplete_w: [3:0]u32 = [3:0]u32{
|
||||
\\ '\u{1f4af}',
|
||||
\\ '\u{1f4af}',
|
||||
\\ '\u{1f4af}',
|
||||
\\};
|
||||
\\pub export var truncated: [1]u8 = "abc"[0..1].*;
|
||||
\\pub export var truncated_w: [1]u32 = [1]u32{
|
||||
\\ '\u{1f4af}',
|
||||
\\};
|
||||
\\pub export var extend: [5]u8 = "a"[0..1].* ++ [1]u8{0} ** 4;
|
||||
\\pub export var extend_w: [5]u32 = [1]u32{
|
||||
\\ '\u{1f4af}',
|
||||
\\} ++ [1]u32{0} ** 4;
|
||||
\\pub export var no_null: [3]u8 = "abc".*;
|
||||
\\pub export var no_null_w: [3]u32 = [3]u32{
|
||||
\\ '\u{1f4af}',
|
||||
\\ '\u{1f4af}',
|
||||
\\ '\u{1f4af}',
|
||||
\\};
|
||||
});
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user