From 9ea2076663730ab6ac9cad5cb5f84e58198d4d95 Mon Sep 17 00:00:00 2001 From: mlugg Date: Mon, 18 Sep 2023 02:05:35 +0100 Subject: [PATCH] translate-c: prevent variable names conflicting with type names This introduces the concept of a "weak global name" into translate-c. translate-c consists of two passes. The first is important, because it discovers all global names, which are used to prevent naming conflicts: whenever we see an identifier in the second pass, we can mangle it if it conflicts with any global or any other in-scope identifier. Unfortunately, this is a bit tricky for structs, unions, and enums. In C, these types are not represented by normal identifers, but by separate tags - `struct foo` does not prevent an unrelated identifier `foo` existing. In general, we want to translate type names to user-friendly ones such as `struct_foo` and `foo` where possible, but we can't guarantee such names will not conflict with real variable names. This is where weak global names come in. In the initial pass, when a global type declaration is seen, `struct_foo` and `foo` are both added as weak global names. This essentially means that we will use these names for the type *if possible*, but if there is another global with the same name, we will mangle the type name instead. Then, when actually translating the declaration, we check whether there's a "true" global with a conflicting name, in which case we mangle our name. If the user-friendly alias `foo` conflicts, we do not attempt to mangle it: we just don't emit it, because a mangled alias isn't particularly helpful. --- src/translate_c.zig | 76 +++++++++++++++++++++++++++++++++------ test/run_translated_c.zig | 24 +++++++++++++ test/translate_c.zig | 25 ++++++++++--- 3 files changed, 110 insertions(+), 15 deletions(-) diff --git a/src/translate_c.zig b/src/translate_c.zig index b8b221301c..582bd4c8ed 100644 --- a/src/translate_c.zig +++ b/src/translate_c.zig @@ -218,7 +218,7 @@ const Scope = struct { /// Check if the global scope contains the name, includes all decls that haven't been translated yet. fn contains(scope: *Root, name: []const u8) bool { - return scope.containsNow(name) or scope.context.global_names.contains(name); + return scope.containsNow(name) or scope.context.global_names.contains(name) or scope.context.weak_global_names.contains(name); } }; @@ -335,6 +335,15 @@ pub const Context = struct { /// up front in a pre-processing step. global_names: std.StringArrayHashMapUnmanaged(void) = .{}, + /// This is similar to `global_names`, but contains names which we would + /// *like* to use, but do not strictly *have* to if they are unavailable. + /// These are relevant to types, which ideally we would name like + /// 'struct_foo' with an alias 'foo', but if either of those names is taken, + /// may be mangled. + /// This is distinct from `global_names` so we can detect at a type + /// declaration whether or not the name is available. + weak_global_names: std.StringArrayHashMapUnmanaged(void) = .{}, + pattern_list: PatternList, fn getMangle(c: *Context) u32 { @@ -425,10 +434,8 @@ pub fn translate( try addMacros(&context); for (context.alias_list.items) |alias| { - if (!context.global_scope.sym_table.contains(alias.alias)) { - const node = try Tag.alias.create(arena, .{ .actual = alias.alias, .mangled = alias.name }); - try addTopLevelDecl(&context, alias.alias, node); - } + const node = try Tag.alias.create(arena, .{ .actual = alias.alias, .mangled = alias.name }); + try addTopLevelDecl(&context, alias.alias, node); } return ast.render(gpa, context.global_scope.nodes.items); @@ -493,7 +500,29 @@ fn declVisitorC(context: ?*anyopaque, decl: *const clang.Decl) callconv(.C) bool fn declVisitorNamesOnly(c: *Context, decl: *const clang.Decl) Error!void { if (decl.castToNamedDecl()) |named_decl| { const decl_name = try c.str(named_decl.getName_bytes_begin()); - try c.global_names.put(c.gpa, decl_name, {}); + + switch (decl.getKind()) { + .Record, .Enum => { + // These types are prefixed with the container kind. + const container_prefix = if (decl.getKind() == .Record) prefix: { + const record_decl: *const clang.RecordDecl = @ptrCast(decl); + if (record_decl.isUnion()) { + break :prefix "union"; + } else { + break :prefix "struct"; + } + } else "enum"; + const prefixed_name = try std.fmt.allocPrint(c.arena, "{s}_{s}", .{ container_prefix, decl_name }); + // `decl_name` and `prefixed_name` are the preferred names for this type. + // However, we can name it anything else if necessary, so these are "weak names". + try c.weak_global_names.ensureUnusedCapacity(c.gpa, 2); + c.weak_global_names.putAssumeCapacity(decl_name, {}); + c.weak_global_names.putAssumeCapacity(prefixed_name, {}); + }, + else => { + try c.global_names.put(c.gpa, decl_name, {}); + }, + } // Check for typedefs with unnamed enum/record child types. if (decl.getKind() == .Typedef) { @@ -1079,6 +1108,21 @@ fn flexibleArrayField(c: *Context, record_def: *const clang.RecordDecl) ?*const return flexible_field; } +fn mangleWeakGlobalName(c: *Context, want_name: []const u8) ![]const u8 { + var cur_name = want_name; + + if (!c.weak_global_names.contains(want_name)) { + // This type wasn't noticed by the name detection pass, so nothing has been treating this as + // a weak global name. We must mangle it to avoid conflicts with locals. + cur_name = try std.fmt.allocPrint(c.arena, "{s}_{d}", .{ want_name, c.getMangle() }); + } + + while (c.global_names.contains(cur_name)) { + cur_name = try std.fmt.allocPrint(c.arena, "{s}_{d}", .{ want_name, c.getMangle() }); + } + return cur_name; +} + fn transRecordDecl(c: *Context, scope: *Scope, record_decl: *const clang.RecordDecl) Error!void { if (c.decl_table.get(@intFromPtr(record_decl.getCanonicalDecl()))) |_| return; // Avoid processing this decl twice @@ -1113,6 +1157,9 @@ fn transRecordDecl(c: *Context, scope: *Scope, record_decl: *const clang.RecordD is_unnamed = true; } name = try std.fmt.allocPrint(c.arena, "{s}_{s}", .{ container_kind_name, bare_name }); + if (toplevel and !is_unnamed) { + name = try mangleWeakGlobalName(c, name); + } } if (!toplevel) name = try bs.makeMangledName(c, name); try c.decl_table.putNoClobber(c.gpa, @intFromPtr(record_decl.getCanonicalDecl()), name); @@ -1217,7 +1264,10 @@ fn transRecordDecl(c: *Context, scope: *Scope, record_decl: *const clang.RecordD const node = Node.initPayload(&payload.base); if (toplevel) { try addTopLevelDecl(c, name, node); - if (!is_unnamed) + // Only add the alias if the name is available *and* it was caught by + // name detection. Don't bother performing a weak mangle, since a + // mangled name is of no real use here. + if (!is_unnamed and !c.global_names.contains(bare_name) and c.weak_global_names.contains(bare_name)) try c.alias_list.append(.{ .alias = bare_name, .name = name }); } else { try scope.appendNode(node); @@ -1246,6 +1296,9 @@ fn transEnumDecl(c: *Context, scope: *Scope, enum_decl: *const clang.EnumDecl) E is_unnamed = true; } name = try std.fmt.allocPrint(c.arena, "enum_{s}", .{bare_name}); + if (toplevel and !is_unnamed) { + name = try mangleWeakGlobalName(c, name); + } } if (!toplevel) name = try bs.makeMangledName(c, name); try c.decl_table.putNoClobber(c.gpa, @intFromPtr(enum_decl.getCanonicalDecl()), name); @@ -1313,7 +1366,10 @@ fn transEnumDecl(c: *Context, scope: *Scope, enum_decl: *const clang.EnumDecl) E const node = Node.initPayload(&payload.base); if (toplevel) { try addTopLevelDecl(c, name, node); - if (!is_unnamed) + // Only add the alias if the name is available *and* it was caught by + // name detection. Don't bother performing a weak mangle, since a + // mangled name is of no real use here. + if (!is_unnamed and !c.global_names.contains(bare_name) and c.weak_global_names.contains(bare_name)) try c.alias_list.append(.{ .alias = bare_name, .name = name }); } else { try scope.appendNode(node); @@ -4881,7 +4937,7 @@ fn transType(c: *Context, scope: *Scope, ty: *const clang.Type, source_loc: clan var trans_scope = scope; if (@as(*const clang.Decl, @ptrCast(record_decl)).castToNamedDecl()) |named_decl| { const decl_name = try c.str(named_decl.getName_bytes_begin()); - if (c.global_names.get(decl_name)) |_| trans_scope = &c.global_scope.base; + if (c.weak_global_names.contains(decl_name)) trans_scope = &c.global_scope.base; } try transRecordDecl(c, trans_scope, record_decl); const name = c.decl_table.get(@intFromPtr(record_decl.getCanonicalDecl())).?; @@ -4894,7 +4950,7 @@ fn transType(c: *Context, scope: *Scope, ty: *const clang.Type, source_loc: clan var trans_scope = scope; if (@as(*const clang.Decl, @ptrCast(enum_decl)).castToNamedDecl()) |named_decl| { const decl_name = try c.str(named_decl.getName_bytes_begin()); - if (c.global_names.get(decl_name)) |_| trans_scope = &c.global_scope.base; + if (c.weak_global_names.contains(decl_name)) trans_scope = &c.global_scope.base; } try transEnumDecl(c, trans_scope, enum_decl); const name = c.decl_table.get(@intFromPtr(enum_decl.getCanonicalDecl())).?; diff --git a/test/run_translated_c.zig b/test/run_translated_c.zig index 13b72c1738..526425341a 100644 --- a/test/run_translated_c.zig +++ b/test/run_translated_c.zig @@ -1905,4 +1905,28 @@ pub fn addCases(cases: *tests.RunTranslatedCContext) void { \\ return 0; \\} , ""); + + cases.add("struct without global declaration does not conflict with local variable name", + \\#include + \\static void foo(struct foobar *unused) {} + \\int main(void) { + \\ int struct_foobar = 123; + \\ if (struct_foobar != 123) abort(); + \\ int foobar = 456; + \\ if (foobar != 456) abort(); + \\ return 0; + \\} + , ""); + + cases.add("struct without global declaration does not conflict with global variable name", + \\#include + \\static void foo(struct foobar *unused) {} + \\static int struct_foobar = 123; + \\static int foobar = 456; + \\int main(void) { + \\ if (struct_foobar != 123) abort(); + \\ if (foobar != 456) abort(); + \\ return 0; + \\} + , ""); } diff --git a/test/translate_c.zig b/test/translate_c.zig index 3fad7f8fe5..367cbc5695 100644 --- a/test/translate_c.zig +++ b/test/translate_c.zig @@ -148,16 +148,16 @@ pub fn addCases(cases: *tests.TranslateCContext) void { \\} a = {}; \\#define PTR void * , &[_][]const u8{ - \\pub const struct_Bar = extern struct { + \\pub const struct_Bar_1 = extern struct { \\ a: c_int, \\}; \\pub const struct_Foo = extern struct { \\ a: c_int, - \\ b: struct_Bar, + \\ b: struct_Bar_1, \\}; \\pub export var a: struct_Foo = struct_Foo{ \\ .a = 0, - \\ .b = @import("std").mem.zeroes(struct_Bar), + \\ .b = @import("std").mem.zeroes(struct_Bar_1), \\}; , \\pub const PTR = ?*anyopaque; @@ -2361,11 +2361,11 @@ pub fn addCases(cases: *tests.TranslateCContext) void { \\ struct Bar c; \\}; , &[_][]const u8{ - \\pub const struct_Bar = extern struct { + \\pub const struct_Bar_1 = extern struct { \\ b: c_int, \\}; \\pub const struct_Foo = extern struct { - \\ c: struct_Bar, + \\ c: struct_Bar_1, \\}; }); } @@ -4135,4 +4135,19 @@ pub fn addCases(cases: *tests.TranslateCContext) void { , &[_][]const u8{ \\pub const FOO = @compileError("unable to translate macro: untranslatable usage of arg `x`"); }); + + cases.add("global struct whose default name conflicts with global is mangled", + \\struct foo { + \\ int x; + \\}; + \\const char *struct_foo = "hello world"; + , &[_][]const u8{ + \\pub const struct_foo_1 = extern struct { + \\ x: c_int, + \\}; + , + \\pub const foo = struct_foo_1; + , + \\pub export var struct_foo: [*c]const u8 = "hello world"; + }); }