translate-c: prevent variable names conflicting with type names

This introduces the concept of a "weak global name" into translate-c.

translate-c consists of two passes. The first is important, because it
discovers all global names, which are used to prevent naming conflicts:
whenever we see an identifier in the second pass, we can mangle it if it
conflicts with any global or any other in-scope identifier.

Unfortunately, this is a bit tricky for structs, unions, and enums. In
C, these types are not represented by normal identifers, but by separate
tags - `struct foo` does not prevent an unrelated identifier `foo`
existing. In general, we want to translate type names to user-friendly
ones such as `struct_foo` and `foo` where possible, but we can't
guarantee such names will not conflict with real variable names.

This is where weak global names come in. In the initial pass, when a
global type declaration is seen, `struct_foo` and `foo` are both added
as weak global names. This essentially means that we will use these
names for the type *if possible*, but if there is another global with
the same name, we will mangle the type name instead. Then, when actually
translating the declaration, we check whether there's a "true" global
with a conflicting name, in which case we mangle our name. If the
user-friendly alias `foo` conflicts, we do not attempt to mangle it: we
just don't emit it, because a mangled alias isn't particularly helpful.
This commit is contained in:
mlugg 2023-09-18 02:05:35 +01:00 committed by Veikka Tuominen
parent d2a937838e
commit 9ea2076663
3 changed files with 110 additions and 15 deletions

View File

@ -218,7 +218,7 @@ const Scope = struct {
/// Check if the global scope contains the name, includes all decls that haven't been translated yet. /// Check if the global scope contains the name, includes all decls that haven't been translated yet.
fn contains(scope: *Root, name: []const u8) bool { fn contains(scope: *Root, name: []const u8) bool {
return scope.containsNow(name) or scope.context.global_names.contains(name); return scope.containsNow(name) or scope.context.global_names.contains(name) or scope.context.weak_global_names.contains(name);
} }
}; };
@ -335,6 +335,15 @@ pub const Context = struct {
/// up front in a pre-processing step. /// up front in a pre-processing step.
global_names: std.StringArrayHashMapUnmanaged(void) = .{}, global_names: std.StringArrayHashMapUnmanaged(void) = .{},
/// This is similar to `global_names`, but contains names which we would
/// *like* to use, but do not strictly *have* to if they are unavailable.
/// These are relevant to types, which ideally we would name like
/// 'struct_foo' with an alias 'foo', but if either of those names is taken,
/// may be mangled.
/// This is distinct from `global_names` so we can detect at a type
/// declaration whether or not the name is available.
weak_global_names: std.StringArrayHashMapUnmanaged(void) = .{},
pattern_list: PatternList, pattern_list: PatternList,
fn getMangle(c: *Context) u32 { fn getMangle(c: *Context) u32 {
@ -425,10 +434,8 @@ pub fn translate(
try addMacros(&context); try addMacros(&context);
for (context.alias_list.items) |alias| { for (context.alias_list.items) |alias| {
if (!context.global_scope.sym_table.contains(alias.alias)) { const node = try Tag.alias.create(arena, .{ .actual = alias.alias, .mangled = alias.name });
const node = try Tag.alias.create(arena, .{ .actual = alias.alias, .mangled = alias.name }); try addTopLevelDecl(&context, alias.alias, node);
try addTopLevelDecl(&context, alias.alias, node);
}
} }
return ast.render(gpa, context.global_scope.nodes.items); return ast.render(gpa, context.global_scope.nodes.items);
@ -493,7 +500,29 @@ fn declVisitorC(context: ?*anyopaque, decl: *const clang.Decl) callconv(.C) bool
fn declVisitorNamesOnly(c: *Context, decl: *const clang.Decl) Error!void { fn declVisitorNamesOnly(c: *Context, decl: *const clang.Decl) Error!void {
if (decl.castToNamedDecl()) |named_decl| { if (decl.castToNamedDecl()) |named_decl| {
const decl_name = try c.str(named_decl.getName_bytes_begin()); const decl_name = try c.str(named_decl.getName_bytes_begin());
try c.global_names.put(c.gpa, decl_name, {});
switch (decl.getKind()) {
.Record, .Enum => {
// These types are prefixed with the container kind.
const container_prefix = if (decl.getKind() == .Record) prefix: {
const record_decl: *const clang.RecordDecl = @ptrCast(decl);
if (record_decl.isUnion()) {
break :prefix "union";
} else {
break :prefix "struct";
}
} else "enum";
const prefixed_name = try std.fmt.allocPrint(c.arena, "{s}_{s}", .{ container_prefix, decl_name });
// `decl_name` and `prefixed_name` are the preferred names for this type.
// However, we can name it anything else if necessary, so these are "weak names".
try c.weak_global_names.ensureUnusedCapacity(c.gpa, 2);
c.weak_global_names.putAssumeCapacity(decl_name, {});
c.weak_global_names.putAssumeCapacity(prefixed_name, {});
},
else => {
try c.global_names.put(c.gpa, decl_name, {});
},
}
// Check for typedefs with unnamed enum/record child types. // Check for typedefs with unnamed enum/record child types.
if (decl.getKind() == .Typedef) { if (decl.getKind() == .Typedef) {
@ -1079,6 +1108,21 @@ fn flexibleArrayField(c: *Context, record_def: *const clang.RecordDecl) ?*const
return flexible_field; return flexible_field;
} }
fn mangleWeakGlobalName(c: *Context, want_name: []const u8) ![]const u8 {
var cur_name = want_name;
if (!c.weak_global_names.contains(want_name)) {
// This type wasn't noticed by the name detection pass, so nothing has been treating this as
// a weak global name. We must mangle it to avoid conflicts with locals.
cur_name = try std.fmt.allocPrint(c.arena, "{s}_{d}", .{ want_name, c.getMangle() });
}
while (c.global_names.contains(cur_name)) {
cur_name = try std.fmt.allocPrint(c.arena, "{s}_{d}", .{ want_name, c.getMangle() });
}
return cur_name;
}
fn transRecordDecl(c: *Context, scope: *Scope, record_decl: *const clang.RecordDecl) Error!void { fn transRecordDecl(c: *Context, scope: *Scope, record_decl: *const clang.RecordDecl) Error!void {
if (c.decl_table.get(@intFromPtr(record_decl.getCanonicalDecl()))) |_| if (c.decl_table.get(@intFromPtr(record_decl.getCanonicalDecl()))) |_|
return; // Avoid processing this decl twice return; // Avoid processing this decl twice
@ -1113,6 +1157,9 @@ fn transRecordDecl(c: *Context, scope: *Scope, record_decl: *const clang.RecordD
is_unnamed = true; is_unnamed = true;
} }
name = try std.fmt.allocPrint(c.arena, "{s}_{s}", .{ container_kind_name, bare_name }); name = try std.fmt.allocPrint(c.arena, "{s}_{s}", .{ container_kind_name, bare_name });
if (toplevel and !is_unnamed) {
name = try mangleWeakGlobalName(c, name);
}
} }
if (!toplevel) name = try bs.makeMangledName(c, name); if (!toplevel) name = try bs.makeMangledName(c, name);
try c.decl_table.putNoClobber(c.gpa, @intFromPtr(record_decl.getCanonicalDecl()), name); try c.decl_table.putNoClobber(c.gpa, @intFromPtr(record_decl.getCanonicalDecl()), name);
@ -1217,7 +1264,10 @@ fn transRecordDecl(c: *Context, scope: *Scope, record_decl: *const clang.RecordD
const node = Node.initPayload(&payload.base); const node = Node.initPayload(&payload.base);
if (toplevel) { if (toplevel) {
try addTopLevelDecl(c, name, node); try addTopLevelDecl(c, name, node);
if (!is_unnamed) // Only add the alias if the name is available *and* it was caught by
// name detection. Don't bother performing a weak mangle, since a
// mangled name is of no real use here.
if (!is_unnamed and !c.global_names.contains(bare_name) and c.weak_global_names.contains(bare_name))
try c.alias_list.append(.{ .alias = bare_name, .name = name }); try c.alias_list.append(.{ .alias = bare_name, .name = name });
} else { } else {
try scope.appendNode(node); try scope.appendNode(node);
@ -1246,6 +1296,9 @@ fn transEnumDecl(c: *Context, scope: *Scope, enum_decl: *const clang.EnumDecl) E
is_unnamed = true; is_unnamed = true;
} }
name = try std.fmt.allocPrint(c.arena, "enum_{s}", .{bare_name}); name = try std.fmt.allocPrint(c.arena, "enum_{s}", .{bare_name});
if (toplevel and !is_unnamed) {
name = try mangleWeakGlobalName(c, name);
}
} }
if (!toplevel) name = try bs.makeMangledName(c, name); if (!toplevel) name = try bs.makeMangledName(c, name);
try c.decl_table.putNoClobber(c.gpa, @intFromPtr(enum_decl.getCanonicalDecl()), name); try c.decl_table.putNoClobber(c.gpa, @intFromPtr(enum_decl.getCanonicalDecl()), name);
@ -1313,7 +1366,10 @@ fn transEnumDecl(c: *Context, scope: *Scope, enum_decl: *const clang.EnumDecl) E
const node = Node.initPayload(&payload.base); const node = Node.initPayload(&payload.base);
if (toplevel) { if (toplevel) {
try addTopLevelDecl(c, name, node); try addTopLevelDecl(c, name, node);
if (!is_unnamed) // Only add the alias if the name is available *and* it was caught by
// name detection. Don't bother performing a weak mangle, since a
// mangled name is of no real use here.
if (!is_unnamed and !c.global_names.contains(bare_name) and c.weak_global_names.contains(bare_name))
try c.alias_list.append(.{ .alias = bare_name, .name = name }); try c.alias_list.append(.{ .alias = bare_name, .name = name });
} else { } else {
try scope.appendNode(node); try scope.appendNode(node);
@ -4881,7 +4937,7 @@ fn transType(c: *Context, scope: *Scope, ty: *const clang.Type, source_loc: clan
var trans_scope = scope; var trans_scope = scope;
if (@as(*const clang.Decl, @ptrCast(record_decl)).castToNamedDecl()) |named_decl| { if (@as(*const clang.Decl, @ptrCast(record_decl)).castToNamedDecl()) |named_decl| {
const decl_name = try c.str(named_decl.getName_bytes_begin()); const decl_name = try c.str(named_decl.getName_bytes_begin());
if (c.global_names.get(decl_name)) |_| trans_scope = &c.global_scope.base; if (c.weak_global_names.contains(decl_name)) trans_scope = &c.global_scope.base;
} }
try transRecordDecl(c, trans_scope, record_decl); try transRecordDecl(c, trans_scope, record_decl);
const name = c.decl_table.get(@intFromPtr(record_decl.getCanonicalDecl())).?; const name = c.decl_table.get(@intFromPtr(record_decl.getCanonicalDecl())).?;
@ -4894,7 +4950,7 @@ fn transType(c: *Context, scope: *Scope, ty: *const clang.Type, source_loc: clan
var trans_scope = scope; var trans_scope = scope;
if (@as(*const clang.Decl, @ptrCast(enum_decl)).castToNamedDecl()) |named_decl| { if (@as(*const clang.Decl, @ptrCast(enum_decl)).castToNamedDecl()) |named_decl| {
const decl_name = try c.str(named_decl.getName_bytes_begin()); const decl_name = try c.str(named_decl.getName_bytes_begin());
if (c.global_names.get(decl_name)) |_| trans_scope = &c.global_scope.base; if (c.weak_global_names.contains(decl_name)) trans_scope = &c.global_scope.base;
} }
try transEnumDecl(c, trans_scope, enum_decl); try transEnumDecl(c, trans_scope, enum_decl);
const name = c.decl_table.get(@intFromPtr(enum_decl.getCanonicalDecl())).?; const name = c.decl_table.get(@intFromPtr(enum_decl.getCanonicalDecl())).?;

View File

@ -1905,4 +1905,28 @@ pub fn addCases(cases: *tests.RunTranslatedCContext) void {
\\ return 0; \\ return 0;
\\} \\}
, ""); , "");
cases.add("struct without global declaration does not conflict with local variable name",
\\#include <stdlib.h>
\\static void foo(struct foobar *unused) {}
\\int main(void) {
\\ int struct_foobar = 123;
\\ if (struct_foobar != 123) abort();
\\ int foobar = 456;
\\ if (foobar != 456) abort();
\\ return 0;
\\}
, "");
cases.add("struct without global declaration does not conflict with global variable name",
\\#include <stdlib.h>
\\static void foo(struct foobar *unused) {}
\\static int struct_foobar = 123;
\\static int foobar = 456;
\\int main(void) {
\\ if (struct_foobar != 123) abort();
\\ if (foobar != 456) abort();
\\ return 0;
\\}
, "");
} }

View File

@ -148,16 +148,16 @@ pub fn addCases(cases: *tests.TranslateCContext) void {
\\} a = {}; \\} a = {};
\\#define PTR void * \\#define PTR void *
, &[_][]const u8{ , &[_][]const u8{
\\pub const struct_Bar = extern struct { \\pub const struct_Bar_1 = extern struct {
\\ a: c_int, \\ a: c_int,
\\}; \\};
\\pub const struct_Foo = extern struct { \\pub const struct_Foo = extern struct {
\\ a: c_int, \\ a: c_int,
\\ b: struct_Bar, \\ b: struct_Bar_1,
\\}; \\};
\\pub export var a: struct_Foo = struct_Foo{ \\pub export var a: struct_Foo = struct_Foo{
\\ .a = 0, \\ .a = 0,
\\ .b = @import("std").mem.zeroes(struct_Bar), \\ .b = @import("std").mem.zeroes(struct_Bar_1),
\\}; \\};
, ,
\\pub const PTR = ?*anyopaque; \\pub const PTR = ?*anyopaque;
@ -2361,11 +2361,11 @@ pub fn addCases(cases: *tests.TranslateCContext) void {
\\ struct Bar c; \\ struct Bar c;
\\}; \\};
, &[_][]const u8{ , &[_][]const u8{
\\pub const struct_Bar = extern struct { \\pub const struct_Bar_1 = extern struct {
\\ b: c_int, \\ b: c_int,
\\}; \\};
\\pub const struct_Foo = extern struct { \\pub const struct_Foo = extern struct {
\\ c: struct_Bar, \\ c: struct_Bar_1,
\\}; \\};
}); });
} }
@ -4135,4 +4135,19 @@ pub fn addCases(cases: *tests.TranslateCContext) void {
, &[_][]const u8{ , &[_][]const u8{
\\pub const FOO = @compileError("unable to translate macro: untranslatable usage of arg `x`"); \\pub const FOO = @compileError("unable to translate macro: untranslatable usage of arg `x`");
}); });
cases.add("global struct whose default name conflicts with global is mangled",
\\struct foo {
\\ int x;
\\};
\\const char *struct_foo = "hello world";
, &[_][]const u8{
\\pub const struct_foo_1 = extern struct {
\\ x: c_int,
\\};
,
\\pub const foo = struct_foo_1;
,
\\pub export var struct_foo: [*c]const u8 = "hello world";
});
} }