const std = @import("std");
const Allocator = std.mem.Allocator;

/// Operation kinds for which `main` emits one function per size/ordering.
const AtomicOp = enum {
    cas,
    swp,
    ldadd,
    ldclr,
    ldeor,
    ldset,
};

/// Prints the generated source file to stdout: a fixed header, one naked
/// function per (size, ordering, operation) combination, and finally a
/// `comptime` block holding one `@export` line per emitted function.
pub fn main() !void {
    var arena_state = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena_state.deinit();
    const arena = arena_state.allocator();

    var buffered = std.io.bufferedWriter(std.io.getStdOut().writer());
    const out = buffered.writer();

    try out.writeAll(
        \\//! This file is generated by tools/gen_outline_atomics.zig.
        \\const builtin = @import("builtin");
        \\const std = @import("std");
        \\const linkage = @import("./common.zig").linkage;
        \\const always_has_lse = std.Target.aarch64.featureSetHas(builtin.cpu.features, .lse);
        \\
        \\/// This default is overridden at runtime after inspecting CPU properties.
        \\/// It is intentionally not exported in order to make the machine code that
        \\/// uses it a statically predicted direct branch rather than using the PLT,
        \\/// which ARM is concerned would have too much overhead.
        \\var __aarch64_have_lse_atomics: u8 = @intFromBool(always_has_lse);
        \\
        \\
    );

    const sizes = [_]N{ .one, .two, .four, .eight, .sixteen };
    const orderings = [_]Ordering{ .relax, .acq, .rel, .acq_rel };
    const operations = [_]AtomicOp{ .cas, .swp, .ldadd, .ldclr, .ldeor, .ldset };

    // The `@export` lines are accumulated here while the functions are being
    // written, then appended after the last function.
    var exports = std.ArrayList(u8).init(arena);
    try exports.appendSlice("\ncomptime {\n");

    for (sizes) |size| {
        for (orderings) |order| {
            for (operations) |op| {
                // The 16-byte size is only emitted for compare-and-swap.
                if (op != .cas and size == .sixteen) continue;

                const symbol = try std.fmt.allocPrint(
                    arena,
                    "__aarch64_{s}{d}_{s}",
                    .{ @tagName(op), size.toBytes(), @tagName(order) },
                );
                try writeFunction(arena, out, symbol, op, size, order);
                try exports.writer().print(
                    " @export({s}, .{{ .name = \"{s}\", .linkage = linkage }});\n",
                    .{ symbol, symbol },
                );
            }
        }
    }

    try out.writeAll(exports.items);
    try out.writeAll("}\n");
    try buffered.flush();
}

/// Writes one naked function called `name` to `w`.
///
/// The assembly text for (`op`, `n`, `order`) is generated first, then each of
/// its lines is emitted as a `\\` multiline-string line inside an
/// `asm volatile` expression. The emitted asm takes
/// `__aarch64_have_lse_atomics` as an input in `w16`.
/// `arena` is used for the generated assembly text; nothing is freed here.
fn writeFunction(
    arena: Allocator,
    w: anytype,
    name: []const u8,
    op: AtomicOp,
    n: N,
    order: Ordering,
) !void {
    const asm_text = switch (op) {
        .cas => try generateCas(arena, n, order),
        .swp => try generateSwp(arena, n, order),
        .ldadd => try generateLd(arena, n, order, .ldadd),
        .ldclr => try generateLd(arena, n, order, .ldclr),
        .ldeor => try generateLd(arena, n, order, .ldeor),
        .ldset => try generateLd(arena, n, order, .ldset),
    };

    try w.print("fn {s}() align(16) callconv(.Naked) void {{", .{name});
    try w.writeAll(
        \\
        \\ @setRuntimeSafety(false);
        \\ asm volatile (
        \\
    );

    // Re-quote every assembly line as a `\\` line of the emitted source.
    var asm_lines = std.mem.splitScalar(u8, asm_text, '\n');
    while (asm_lines.next()) |asm_line| {
        try w.print(" \\\\{s}\n", .{asm_line});
    }

    try w.writeAll(
        \\ :
        \\ : [__aarch64_have_lse_atomics] "{w16}" (__aarch64_have_lse_atomics),
        \\ : "w15", "w16", "w17", "memory"
        \\ );
        \\ unreachable;
        \\}
        \\
    );
}

/// Operand size; the tag value is the size in bytes.
const N = enum(u8) {
    one = 1,
    two = 2,
    four = 4,
    eight = 8,
    sixteen = 16,

    /// Size-dependent text fragments substituted into the assembly templates.
    const Defines = struct {
        /// Suffix appended to the `ld..xr` / `st..xr` mnemonics.
        s: []const u8,
        /// Instruction that copies operand 0 into register 16 in the CAS fallback.
        uxt: []const u8,
        /// Hex constant added into the `.inst` word.
        b: []const u8,
    };

    /// Returns the template fragments for this size.
    fn defines(n: N) Defines {
        return switch (n) {
            .one => Defines{ .s = "b", .uxt = "uxtb", .b = "0x00000000" },
            .two => Defines{ .s = "h", .uxt = "uxth", .b = "0x40000000" },
            .four => Defines{ .s = "", .uxt = "mov", .b = "0x80000000" },
            .eight => Defines{ .s = "", .uxt = "mov", .b = "0xc0000000" },
            .sixteen => Defines{ .s = "", .uxt = "mov", .b = "0x00000000" },
        };
    }

    /// Register name prefix: "w" for sizes below 8 bytes, "x" otherwise.
    fn register(n: N) []const u8 {
        return switch (n) {
            .one, .two, .four => "w",
            .eight, .sixteen => "x",
        };
    }

    /// Size in bytes.
    fn toBytes(n: N) u8 {
        return @intFromEnum(n);
    }

    /// Size in bits.
    fn toBits(n: N) u8 {
        return n.toBytes() * 8;
    }
};

/// Memory ordering variant; the tag name is used as the symbol suffix by `main`.
const Ordering = enum {
    relax,
    acq,
    rel,
    acq_rel,

    /// Ordering-dependent text fragments substituted into the assembly templates.
    const Defines = struct {
        /// Underscore-prefixed ordering name (not referenced by the generators in this file).
        suff: []const u8,
        /// Inserted after "ld" in the load-exclusive mnemonic.
        a: []const u8,
        /// Inserted after "st" in the store-exclusive mnemonic.
        l: []const u8,
        /// Hex constant added into the `.inst` word of the CAS/CASP templates.
        m: []const u8,
        /// Hex constant added into the `.inst` word of the SWP/LD templates.
        n: []const u8,
    };

    /// Returns the template fragments for this ordering.
    fn defines(self: @This()) Defines {
        return switch (self) {
            .relax => Defines{
                .suff = "_relax",
                .a = "",
                .l = "",
                .m = "0x000000",
                .n = "0x000000",
            },
            .acq => Defines{
                .suff = "_acq",
                .a = "a",
                .l = "",
                .m = "0x400000",
                .n = "0x800000",
            },
            .rel => Defines{
                .suff = "_rel",
                .a = "",
                .l = "l",
                .m = "0x008000",
                .n = "0x400000",
            },
            .acq_rel => Defines{
                .suff = "_acq_rel",
                .a = "a",
                .l = "l",
                .m = "0x408000",
                .n = "0xc00000",
            },
        };
    }
};

/// The operations handled by `generateLd`.
const LdName = enum { ldadd, ldclr, ldeor, ldset };

/// Builds the assembly text for a compare-and-swap of size `n` with `order`.
///
/// The text branches to label 8 when `w16` is zero; otherwise it executes a
/// single `.inst` word and returns. Label 8 starts an exclusive load/store
/// retry loop. The 16-byte size uses the pair (`ld..xp` / `st..xp`) template.
/// The returned slice is allocated from `arena`.
fn generateCas(arena: Allocator, n: N, order: Ordering) ![]const u8 {
    const size = n.defines();
    const ord = order.defines();

    if (n == .sixteen) {
        return std.fmt.allocPrint(arena,
            \\ cbz w16, 8f
            \\ .inst 0x48207c82 + {[m]s}
            \\ ret
            \\8:
            \\ mov x16, x0
            \\ mov x17, x1
            \\0:
            \\ ld{[a]s}xp x0, x1, [x4]
            \\ cmp x0, x16
            \\ ccmp x1, x17, #0, eq
            \\ bne 1f
            \\ st{[l]s}xp w15, x2, x3, [x4]
            \\ cbnz w15, 0b
            \\1:
            \\ ret
        , .{
            .m = ord.m,
            .a = ord.a,
            .l = ord.l,
        });
    }

    return std.fmt.allocPrint(arena,
        \\ cbz w16, 8f
        \\ .inst 0x08a07c41 + {[b]s} + {[m]s}
        \\ ret
        \\8:
        \\ {[uxt]s} {[reg]s}16, {[reg]s}0
        \\0:
        \\ ld{[a]s}xr{[s]s} {[reg]s}0, [x2]
        \\ cmp {[reg]s}0, {[reg]s}16
        \\ bne 1f
        \\ st{[l]s}xr{[s]s} w17, {[reg]s}1, [x2]
        \\ cbnz w17, 0b
        \\1:
        \\ ret
    , .{
        .b = size.b,
        .m = ord.m,
        .uxt = size.uxt,
        .reg = n.register(),
        .a = ord.a,
        .s = size.s,
        .l = ord.l,
    });
}

/// Builds the assembly text for a swap of size `n` with `order`.
///
/// Same shape as `generateCas`: a `.inst` word when `w16` is non-zero,
/// otherwise an exclusive load/store retry loop on the address in `x1`.
/// The returned slice is allocated from `arena`.
fn generateSwp(arena: Allocator, n: N, order: Ordering) ![]const u8 {
    const size = n.defines();
    const ord = order.defines();

    return std.fmt.allocPrint(arena,
        \\ cbz w16, 8f
        \\ .inst 0x38208020 + {[b]s} + {[n]s}
        \\ ret
        \\8:
        \\ mov {[reg]s}16, {[reg]s}0
        \\0:
        \\ ld{[a]s}xr{[s]s} {[reg]s}0, [x1]
        \\ st{[l]s}xr{[s]s} w17, {[reg]s}16, [x1]
        \\ cbnz w17, 0b
        \\1:
        \\ ret
    , .{
        .b = size.b,
        .n = ord.n,
        .reg = n.register(),
        .a = ord.a,
        .s = size.s,
        .l = ord.l,
    });
}

/// Builds the assembly text for the load-and-operate function selected by `ld`.
///
/// `ld` picks both the constant added into the `.inst` word and the ALU
/// mnemonic (`add`, `bic`, `eor`, `orr`) used between the exclusive load and
/// store of the fallback loop. The returned slice is allocated from `arena`.
fn generateLd(arena: Allocator, n: N, order: Ordering, ld: LdName) ![]const u8 {
    const Variant = struct {
        mnemonic: []const u8,
        inst_bits: []const u8,
    };
    const variant = switch (ld) {
        .ldadd => Variant{ .mnemonic = "add", .inst_bits = "0x0000" },
        .ldclr => Variant{ .mnemonic = "bic", .inst_bits = "0x1000" },
        .ldeor => Variant{ .mnemonic = "eor", .inst_bits = "0x2000" },
        .ldset => Variant{ .mnemonic = "orr", .inst_bits = "0x3000" },
    };
    const size = n.defines();
    const ord = order.defines();

    return std.fmt.allocPrint(arena,
        \\ cbz w16, 8f
        \\ .inst 0x38200020 + {[op_n]s} + {[b]s} + {[n]s}
        \\ ret
        \\8:
        \\ mov {[reg]s}16, {[reg]s}0
        \\0:
        \\ ld{[a]s}xr{[s]s} {[reg]s}0, [x1]
        \\ {[op]s} {[reg]s}17, {[reg]s}0, {[reg]s}16
        \\ st{[l]s}xr{[s]s} w15, {[reg]s}17, [x1]
        \\ cbnz w15, 0b
        \\1:
        \\ ret
    , .{
        .op_n = variant.inst_bits,
        .b = size.b,
        .n = ord.n,
        .reg = n.register(),
        .a = ord.a,
        .s = size.s,
        .op = variant.mnemonic,
        .l = ord.l,
    });
}