stage2 x86_64: use abi size to determine 64-bit operation

From my very cursory reading, it seems that the register manager doesn't distinguish between registers that are physically the same but have different sizes. In that case, this means that during codegen, we can't rely on `reg.size()` when determining the width of the operations we have to perform. Instead, we must use some form of `ty.abiSize(self.target.*)` to determine the size of the type we're operating with. If this size is 64 bits, then we should enable 64-bit operation. This fixed a bug in the codegen for spilling instructions, which was overwriting the previous stack entry with zeroes. See the modified test case in this commit.
2024-12-03 10:28:48 +00:00 · 2021-04-09 14:05:53 +08:00 · 2021-04-09 14:05:53 +08:00 · 5bd464e386
commit 5bd464e386
parent 36df1526da
2 changed files with 28 additions and 26 deletions
--- a/src/codegen.zig
+++ b/src/codegen.zig
@ -1687,7 +1687,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                        .register => |src_reg| {
                            // register, register use mr + 1 addressing mode: r/m16/32/64, r16/32/64
                            try self.encodeX8664Instruction(src, Instruction{
-                                .operand_size_64 = dst_reg.size() == 64,
+                                .operand_size_64 = dst_ty.abiSize(self.target.*) == 64,
                                .primary_opcode_1b = mr + 1,
                                // TODO: Explicit optional wrap due to stage 1 miscompilation :(
                                //       https://github.com/ziglang/zig/issues/6515
@ -1705,7 +1705,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                            const imm32 = @intCast(u31, imm); // This case must be handled before calling genX8664BinMathCode.
                            if (imm32 <= math.maxInt(u7)) {
                                try self.encodeX8664Instruction(src, Instruction{
-                                    .operand_size_64 = dst_reg.size() == 64,
+                                    .operand_size_64 = dst_ty.abiSize(self.target.*) == 64,
                                    .primary_opcode_1b = 0x83,
                                    .opcode_extension = opx,
                                    // TODO: Explicit optional wrap due to stage 1 miscompilation :(
@ -1719,7 +1719,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                                });
                            } else {
                                try self.encodeX8664Instruction(src, Instruction{
-                                    .operand_size_64 = dst_reg.size() == 64,
+                                    .operand_size_64 = dst_ty.abiSize(self.target.*) == 64,
                                    .primary_opcode_1b = 0x81,
                                    .opcode_extension = opx,
                                    // TODO: Explicit optional wrap due to stage 1 miscompilation :(
@ -1743,7 +1743,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                                return self.fail(src, "stack offset too large", .{});
                            }
                            try self.encodeX8664Instruction(src, Instruction{
-                                .operand_size_64 = dst_reg.size() == 64,
+                                .operand_size_64 = abi_size == 64,
                                .primary_opcode_1b = mr + 0x3,
                                .reg = dst_reg,
                                // TODO: Explicit optional wrap due to stage 1 miscompilation :(
@ -1802,7 +1802,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                return self.fail(src, "stack offset too large", .{});
            }
            try self.encodeX8664Instruction(src, Instruction{
-                .operand_size_64 = reg.size() == 64,
+                .operand_size_64 = abi_size == 64,
                .primary_opcode_1b = opcode,
                .reg = reg,
                // TODO: Explicit optional wrap due to stage 1 miscompilation :(
@ -3707,7 +3707,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {

                        // This is a variant of 8B /r.
                        try self.encodeX8664Instruction(src, Instruction{
-                            .operand_size_64 = reg.size() == 64,
+                            .operand_size_64 = ty.abiSize(self.target.*) == 64,

                            .primary_opcode_1b = 0x8B,

@ -3740,7 +3740,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                            // LEA reg, [<offset>]
                            // manually do this instruction to make sure the offset into the disp32 field won't change.
                            try self.code.ensureCapacity(self.code.items.len + 7);
-                            self.rex(.{ .w = reg.size() == 64, .r = reg.isExtended() });
+                            self.rex(.{ .w = ty.abiSize(self.target.*) == 64, .r = reg.isExtended() });
                            self.code.appendSliceAssumeCapacity(&[_]u8{
                                0x8D,
                                0x05 | (@as(u8, reg.id() & 0b111) << 3),
@ -3749,7 +3749,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {

                            // MOV reg, [reg]
                            try self.encodeX8664Instruction(src, Instruction{
-                                .operand_size_64 = reg.size() == 64,
+                                .operand_size_64 = ty.abiSize(self.target.*) == 64,

                                .primary_opcode_1b = 0x8B,

@ -3771,7 +3771,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                            // 0b00RRR100, where RRR is the lower three bits of the register ID.
                            // The instruction is thus eight bytes; REX 0x8B 0b00RRR100 0x25 followed by a four-byte disp32.
                            try self.code.ensureCapacity(self.code.items.len + 8);
-                            self.rex(.{ .w = reg.size() == 64, .b = reg.isExtended() });
+                            self.rex(.{ .w = ty.abiSize(self.target.*) == 64, .r = reg.isExtended() });
                            self.code.appendSliceAssumeCapacity(&[_]u8{
                                0x8B,
                                0x04 | (@as(u8, reg.id() & 0b111) << 3), // R
@ -3809,7 +3809,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                                // Currently, we're only allowing 64-bit registers, so we need the `REX.W 8B /r` variant.
                                // TODO: determine whether to allow other sized registers, and if so, handle them properly.
                                try self.encodeX8664Instruction(src, Instruction{
-                                    .operand_size_64 = reg.size() == 64,
+                                    .operand_size_64 = ty.abiSize(self.target.*) == 64,
                                    .primary_opcode_1b = 0x8B,
                                    .reg = reg,
                                    // TODO: Explicit optional wrap due to stage 1 miscompilation :(
@ -3823,14 +3823,14 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                        }
                    },
                    .stack_offset => |unadjusted_off| {
-                        const size_bytes = @divExact(reg.size(), 8);
-                        const off = unadjusted_off + size_bytes;
+                        const abi_size = ty.abiSize(self.target.*);
+                        const off = unadjusted_off + abi_size;
                        if (off < std.math.minInt(i32) or off > std.math.maxInt(i32)) {
                            return self.fail(src, "stack offset too large", .{});
                        }
                        const ioff = -@intCast(i32, off);
                        try self.encodeX8664Instruction(src, Instruction{
-                            .operand_size_64 = reg.size() == 64,
+                            .operand_size_64 = ty.abiSize(self.target.*) == 64,
                            .primary_opcode_1b = 0x8B,
                            .reg = reg,
                            // TODO: Explicit optional wrap due to stage 1 miscompilation :(
--- a/test/stage2/test.zig
+++ b/test/stage2/test.zig
@ -740,7 +740,7 @@ pub fn addCases(ctx: *TestContext) !void {
        // Spilling registers to the stack.
        case.addCompareOutput(
            \\export fn _start() noreturn {
-            \\    assert(add(3, 4) == 791);
+            \\    assert(add(3, 4) == 1221);
            \\
            \\    exit();
            \\}
@ -756,19 +756,21 @@ pub fn addCases(ctx: *TestContext) !void {
            \\        const i = g + h; // 100
            \\        const j = i + d; // 110
            \\        const k = i + j; // 210
-            \\        const l = k + c; // 217
-            \\        const m = l + d; // 227
-            \\        const n = m + e; // 241
-            \\        const o = n + f; // 265
-            \\        const p = o + g; // 303
-            \\        const q = p + h; // 365
-            \\        const r = q + i; // 465
-            \\        const s = r + j; // 575
-            \\        const t = s + k; // 785
-            \\        break :blk t;
+            \\        const l = j + k; // 320
+            \\        const m = l + c; // 327
+            \\        const n = m + d; // 337
+            \\        const o = n + e; // 351
+            \\        const p = o + f; // 375
+            \\        const q = p + g; // 413
+            \\        const r = q + h; // 475
+            \\        const s = r + i; // 575
+            \\        const t = s + j; // 685
+            \\        const u = t + k; // 895
+            \\        const v = u + l; // 1215
+            \\        break :blk v;
            \\    };
-            \\    const y = x + a; // 788
-            \\    const z = y + a; // 791
+            \\    const y = x + a; // 1218
+            \\    const z = y + a; // 1221
            \\    return z;
            \\}
            \\