From f9773ab622d84527ddfdb08d07443ec05cf28737 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 29 Mar 2022 10:11:24 +0200 Subject: [PATCH 1/5] x64: clean up abstraction for generating integer division --- src/arch/x86_64/CodeGen.zig | 54 ++++++++++++++++++------------------- src/arch/x86_64/Emit.zig | 7 +++-- src/arch/x86_64/Mir.zig | 1 + 3 files changed, 33 insertions(+), 29 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index bc0171899a..ef4eeba3d8 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -1501,11 +1501,12 @@ fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) !void { return self.fail("TODO implement airShlWithOverflow for {}", .{self.target.cpu.arch}); } -/// Generates signed or unsigned integer division. +/// Generates signed or unsigned integer multiplication/division. /// Requires use of .rax and .rdx registers. Spills them if necessary. /// Quotient is saved in .rax and remainder in .rdx. -fn genIntDivOpMir( +fn genIntMulDivOpMir( self: *Self, + tag: Mir.Inst.Tag, ty: Type, signedness: std.builtin.Signedness, lhs: MCValue, @@ -1513,7 +1514,7 @@ fn genIntDivOpMir( ) !void { const abi_size = @intCast(u32, ty.abiSize(self.target.*)); if (abi_size > 8) { - return self.fail("TODO implement genIntDivOpMir for ABI size larger than 8", .{}); + return self.fail("TODO implement genIntMulDivOpMir for ABI size larger than 8", .{}); } try self.register_manager.getReg(.rax, null); @@ -1521,17 +1522,7 @@ fn genIntDivOpMir( self.register_manager.freezeRegs(&.{ .rax, .rdx }); defer self.register_manager.unfreezeRegs(&.{ .rax, .rdx }); - const dividend = switch (lhs) { - .register => lhs, - else => blk: { - const reg = try self.copyToTmpRegister(ty, lhs); - break :blk MCValue{ .register = reg }; - }, - }; - try self.genSetReg(ty, .rax, dividend); - - self.register_manager.freezeRegs(&.{dividend.register}); - defer self.register_manager.unfreezeRegs(&.{dividend.register}); + try 
self.genSetReg(ty, .rax, lhs); switch (signedness) { .signed => { @@ -1555,22 +1546,19 @@ fn genIntDivOpMir( }, } - const divisor = switch (rhs) { + const factor = switch (rhs) { .register => rhs, + .stack_offset => rhs, else => blk: { const reg = try self.copyToTmpRegister(ty, rhs); break :blk MCValue{ .register = reg }; }, }; - const op_tag: Mir.Inst.Tag = switch (signedness) { - .signed => .idiv, - .unsigned => .div, - }; - switch (divisor) { + switch (factor) { .register => |reg| { _ = try self.addInst(.{ - .tag = op_tag, + .tag = tag, .ops = (Mir.Ops{ .reg1 = reg, }).encode(), @@ -1579,7 +1567,7 @@ fn genIntDivOpMir( }, .stack_offset => |off| { _ = try self.addInst(.{ - .tag = op_tag, + .tag = tag, .ops = (Mir.Ops{ .reg2 = .rbp, .flags = switch (abi_size) { @@ -1612,7 +1600,10 @@ fn genInlineIntDivFloor(self: *Self, ty: Type, lhs: MCValue, rhs: MCValue) !MCVa self.register_manager.freezeRegs(&.{divisor}); defer self.register_manager.unfreezeRegs(&.{ dividend, divisor }); - try self.genIntDivOpMir(Type.isize, signedness, .{ .register = dividend }, .{ .register = divisor }); + try self.genIntMulDivOpMir(switch (signedness) { + .signed => .idiv, + .unsigned => .div, + }, Type.isize, signedness, .{ .register = dividend }, .{ .register = divisor }); _ = try self.addInst(.{ .tag = .xor, @@ -1673,13 +1664,16 @@ fn airDiv(self: *Self, inst: Air.Inst.Index) !void { const signedness = ty.intInfo(self.target.*).signedness; if (signedness == .unsigned) { - try self.genIntDivOpMir(ty, signedness, lhs, rhs); + try self.genIntMulDivOpMir(.div, ty, signedness, lhs, rhs); break :result MCValue{ .register = .rax }; } switch (tag) { .div_exact, .div_trunc => { - try self.genIntDivOpMir(ty, signedness, lhs, rhs); + try self.genIntMulDivOpMir(switch (signedness) { + .signed => .idiv, + .unsigned => .div, + }, ty, signedness, lhs, rhs); break :result MCValue{ .register = .rax }; }, .div_floor => { @@ -1704,7 +1698,10 @@ fn airRem(self: *Self, inst: Air.Inst.Index) !void { const lhs 
= try self.resolveInst(bin_op.lhs); const rhs = try self.resolveInst(bin_op.rhs); const signedness = ty.intInfo(self.target.*).signedness; - try self.genIntDivOpMir(ty, signedness, lhs, rhs); + try self.genIntMulDivOpMir(switch (signedness) { + .signed => .idiv, + .unsigned => .div, + }, ty, signedness, lhs, rhs); break :result MCValue{ .register = .rdx }; }; return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); @@ -1725,7 +1722,10 @@ fn airMod(self: *Self, inst: Air.Inst.Index) !void { const signedness = ty.intInfo(self.target.*).signedness; switch (signedness) { .unsigned => { - try self.genIntDivOpMir(ty, signedness, lhs, rhs); + try self.genIntMulDivOpMir(switch (signedness) { + .signed => .idiv, + .unsigned => .div, + }, ty, signedness, lhs, rhs); break :result MCValue{ .register = .rdx }; }, .signed => { diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig index 8d478f0a64..e8ea10bf29 100644 --- a/src/arch/x86_64/Emit.zig +++ b/src/arch/x86_64/Emit.zig @@ -145,6 +145,7 @@ pub fn lowerMir(emit: *Emit) InnerError!void { .sar => try emit.mirShift(.sar, inst), .imul => try emit.mirMulDiv(.imul, inst), + .mul => try emit.mirMulDiv(.mul, inst), .idiv => try emit.mirMulDiv(.idiv, inst), .div => try emit.mirMulDiv(.div, inst), .imul_complex => try emit.mirIMulComplex(inst), @@ -1164,6 +1165,7 @@ const Tag = enum { brk, nop, imul, + mul, idiv, div, syscall, @@ -1411,7 +1413,7 @@ inline fn getOpCode(tag: Tag, enc: Encoding, is_one_byte: bool) ?OpCode { .setnl, .setge => OpCode.twoByte(0x0f, 0x9d), .setle, .setng => OpCode.twoByte(0x0f, 0x9e), .setnle, .setg => OpCode.twoByte(0x0f, 0x9f), - .idiv, .div, .imul => OpCode.oneByte(if (is_one_byte) 0xf6 else 0xf7), + .idiv, .div, .imul, .mul => OpCode.oneByte(if (is_one_byte) 0xf6 else 0xf7), .fisttp16 => OpCode.oneByte(0xdf), .fisttp32 => OpCode.oneByte(0xdb), .fisttp64 => OpCode.oneByte(0xdd), @@ -1554,9 +1556,10 @@ inline fn getModRmExt(tag: Tag) ?u3 { => 0x4, .shr => 0x5, .sar => 0x7, + 
.mul => 0x4, .imul => 0x5, - .idiv => 0x7, .div => 0x6, + .idiv => 0x7, .fisttp16 => 0x1, .fisttp32 => 0x1, .fisttp64 => 0x1, diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index 96a8eec93d..183a76e4b7 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -227,6 +227,7 @@ pub const Inst = struct { /// 0b11 qword ptr [reg2 + imm32] imul, idiv, + mul, div, /// ops flags: form: From ee6e3aef5deb4aedd8f65e0ba7c44b4979ed6a96 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 29 Mar 2022 10:39:25 +0200 Subject: [PATCH 2/5] x64: redo @mulWithOverflow using rax/rdx based multiplication --- src/arch/x86_64/CodeGen.zig | 76 +++++++++++++++++++++++++------------ test/behavior/math.zig | 8 ++-- 2 files changed, 54 insertions(+), 30 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index ef4eeba3d8..a800da2a9d 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -1254,7 +1254,7 @@ fn genPtrBinMathOp(self: *Self, inst: Air.Inst.Index, op_lhs: Air.Inst.Ref, op_r offset_mcv.freezeIfRegister(&self.register_manager); defer offset_mcv.unfreezeIfRegister(&self.register_manager); - try self.genIMulOpMir(offset_ty, offset_mcv, .{ .immediate = elem_size }); + try self.genIntMulComplexOpMir(offset_ty, offset_mcv, .{ .immediate = elem_size }); const tag = self.air.instructions.items(.tag)[inst]; switch (tag) { @@ -1396,10 +1396,27 @@ fn airSubSat(self: *Self, inst: Air.Inst.Index) !void { fn airMul(self: *Self, inst: Air.Inst.Index) !void { const bin_op = self.air.instructions.items(.data)[inst].bin_op; - const result: MCValue = if (self.liveness.isUnused(inst)) - .dead - else - try self.genBinMathOp(inst, bin_op.lhs, bin_op.rhs); + const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { + const ty = self.air.typeOfIndex(inst); + + if (ty.zigTypeTag() != .Int) { + return self.fail("TODO implement 'mul' for operands of dst type {}", .{ty.zigTypeTag()}); + } + + // Spill .rax 
and .rdx upfront to ensure we don't spill the operands too late. + try self.register_manager.getReg(.rax, null); + try self.register_manager.getReg(.rdx, null); + + const lhs = try self.resolveInst(bin_op.lhs); + const rhs = try self.resolveInst(bin_op.rhs); + + const signedness = ty.intInfo(self.target.*).signedness; + try self.genIntMulDivOpMir(switch (signedness) { + .signed => .imul, + .unsigned => .mul, + }, ty, signedness, lhs, rhs); + break :result MCValue{ .register = .rax }; + }; return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); } @@ -1474,23 +1491,31 @@ fn airSubWithOverflow(self: *Self, inst: Air.Inst.Index) !void { fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data; + const result = if (self.liveness.isUnused(inst)) .dead else result: { + const ty = self.air.typeOf(bin_op.lhs); + const signedness: std.builtin.Signedness = blk: { + if (ty.zigTypeTag() != .Int) { + return self.fail("TODO implement airMulWithOverflow for type {}", .{ty.fmtDebug()}); + } + break :blk ty.intInfo(self.target.*).signedness; + }; - if (self.liveness.isUnused(inst)) { - return self.finishAir(inst, .dead, .{ bin_op.lhs, bin_op.rhs, .none }); - } + // Spill .rax and .rdx upfront to ensure we don't spill the operands too late. 
+ try self.register_manager.getReg(.rax, null); + try self.register_manager.getReg(.rdx, null); - const ty = self.air.typeOf(bin_op.lhs); - const signedness: std.builtin.Signedness = blk: { - if (ty.zigTypeTag() != .Int) { - return self.fail("TODO implement airMulWithOverflow for type {}", .{ty.fmtDebug()}); + const lhs = try self.resolveInst(bin_op.lhs); + const rhs = try self.resolveInst(bin_op.rhs); + + try self.genIntMulDivOpMir(switch (signedness) { + .signed => .imul, + .unsigned => .mul, + }, ty, signedness, lhs, rhs); + + switch (signedness) { + .signed => break :result MCValue{ .register_overflow_signed = .rax }, + .unsigned => break :result MCValue{ .register_overflow_unsigned = .rax }, } - break :blk ty.intInfo(self.target.*).signedness; - }; - - const partial = try self.genBinMathOp(inst, bin_op.lhs, bin_op.rhs); - const result: MCValue = switch (signedness) { - .signed => .{ .register_overflow_signed = partial.register }, - .unsigned => .{ .register_overflow_unsigned = partial.register }, }; return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); @@ -1730,7 +1755,7 @@ fn airMod(self: *Self, inst: Air.Inst.Index) !void { }, .signed => { const div_floor = try self.genInlineIntDivFloor(ty, lhs, rhs); - try self.genIMulOpMir(ty, div_floor, rhs); + try self.genIntMulComplexOpMir(ty, div_floor, rhs); const reg = try self.copyToTmpRegister(ty, lhs); try self.genBinMathOpMir(.sub, ty, .{ .register = reg }, div_floor); @@ -2132,7 +2157,7 @@ fn airPtrSlicePtrPtr(self: *Self, inst: Air.Inst.Index) !void { fn elemOffset(self: *Self, index_ty: Type, index: MCValue, elem_size: u64) !Register { const reg = try self.copyToTmpRegister(index_ty, index); - try self.genIMulOpMir(index_ty, .{ .register = reg }, .{ .immediate = elem_size }); + try self.genIntMulComplexOpMir(index_ty, .{ .register = reg }, .{ .immediate = elem_size }); return reg; } @@ -3096,7 +3121,6 @@ fn genBinMathOp(self: *Self, inst: Air.Inst.Index, op_lhs: Air.Inst.Ref, op_rhs: 
.bool_or, .bit_or => try self.genBinMathOpMir(.@"or", dst_ty, dst_mcv, src_mcv), .bool_and, .bit_and => try self.genBinMathOpMir(.@"and", dst_ty, dst_mcv, src_mcv), .xor, .not => try self.genBinMathOpMir(.xor, dst_ty, dst_mcv, src_mcv), - .mul, .mulwrap, .mul_with_overflow => try self.genIMulOpMir(dst_ty, dst_mcv, src_mcv), else => unreachable, } return dst_mcv; @@ -3252,8 +3276,10 @@ fn genBinMathOpMir(self: *Self, mir_tag: Mir.Inst.Tag, dst_ty: Type, dst_mcv: MC } } -// Performs integer multiplication between dst_mcv and src_mcv, storing the result in dst_mcv. -fn genIMulOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: MCValue) !void { +/// Performs multi-operand integer multiplication between dst_mcv and src_mcv, storing the result in dst_mcv. +/// Does not use/spill .rax/.rdx. +/// Does not support byte-size operands. +fn genIntMulComplexOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: MCValue) !void { const abi_size = @intCast(u32, dst_ty.abiSize(self.target.*)); switch (dst_mcv) { .none => unreachable, @@ -3299,7 +3325,7 @@ fn genIMulOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: MCValue) ! 
} else { // TODO verify we don't spill and assign to the same register as dst_mcv const src_reg = try self.copyToTmpRegister(dst_ty, src_mcv); - return self.genIMulOpMir(dst_ty, dst_mcv, MCValue{ .register = src_reg }); + return self.genIntMulComplexOpMir(dst_ty, dst_mcv, MCValue{ .register = src_reg }); } }, .stack_offset => |off| { diff --git a/test/behavior/math.zig b/test/behavior/math.zig index 314d52be66..815c6e0efc 100644 --- a/test/behavior/math.zig +++ b/test/behavior/math.zig @@ -697,11 +697,9 @@ test "@mulWithOverflow" { try expect(!@mulWithOverflow(u8, a, b, &result)); try expect(result == 246); - if (builtin.zig_backend != .stage2_x86_64) { // TODO fix mul/imul on x86_64 - b = 4; - try expect(@mulWithOverflow(u8, a, b, &result)); - try expect(result == 236); - } + b = 4; + try expect(@mulWithOverflow(u8, a, b, &result)); + try expect(result == 236); } test "@subWithOverflow" { From 60879bc8ae216ddd33fab2e07d1d460e32636c95 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 29 Mar 2022 11:00:57 +0200 Subject: [PATCH 3/5] x64: clean up instruction tracking for div/mul ops --- src/arch/x86_64/CodeGen.zig | 71 ++++++++++++++++++++++++++++--------- 1 file changed, 54 insertions(+), 17 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index a800da2a9d..744c7fbf75 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -1404,8 +1404,10 @@ fn airMul(self: *Self, inst: Air.Inst.Index) !void { } // Spill .rax and .rdx upfront to ensure we don't spill the operands too late. 
- try self.register_manager.getReg(.rax, null); + try self.register_manager.getReg(.rax, inst); try self.register_manager.getReg(.rdx, null); + self.register_manager.freezeRegs(&.{ .rax, .rdx }); + defer self.register_manager.unfreezeRegs(&.{ .rax, .rdx }); const lhs = try self.resolveInst(bin_op.lhs); const rhs = try self.resolveInst(bin_op.rhs); @@ -1501,8 +1503,10 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { }; // Spill .rax and .rdx upfront to ensure we don't spill the operands too late. - try self.register_manager.getReg(.rax, null); + try self.register_manager.getReg(.rax, inst); try self.register_manager.getReg(.rdx, null); + self.register_manager.freezeRegs(&.{ .rax, .rdx }); + defer self.register_manager.unfreezeRegs(&.{ .rax, .rdx }); const lhs = try self.resolveInst(bin_op.lhs); const rhs = try self.resolveInst(bin_op.rhs); @@ -1527,7 +1531,7 @@ fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) !void { } /// Generates signed or unsigned integer multiplication/division. -/// Requires use of .rax and .rdx registers. Spills them if necessary. +/// Clobbers .rax and .rdx registers. /// Quotient is saved in .rax and remainder in .rdx. fn genIntMulDivOpMir( self: *Self, @@ -1542,11 +1546,6 @@ fn genIntMulDivOpMir( return self.fail("TODO implement genIntMulDivOpMir for ABI size larger than 8", .{}); } - try self.register_manager.getReg(.rax, null); - try self.register_manager.getReg(.rdx, null); - self.register_manager.freezeRegs(&.{ .rax, .rdx }); - defer self.register_manager.unfreezeRegs(&.{ .rax, .rdx }); - try self.genSetReg(ty, .rax, lhs); switch (signedness) { @@ -1610,6 +1609,7 @@ fn genIntMulDivOpMir( } } +/// Clobbers .rax and .rdx registers. 
fn genInlineIntDivFloor(self: *Self, ty: Type, lhs: MCValue, rhs: MCValue) !MCValue { const signedness = ty.intInfo(self.target.*).signedness; const dividend = switch (lhs) { @@ -1680,14 +1680,42 @@ fn airDiv(self: *Self, inst: Air.Inst.Index) !void { return self.fail("TODO implement {}", .{tag}); } + const signedness = ty.intInfo(self.target.*).signedness; + // Spill .rax and .rdx upfront to ensure we don't spill the operands too late. - try self.register_manager.getReg(.rax, null); + const track_rax: ?Air.Inst.Index = blk: { + if (signedness == .unsigned) break :blk inst; + switch (tag) { + .div_exact, .div_trunc => break :blk inst, + else => break :blk null, + } + }; + try self.register_manager.getReg(.rax, track_rax); try self.register_manager.getReg(.rdx, null); + self.register_manager.freezeRegs(&.{ .rax, .rdx }); + defer self.register_manager.unfreezeRegs(&.{ .rax, .rdx }); const lhs = try self.resolveInst(bin_op.lhs); - const rhs = try self.resolveInst(bin_op.rhs); + lhs.freezeIfRegister(&self.register_manager); + defer lhs.unfreezeIfRegister(&self.register_manager); + + const rhs = blk: { + const rhs = try self.resolveInst(bin_op.rhs); + if (signedness == .signed) { + switch (tag) { + .div_floor => { + rhs.freezeIfRegister(&self.register_manager); + defer rhs.unfreezeIfRegister(&self.register_manager); + break :blk try self.copyToRegisterWithInstTracking(inst, ty, rhs); + }, + else => {}, + } + } + break :blk rhs; + }; + rhs.freezeIfRegister(&self.register_manager); + defer rhs.unfreezeIfRegister(&self.register_manager); - const signedness = ty.intInfo(self.target.*).signedness; if (signedness == .unsigned) { try self.genIntMulDivOpMir(.div, ty, signedness, lhs, rhs); break :result MCValue{ .register = .rax }; @@ -1719,9 +1747,13 @@ fn airRem(self: *Self, inst: Air.Inst.Index) !void { } // Spill .rax and .rdx upfront to ensure we don't spill the operands too late. 
try self.register_manager.getReg(.rax, null); - try self.register_manager.getReg(.rdx, null); + try self.register_manager.getReg(.rdx, inst); + self.register_manager.freezeRegs(&.{ .rax, .rdx }); + defer self.register_manager.unfreezeRegs(&.{ .rax, .rdx }); + const lhs = try self.resolveInst(bin_op.lhs); const rhs = try self.resolveInst(bin_op.rhs); + const signedness = ty.intInfo(self.target.*).signedness; try self.genIntMulDivOpMir(switch (signedness) { .signed => .idiv, @@ -1739,12 +1771,17 @@ fn airMod(self: *Self, inst: Air.Inst.Index) !void { if (ty.zigTypeTag() != .Int) { return self.fail("TODO implement .mod for operands of dst type {}", .{ty.zigTypeTag()}); } + const signedness = ty.intInfo(self.target.*).signedness; + // Spill .rax and .rdx upfront to ensure we don't spill the operands too late. try self.register_manager.getReg(.rax, null); - try self.register_manager.getReg(.rdx, null); + try self.register_manager.getReg(.rdx, if (signedness == .unsigned) inst else null); + self.register_manager.freezeRegs(&.{ .rax, .rdx }); + defer self.register_manager.unfreezeRegs(&.{ .rax, .rdx }); + const lhs = try self.resolveInst(bin_op.lhs); const rhs = try self.resolveInst(bin_op.rhs); - const signedness = ty.intInfo(self.target.*).signedness; + switch (signedness) { .unsigned => { try self.genIntMulDivOpMir(switch (signedness) { @@ -1757,10 +1794,10 @@ fn airMod(self: *Self, inst: Air.Inst.Index) !void { const div_floor = try self.genInlineIntDivFloor(ty, lhs, rhs); try self.genIntMulComplexOpMir(ty, div_floor, rhs); - const reg = try self.copyToTmpRegister(ty, lhs); - try self.genBinMathOpMir(.sub, ty, .{ .register = reg }, div_floor); + const result = try self.copyToRegisterWithInstTracking(inst, ty, lhs); + try self.genBinMathOpMir(.sub, ty, result, div_floor); - break :result MCValue{ .register = reg }; + break :result result; }, } }; From d447cd940d7da884f0d699d9da679d8bbabb237a Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 29 Mar 2022 11:50:25 
+0200 Subject: [PATCH 4/5] x64: track callee and caller saved registers This is now required to correctly track and spill registers required for some ops such as `mul` or `div` (both require use of `.rax` and `.rdx` registers). --- src/arch/x86_64/CodeGen.zig | 10 ++++++++-- src/arch/x86_64/abi.zig | 11 ++++++++--- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 744c7fbf75..854017054b 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -31,6 +31,8 @@ const bits = @import("bits.zig"); const abi = @import("abi.zig"); const Register = bits.Register; const callee_preserved_regs = abi.callee_preserved_regs; +const caller_preserved_regs = abi.caller_preserved_regs; +const allocatable_registers = abi.allocatable_registers; const c_abi_int_param_regs = abi.c_abi_int_param_regs; const c_abi_int_return_regs = abi.c_abi_int_return_regs; @@ -40,7 +42,7 @@ const InnerError = error{ OutOfRegisters, }; -const RegisterManager = RegisterManagerFn(Self, Register, &callee_preserved_regs); +const RegisterManager = RegisterManagerFn(Self, Register, &allocatable_registers); gpa: Allocator, air: Air, @@ -3519,6 +3521,10 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions. try self.spillCompareFlagsIfOccupied(); + for (caller_preserved_regs) |reg| { + try self.register_manager.getReg(reg, null); + } + if (info.return_value == .stack_offset) { const ret_ty = fn_ty.fnReturnType(); const ret_abi_size = @intCast(u32, ret_ty.abiSize(self.target.*)); @@ -3715,7 +3721,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions.
const result: MCValue = result: { switch (info.return_value) { .register => |reg| { - if (RegisterManager.indexOfReg(&callee_preserved_regs, reg) == null) { + if (RegisterManager.indexOfRegIntoTracked(reg) == null) { // Save function return value in a callee saved register break :result try self.copyToRegisterWithInstTracking( inst, diff --git a/src/arch/x86_64/abi.zig b/src/arch/x86_64/abi.zig index 9fd09ab60a..4afa3ad792 100644 --- a/src/arch/x86_64/abi.zig +++ b/src/arch/x86_64/abi.zig @@ -370,8 +370,13 @@ pub fn classifySystemV(ty: Type, target: Target) [8]Class { } } -/// These registers need to be preserved (saved on the stack) and restored by the callee before getting clobbered -/// and when the callee returns. -pub const callee_preserved_regs = [_]Register{ .rcx, .rsi, .rdi, .r8, .r9, .r10, .r11 }; +/// Note that .rsp and .rbp also belong to this set, however, we never expect to use them +/// for anything else but stack offset tracking therefore we exclude them from this set. +pub const callee_preserved_regs = [_]Register{ .rbx, .r12, .r13, .r14, .r15 }; +/// These registers need to be preserved (saved on the stack) and restored by the caller before +/// the caller relinquishes control to a subroutine via call instruction (or similar). +/// In other words, these registers are free to use by the callee. 
+pub const caller_preserved_regs = [_]Register{ .rax, .rcx, .rdx, .rsi, .rdi, .r8, .r9, .r10, .r11 }; +pub const allocatable_registers = callee_preserved_regs ++ caller_preserved_regs; pub const c_abi_int_param_regs = [_]Register{ .rdi, .rsi, .rdx, .rcx, .r8, .r9 }; pub const c_abi_int_return_regs = [_]Register{ .rax, .rdx }; From 376d0878ec2e366633e2dbc7c91ab7ae2a6ae5b7 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 29 Mar 2022 18:08:03 +0200 Subject: [PATCH 5/5] x64: spill .rdi to stack if expecting return value saved on stack Since .rdi is not part of the callee saved registers, it needs to be proactively spilled to the stack so that we don't clobber the address at which the return value should be saved. --- src/arch/x86_64/CodeGen.zig | 113 ++++++++++++++---------------------- 1 file changed, 45 insertions(+), 68 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 854017054b..da25541cd1 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -58,7 +58,6 @@ arg_index: u32, src_loc: Module.SrcLoc, stack_align: u32, -ret_backpatches: std.ArrayListUnmanaged(Mir.Inst.Index) = .{}, compare_flags_inst: ?Air.Inst.Index = null, /// MIR Instructions @@ -353,7 +352,6 @@ pub fn generate( std.AutoHashMap(Mir.Inst.Index, Air.Inst.Index).init(bin_file.allocator) else {}, }; - defer function.ret_backpatches.deinit(bin_file.allocator); defer function.stack.deinit(bin_file.allocator); defer function.blocks.deinit(bin_file.allocator); defer function.exitlude_jump_relocs.deinit(bin_file.allocator); @@ -482,6 +480,21 @@ fn gen(self: *Self) InnerError!void { .data = undefined, }); + if (self.ret_mcv == .stack_offset) { + // The address where to store the return value for the caller is in `.rdi` + // register which the callee is free to clobber. Therefore, we purposely + // spill it to stack immediately.
+ const ptr_ty = Type.usize; + const abi_size = @intCast(u32, ptr_ty.abiSize(self.target.*)); + const abi_align = ptr_ty.abiAlignment(self.target.*); + const stack_offset = mem.alignForwardGeneric(u32, self.next_stack_offset + abi_size, abi_align); + self.next_stack_offset = stack_offset; + self.max_end_stack = @maximum(self.max_end_stack, self.next_stack_offset); + try self.genSetStack(ptr_ty, @intCast(i32, stack_offset), MCValue{ .register = .rdi }, .{}); + self.ret_mcv = MCValue{ .stack_offset = @intCast(i32, stack_offset) }; + log.debug("gen: spilling .rdi to stack at offset {}", .{stack_offset}); + } + _ = try self.addInst(.{ .tag = .dbg_prologue_end, .ops = undefined, @@ -519,23 +532,6 @@ fn gen(self: *Self) InnerError!void { var disp = data.disp + 8; inline for (callee_preserved_regs) |reg, i| { if (self.register_manager.isRegAllocated(reg)) { - if (reg.to64() == .rdi) { - for (self.ret_backpatches.items) |inst| { - log.debug(".rdi was spilled, backpatching with mov from stack at offset {}", .{ - -@intCast(i32, disp), - }); - const ops = Mir.Ops.decode(self.mir_instructions.items(.ops)[inst]); - self.mir_instructions.set(inst, Mir.Inst{ - .tag = .mov, - .ops = (Mir.Ops{ - .reg1 = ops.reg1, - .reg2 = .rbp, - .flags = 0b01, - }).encode(), - .data = .{ .imm = @bitCast(u32, -@intCast(i32, disp)) }, - }); - } - } data.regs |= 1 << @intCast(u5, i); self.max_end_stack += 8; disp += 8; @@ -912,8 +908,7 @@ fn allocMem(self: *Self, inst: Air.Inst.Index, abi_size: u32, abi_align: u32) !u // TODO find a free slot instead of always appending const offset = mem.alignForwardGeneric(u32, self.next_stack_offset + abi_size, abi_align); self.next_stack_offset = offset; - if (self.next_stack_offset > self.max_end_stack) - self.max_end_stack = self.next_stack_offset; + self.max_end_stack = @maximum(self.max_end_stack, self.next_stack_offset); try self.stack.putNoClobber(self.gpa, offset, .{ .inst = inst, .size = abi_size, @@ -3316,7 +3311,6 @@ fn genBinMathOpMir(self: *Self, 
mir_tag: Mir.Inst.Tag, dst_ty: Type, dst_mcv: MC } /// Performs multi-operand integer multiplication between dst_mcv and src_mcv, storing the result in dst_mcv. -/// Does not use/spill .rax/.rdx. /// Does not support byte-size operands. fn genIntMulComplexOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: MCValue) !void { const abi_size = @intCast(u32, dst_ty.abiSize(self.target.*)); @@ -3530,10 +3524,11 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions. const ret_abi_size = @intCast(u32, ret_ty.abiSize(self.target.*)); const ret_abi_align = @intCast(u32, ret_ty.abiAlignment(self.target.*)); const stack_offset = @intCast(i32, try self.allocMem(inst, ret_abi_size, ret_abi_align)); + log.debug("airCall: return value on stack at offset {}", .{stack_offset}); try self.register_manager.getReg(.rdi, null); - self.register_manager.freezeRegs(&.{.rdi}); try self.genSetReg(Type.usize, .rdi, .{ .ptr_stack_offset = stack_offset }); + self.register_manager.freezeRegs(&.{.rdi}); info.return_value.stack_offset = stack_offset; } @@ -3720,15 +3715,13 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions. const result: MCValue = result: { switch (info.return_value) { - .register => |reg| { - if (RegisterManager.indexOfRegIntoTracked(reg) == null) { - // Save function return value in a callee saved register - break :result try self.copyToRegisterWithInstTracking( - inst, - self.air.typeOfIndex(inst), - info.return_value, - ); - } + .register => { + // Save function return value in a new register + break :result try self.copyToRegisterWithInstTracking( + inst, + self.air.typeOfIndex(inst), + info.return_value, + ); }, else => {}, } @@ -3755,19 +3748,11 @@ fn airRet(self: *Self, inst: Air.Inst.Index) !void { const ret_ty = self.fn_type.fnReturnType(); switch (self.ret_mcv) { .stack_offset => { - // TODO audit register allocation! 
- self.register_manager.freezeRegs(&.{ .rax, .rcx, .rdi }); - defer self.register_manager.unfreezeRegs(&.{ .rax, .rcx, .rdi }); - const reg = try self.register_manager.allocReg(null); - const backpatch = try self.addInst(.{ - .tag = .mov, - .ops = (Mir.Ops{ - .reg1 = reg, - .reg2 = .rdi, - }).encode(), - .data = undefined, - }); - try self.ret_backpatches.append(self.gpa, backpatch); + self.register_manager.freezeRegs(&.{ .rax, .rcx }); + defer self.register_manager.unfreezeRegs(&.{ .rax, .rcx }); + const reg = try self.copyToTmpRegister(Type.usize, self.ret_mcv); + self.register_manager.freezeRegs(&.{reg}); + defer self.register_manager.unfreezeRegs(&.{reg}); try self.genSetStack(ret_ty, 0, operand, .{ .source_stack_base = .rbp, .dest_stack_base = reg, @@ -3798,19 +3783,11 @@ fn airRetLoad(self: *Self, inst: Air.Inst.Index) !void { const elem_ty = ptr_ty.elemType(); switch (self.ret_mcv) { .stack_offset => { - // TODO audit register allocation! - self.register_manager.freezeRegs(&.{ .rax, .rcx, .rdi }); - defer self.register_manager.unfreezeRegs(&.{ .rax, .rcx, .rdi }); - const reg = try self.register_manager.allocReg(null); - const backpatch = try self.addInst(.{ - .tag = .mov, - .ops = (Mir.Ops{ - .reg1 = reg, - .reg2 = .rdi, - }).encode(), - .data = undefined, - }); - try self.ret_backpatches.append(self.gpa, backpatch); + self.register_manager.freezeRegs(&.{ .rax, .rcx }); + defer self.register_manager.unfreezeRegs(&.{ .rax, .rcx }); + const reg = try self.copyToTmpRegister(Type.usize, self.ret_mcv); + self.register_manager.freezeRegs(&.{reg}); + defer self.register_manager.unfreezeRegs(&.{reg}); try self.genInlineMemcpy(.{ .stack_offset = 0 }, ptr, .{ .immediate = elem_ty.abiSize(self.target.*) }, .{ .source_stack_base = .rbp, .dest_stack_base = reg, @@ -5092,6 +5069,7 @@ const InlineMemcpyOpts = struct { dest_stack_base: ?Register = null, }; +/// Spills .rax and .rcx. 
fn genInlineMemcpy( self: *Self, dst_ptr: MCValue, @@ -5099,13 +5077,7 @@ fn genInlineMemcpy( len: MCValue, opts: InlineMemcpyOpts, ) InnerError!void { - // TODO this is wrong. We should check first if any of the operands is in `.rax` or `.rcx` before - // spilling. Consolidate with other TODOs regarding register allocation mechanics. - try self.register_manager.getReg(.rax, null); - try self.register_manager.getReg(.rcx, null); - self.register_manager.freezeRegs(&.{ .rax, .rcx }); - defer self.register_manager.unfreezeRegs(&.{ .rax, .rcx }); if (opts.source_stack_base) |reg| self.register_manager.freezeRegs(&.{reg}); defer if (opts.source_stack_base) |reg| self.register_manager.unfreezeRegs(&.{reg}); @@ -5145,7 +5117,6 @@ fn genInlineMemcpy( return self.fail("TODO implement memcpy for setting stack when dest is {}", .{dst_ptr}); }, } - self.register_manager.freezeRegs(&.{dst_addr_reg}); defer self.register_manager.unfreezeRegs(&.{dst_addr_reg}); @@ -5181,7 +5152,6 @@ fn genInlineMemcpy( return self.fail("TODO implement memcpy for setting stack when src is {}", .{src_ptr}); }, } - self.register_manager.freezeRegs(&.{src_addr_reg}); defer self.register_manager.unfreezeRegs(&.{src_addr_reg}); @@ -5189,6 +5159,11 @@ fn genInlineMemcpy( const count_reg = regs[0].to64(); const tmp_reg = regs[1].to8(); + self.register_manager.unfreezeRegs(&.{ .rax, .rcx }); + + try self.register_manager.getReg(.rax, null); + try self.register_manager.getReg(.rcx, null); + try self.genSetReg(Type.usize, count_reg, len); // mov rcx, 0 @@ -5284,6 +5259,7 @@ fn genInlineMemcpy( try self.performReloc(loop_reloc); } +/// Spills .rax register. 
fn genInlineMemset( self: *Self, dst_ptr: MCValue, @@ -5291,9 +5267,7 @@ fn genInlineMemset( len: MCValue, opts: InlineMemcpyOpts, ) InnerError!void { - try self.register_manager.getReg(.rax, null); self.register_manager.freezeRegs(&.{.rax}); - defer self.register_manager.unfreezeRegs(&.{.rax}); const addr_reg = try self.register_manager.allocReg(null); switch (dst_ptr) { @@ -5330,6 +5304,9 @@ fn genInlineMemset( self.register_manager.freezeRegs(&.{addr_reg}); defer self.register_manager.unfreezeRegs(&.{addr_reg}); + self.register_manager.unfreezeRegs(&.{.rax}); + try self.register_manager.getReg(.rax, null); + try self.genSetReg(Type.usize, .rax, len); try self.genBinMathOpMir(.sub, Type.usize, .{ .register = .rax }, .{ .immediate = 1 });