From 8258530c39c347ca83fdbe7575460712960a83f3 Mon Sep 17 00:00:00 2001 From: mlugg Date: Sun, 9 Apr 2023 01:32:53 +0100 Subject: [PATCH] Liveness: control flow analysis This is a partial rewrite of Liveness, so has some other notable changes: - A proper multi-pass system to prevent code duplication - Better logging - Minor bugfixes --- src/Liveness.zig | 1882 ++++++++++++++++++---------------- src/arch/aarch64/CodeGen.zig | 6 - src/arch/arm/CodeGen.zig | 6 - src/arch/sparc64/CodeGen.zig | 6 - src/arch/wasm/CodeGen.zig | 6 - src/arch/x86_64/CodeGen.zig | 6 - src/codegen/c.zig | 5 - src/print_air.zig | 9 - 8 files changed, 987 insertions(+), 939 deletions(-) diff --git a/src/Liveness.zig b/src/Liveness.zig index 6412792037..02f6c34c3f 100644 --- a/src/Liveness.zig +++ b/src/Liveness.zig @@ -24,8 +24,10 @@ tomb_bits: []usize, /// Sparse table of specially handled instructions. The value is an index into the `extra` /// array. The meaning of the data depends on the AIR tag. /// * `cond_br` - points to a `CondBr` in `extra` at this index. +/// * `try`, `try_ptr` - points to a `CondBr` in `extra` at this index. The error path (the block +/// in the instruction) is considered the "else" path, and the rest of the block the "then". /// * `switch_br` - points to a `SwitchBr` in `extra` at this index. -/// * `loop` - points to a `Loop` in `extra` at this index. +/// * `block` - points to a `Block` in `extra` at this index. /// * `asm`, `call`, `aggregate_init` - the value is a set of bits which are the extra tomb /// bits of operands. /// The main tomb bits are still used and the extra ones are starting with the lsb of the @@ -52,11 +54,88 @@ pub const SwitchBr = struct { else_death_count: u32, }; -/// Trailing is the set of instructions whose lifetimes end at the end of the loop body. -pub const Loop = struct { +/// Trailing is the set of instructions which die in the block. Note that these are not additional +/// deaths (they are all recorded as normal within the block), but backends may use this information +/// as a more efficient way to track which instructions are still alive after a block. +pub const Block = struct { death_count: u32, }; +/// Liveness analysis runs in several passes. Each pass iterates backwards over instructions in +/// bodies, and recurses into bodies. +const LivenessPass = enum { + /// In this pass, we perform some basic analysis of loops to gain information the main pass + /// needs. In particular, for every `loop`, we track the following information: + /// * Every block which the loop body contains a `br` to. + /// * Every operand referenced within the loop body but created outside the loop. + /// This gives the main analysis pass enough information to determine the full set of + /// instructions which need to be alive when a loop repeats. This data is TEMPORARILY stored in + /// `a.extra`. It is not re-added to `extra` by the main pass, since it is not useful to + /// backends. + loop_analysis, + + /// This pass performs the main liveness analysis, setting up tombs and extra data while + /// considering control flow etc. + main_analysis, +}; + +/// Each analysis pass may wish to pass data through calls. A pointer to a `LivenessPassData(pass)` +/// stored on the stack is passed through calls to `analyzeInst` etc. +fn LivenessPassData(comptime pass: LivenessPass) type { + return switch (pass) { + .loop_analysis => struct { + /// The set of blocks which are exited with a `br` instruction at some point within this + /// body and which we are currently within. + breaks: std.AutoHashMapUnmanaged(Air.Inst.Index, void) = .{}, + + /// The set of operands for which we have seen at least one usage but not their birth. + live_set: std.AutoHashMapUnmanaged(Air.Inst.Index, void) = .{}, + + fn deinit(self: *@This(), gpa: Allocator) void { + self.breaks.deinit(gpa); + self.live_set.deinit(gpa); + } + }, + + .main_analysis => struct { + /// Every `block` currently under analysis. + block_scopes: std.AutoHashMapUnmanaged(Air.Inst.Index, BlockScope) = .{}, + + /// The set of deaths which should be made to occur at the earliest possible point in + /// this control flow branch. These instructions die when they are last referenced in + /// the current branch; if unreferenced, they die at the start of the branch. Populated + /// when a `br` instruction is reached. If deaths are common to all branches of control + /// flow, they may be bubbled up to the parent branch. + branch_deaths: std.AutoHashMapUnmanaged(Air.Inst.Index, void) = .{}, + + /// The set of instructions currently alive. Instructions which must die in this branch + /// (i.e. those in `branch_deaths`) are not in this set, because they must die before + /// this point. + live_set: std.AutoHashMapUnmanaged(Air.Inst.Index, void) = .{}, + + /// The extra data initialized by the `loop_analysis` pass for this pass to consume. + /// Owned by this struct during this pass. + old_extra: std.ArrayListUnmanaged(u32) = .{}, + + const BlockScope = struct { + /// The set of instructions which are alive upon a `br` to this block. + live_set: std.AutoHashMapUnmanaged(Air.Inst.Index, void), + }; + + fn deinit(self: *@This(), gpa: Allocator) void { + var it = self.block_scopes.valueIterator(); + while (it.next()) |block| { + block.live_set.deinit(gpa); + } + self.block_scopes.deinit(gpa); + self.branch_deaths.deinit(gpa); + self.live_set.deinit(gpa); + self.old_extra.deinit(gpa); + } + }, + }; +} + pub fn analyze(gpa: Allocator, air: Air) Allocator.Error!Liveness { const tracy = trace(@src()); defer tracy.end(); @@ -64,7 +143,6 @@ pub fn analyze(gpa: Allocator, air: Air) Allocator.Error!Liveness { var a: Analysis = .{ .gpa = gpa, .air = air, - .table = .{}, .tomb_bits = try gpa.alloc( usize, (air.instructions.len * bpi + @bitSizeOf(usize) - 1) / @bitSizeOf(usize), @@ -75,19 +153,27 @@ pub fn analyze(gpa: Allocator, air: Air) Allocator.Error!Liveness { errdefer gpa.free(a.tomb_bits); errdefer a.special.deinit(gpa); defer a.extra.deinit(gpa); - defer a.table.deinit(gpa); std.mem.set(usize, a.tomb_bits, 0); const main_body = air.getMainBody(); - try a.table.ensureTotalCapacity(gpa, @intCast(u32, main_body.len)); - try analyzeWithContext(&a, null, main_body); + { - var to_remove: std.AutoHashMapUnmanaged(Air.Inst.Index, void) = .{}; - defer to_remove.deinit(gpa); - try removeDeaths(&a, &to_remove, main_body); + var data: LivenessPassData(.loop_analysis) = .{}; + defer data.deinit(gpa); + try analyzeBody(&a, .loop_analysis, &data, main_body); } - return Liveness{ + + { + var data: LivenessPassData(.main_analysis) = .{}; + defer data.deinit(gpa); + data.old_extra = a.extra; + a.extra = .{}; + try analyzeBody(&a, .main_analysis, &data, main_body); + assert(data.branch_deaths.count() == 0); + } + + return .{ .tomb_bits = a.tomb_bits, .special = a.special, .extra = try a.extra.toOwnedSlice(gpa), @@ -661,18 +747,27 @@ pub fn getSwitchBr(l: Liveness, gpa: Allocator, inst: Air.Inst.Index, cases_len: }; } -pub const LoopSlice = struct { +/// Note that this information is technically redundant, but is useful for +/// backends nonetheless: see `Block`. +pub const BlockSlices = struct { deaths: []const Air.Inst.Index, }; -pub fn getLoop(l: Liveness, inst: Air.Inst.Index) LoopSlice { +pub fn getBlock(l: Liveness, inst: Air.Inst.Index) BlockSlices { const index: usize = l.special.get(inst) orelse return .{ .deaths = &.{}, }; const death_count = l.extra[index]; - return .{ .deaths = l.extra[index + 1 ..][0..death_count] }; + const deaths = l.extra[index + 1 ..][0..death_count]; + return .{ + .deaths = deaths, + }; } +pub const LoopSlice = struct { + deaths: []const Air.Inst.Index, +}; + pub fn deinit(l: *Liveness, gpa: Allocator) void { gpa.free(l.tomb_bits); gpa.free(l.extra); @@ -687,6 +782,7 @@ pub fn iterateBigTomb(l: Liveness, inst: Air.Inst.Index) BigTomb { .extra_offset = 0, .extra = l.extra, .bit_index = 0, + .reached_end = false, }; } @@ -702,13 +798,16 @@ pub const BigTomb = struct { extra_start: u32, extra_offset: u32, extra: []const u32, + reached_end: bool, /// Returns whether the next operand dies. pub fn feed(bt: *BigTomb) bool { + if (bt.reached_end) return false; + const this_bit_index = bt.bit_index; bt.bit_index += 1; - const small_tombs = Liveness.bpi - 1; + const small_tombs = bpi - 1; if (this_bit_index < small_tombs) { const dies = @truncate(u1, bt.tomb_bits >> @intCast(Liveness.OperandInt, this_bit_index)) != 0; return dies; @@ -716,6 +815,10 @@ pub const BigTomb = struct { const big_bit_index = this_bit_index - small_tombs; while (big_bit_index - bt.extra_offset * 31 >= 31) { + if (@truncate(u1, bt.extra[bt.extra_start + bt.extra_offset] >> 31) != 0) { + bt.reached_end = true; + return false; + } bt.extra_offset += 1; } const dies = @truncate(u1, bt.extra[bt.extra_start + bt.extra_offset] >> @@ -728,7 +831,6 @@ pub const BigTomb = struct { const Analysis = struct { gpa: Allocator, air: Air, - table: std.AutoHashMapUnmanaged(Air.Inst.Index, void), tomb_bits: []usize, special: std.AutoHashMapUnmanaged(Air.Inst.Index, u32), extra: std.ArrayListUnmanaged(u32), @@ -758,46 +860,70 @@ const Analysis = struct { } }; -fn analyzeWithContext( +fn analyzeBody( a: *Analysis, - new_set: ?*std.AutoHashMapUnmanaged(Air.Inst.Index, void), + comptime pass: LivenessPass, + data: *LivenessPassData(pass), body: []const Air.Inst.Index, ) Allocator.Error!void { var i: usize = body.len; - - if (new_set) |ns| { - // We are only interested in doing this for instructions which are born - // before a conditional branch, so after obtaining the new set for - // each branch we prune the instructions which were born within. - while (i != 0) { - i -= 1; - const inst = body[i]; - _ = ns.remove(inst); - try analyzeInst(a, new_set, inst); - } - } else { - while (i != 0) { - i -= 1; - const inst = body[i]; - try analyzeInst(a, new_set, inst); - } + while (i != 0) { + i -= 1; + const inst = body[i]; + try analyzeInst(a, pass, data, inst); } } +const ControlBranchInfo = struct { + branch_deaths: std.AutoHashMapUnmanaged(Air.Inst.Index, void) = .{}, + live_set: std.AutoHashMapUnmanaged(Air.Inst.Index, void) = .{}, +}; + +/// Helper function for running `analyzeBody`, but resetting `branch_deaths` and `live_set` to their +/// original states before returning, returning the modified versions of them. Only makes sense in +/// the `main_analysis` pass. +fn analyzeBodyResetBranch( + a: *Analysis, + comptime pass: LivenessPass, + data: *LivenessPassData(pass), + body: []const Air.Inst.Index, +) !ControlBranchInfo { + switch (pass) { + .main_analysis => {}, + else => @compileError("Liveness.analyzeBodyResetBranch only makes sense in LivenessPass.main_analysis"), + } + + const gpa = a.gpa; + + const old_branch_deaths = try data.branch_deaths.clone(a.gpa); + defer { + data.branch_deaths.deinit(gpa); + data.branch_deaths = old_branch_deaths; + } + + const old_live_set = try data.live_set.clone(a.gpa); + defer { + data.live_set.deinit(gpa); + data.live_set = old_live_set; + } + + try analyzeBody(a, pass, data, body); + + return .{ + .branch_deaths = data.branch_deaths.move(), + .live_set = data.live_set.move(), + }; +} + fn analyzeInst( a: *Analysis, - new_set: ?*std.AutoHashMapUnmanaged(Air.Inst.Index, void), + comptime pass: LivenessPass, + data: *LivenessPassData(pass), inst: Air.Inst.Index, ) Allocator.Error!void { - const gpa = a.gpa; - const table = &a.table; const inst_tags = a.air.instructions.items(.tag); const inst_datas = a.air.instructions.items(.data); - // No tombstone for this instruction means it is never referenced, - // and its birth marks its own death. Very metal 🤘 - const main_tomb = !table.contains(inst); - switch (inst_tags[inst]) { .add, .add_optimized, @@ -861,28 +987,24 @@ fn analyzeInst( .max, => { const o = inst_datas[inst].bin_op; - return trackOperands(a, new_set, inst, main_tomb, .{ o.lhs, o.rhs, .none }); + return analyzeOperands(a, pass, data, inst, .{ o.lhs, o.rhs, .none }); }, .vector_store_elem => { const o = inst_datas[inst].vector_store_elem; const extra = a.air.extraData(Air.Bin, o.payload).data; - return trackOperands(a, new_set, inst, main_tomb, .{ o.vector_ptr, extra.lhs, extra.rhs }); + return analyzeOperands(a, pass, data, inst, .{ o.vector_ptr, extra.lhs, extra.rhs }); }, .arg, .alloc, .ret_ptr, - .constant, - .const_ty, - .trap, .breakpoint, .dbg_stmt, .dbg_inline_begin, .dbg_inline_end, .dbg_block_begin, .dbg_block_end, - .unreach, .fence, .ret_addr, .frame_addr, @@ -893,7 +1015,15 @@ fn analyzeInst( .work_item_id, .work_group_size, .work_group_id, - => return trackOperands(a, new_set, inst, main_tomb, .{ .none, .none, .none }), + => return analyzeOperands(a, pass, data, inst, .{ .none, .none, .none }), + + .constant, + .const_ty, + => unreachable, + + .trap, + .unreach, + => return analyzeFuncEnd(a, pass, data, inst, .{ .none, .none, .none }), .not, .bitcast, @@ -938,7 +1068,7 @@ fn analyzeInst( .c_va_copy, => { const o = inst_datas[inst].ty_op; - return trackOperands(a, new_set, inst, main_tomb, .{ o.operand, .none, .none }); + return analyzeOperands(a, pass, data, inst, .{ o.operand, .none, .none }); }, .is_null, @@ -951,8 +1081,6 @@ fn analyzeInst( .is_non_err_ptr, .ptrtoint, .bool_to_int, - .ret, - .ret_load, .is_named_enum_value, .tag_name, .error_name, @@ -977,7 +1105,14 @@ fn analyzeInst( .c_va_end, => { const operand = inst_datas[inst].un_op; - return trackOperands(a, new_set, inst, main_tomb, .{ operand, .none, .none }); + return analyzeOperands(a, pass, data, inst, .{ operand, .none, .none }); + }, + + .ret, + .ret_load, + => { + const operand = inst_datas[inst].un_op; + return analyzeFuncEnd(a, pass, data, inst, .{ operand, .none, .none }); }, .add_with_overflow, @@ -992,19 +1127,19 @@ fn analyzeInst( => { const ty_pl = inst_datas[inst].ty_pl; const extra = a.air.extraData(Air.Bin, ty_pl.payload).data; - return trackOperands(a, new_set, inst, main_tomb, .{ extra.lhs, extra.rhs, .none }); + return analyzeOperands(a, pass, data, inst, .{ extra.lhs, extra.rhs, .none }); }, .dbg_var_ptr, .dbg_var_val, => { const operand = inst_datas[inst].pl_op.operand; - return trackOperands(a, new_set, inst, main_tomb, .{ operand, .none, .none }); + return analyzeOperands(a, pass, data, inst, .{ operand, .none, .none }); }, .prefetch => { const prefetch = inst_datas[inst].prefetch; - return trackOperands(a, new_set, inst, main_tomb, .{ prefetch.ptr, .none, .none }); + return analyzeOperands(a, pass, data, inst, .{ prefetch.ptr, .none, .none }); }, .call, .call_always_tail, .call_never_tail, .call_never_inline => { @@ -1016,37 +1151,35 @@ fn analyzeInst( var buf = [1]Air.Inst.Ref{.none} ** (bpi - 1); buf[0] = callee; std.mem.copy(Air.Inst.Ref, buf[1..], args); - return trackOperands(a, new_set, inst, main_tomb, buf); + return analyzeOperands(a, pass, data, inst, buf); } - var extra_tombs: ExtraTombs = .{ - .analysis = a, - .new_set = new_set, - .inst = inst, - .main_tomb = main_tomb, - }; - defer extra_tombs.deinit(); - try extra_tombs.feed(callee); - for (args) |arg| { - try extra_tombs.feed(arg); + + var big = try AnalyzeBigOperands(pass).init(a, data, inst, args.len + 1); + defer big.deinit(); + var i: usize = args.len; + while (i > 0) { + i -= 1; + try big.feed(args[i]); } - return extra_tombs.finish(); + try big.feed(callee); + return big.finish(); }, .select => { const pl_op = inst_datas[inst].pl_op; const extra = a.air.extraData(Air.Bin, pl_op.payload).data; - return trackOperands(a, new_set, inst, main_tomb, .{ pl_op.operand, extra.lhs, extra.rhs }); + return analyzeOperands(a, pass, data, inst, .{ pl_op.operand, extra.lhs, extra.rhs }); }, .shuffle => { const extra = a.air.extraData(Air.Shuffle, inst_datas[inst].ty_pl.payload).data; - return trackOperands(a, new_set, inst, main_tomb, .{ extra.a, extra.b, .none }); + return analyzeOperands(a, pass, data, inst, .{ extra.a, extra.b, .none }); }, .reduce, .reduce_optimized => { const reduce = inst_datas[inst].reduce; - return trackOperands(a, new_set, inst, main_tomb, .{ reduce.operand, .none, .none }); + return analyzeOperands(a, pass, data, inst, .{ reduce.operand, .none, .none }); }, .cmp_vector, .cmp_vector_optimized => { const extra = a.air.extraData(Air.VectorCmp, inst_datas[inst].ty_pl.payload).data; - return trackOperands(a, new_set, inst, main_tomb, .{ extra.lhs, extra.rhs, .none }); + return analyzeOperands(a, pass, data, inst, .{ extra.lhs, extra.rhs, .none }); }, .aggregate_init => { const ty_pl = inst_datas[inst].ty_pl; @@ -1057,62 +1190,58 @@ fn analyzeInst( if (elements.len <= bpi - 1) { var buf = [1]Air.Inst.Ref{.none} ** (bpi - 1); std.mem.copy(Air.Inst.Ref, &buf, elements); - return trackOperands(a, new_set, inst, main_tomb, buf); + return analyzeOperands(a, pass, data, inst, buf); } - var extra_tombs: ExtraTombs = .{ - .analysis = a, - .new_set = new_set, - .inst = inst, - .main_tomb = main_tomb, - }; - defer extra_tombs.deinit(); - for (elements) |elem| { - try extra_tombs.feed(elem); + + var big = try AnalyzeBigOperands(pass).init(a, data, inst, elements.len); + defer big.deinit(); + var i: usize = elements.len; + while (i > 0) { + i -= 1; + try big.feed(elements[i]); } - return extra_tombs.finish(); + return big.finish(); }, .union_init => { const extra = a.air.extraData(Air.UnionInit, inst_datas[inst].ty_pl.payload).data; - return trackOperands(a, new_set, inst, main_tomb, .{ extra.init, .none, .none }); + return analyzeOperands(a, pass, data, inst, .{ extra.init, .none, .none }); }, .struct_field_ptr, .struct_field_val => { const extra = a.air.extraData(Air.StructField, inst_datas[inst].ty_pl.payload).data; - return trackOperands(a, new_set, inst, main_tomb, .{ extra.struct_operand, .none, .none }); + return analyzeOperands(a, pass, data, inst, .{ extra.struct_operand, .none, .none }); }, .field_parent_ptr => { const extra = a.air.extraData(Air.FieldParentPtr, inst_datas[inst].ty_pl.payload).data; - return trackOperands(a, new_set, inst, main_tomb, .{ extra.field_ptr, .none, .none }); + return analyzeOperands(a, pass, data, inst, .{ extra.field_ptr, .none, .none }); }, .cmpxchg_strong, .cmpxchg_weak => { const extra = a.air.extraData(Air.Cmpxchg, inst_datas[inst].ty_pl.payload).data; - return trackOperands(a, new_set, inst, main_tomb, .{ extra.ptr, extra.expected_value, extra.new_value }); + return analyzeOperands(a, pass, data, inst, .{ extra.ptr, extra.expected_value, extra.new_value }); }, .mul_add => { const pl_op = inst_datas[inst].pl_op; const extra = a.air.extraData(Air.Bin, pl_op.payload).data; - return trackOperands(a, new_set, inst, main_tomb, .{ extra.lhs, extra.rhs, pl_op.operand }); + return analyzeOperands(a, pass, data, inst, .{ extra.lhs, extra.rhs, pl_op.operand }); }, .atomic_load => { const ptr = inst_datas[inst].atomic_load.ptr; - return trackOperands(a, new_set, inst, main_tomb, .{ ptr, .none, .none }); + return analyzeOperands(a, pass, data, inst, .{ ptr, .none, .none }); }, .atomic_rmw => { const pl_op = inst_datas[inst].pl_op; const extra = a.air.extraData(Air.AtomicRmw, pl_op.payload).data; - return trackOperands(a, new_set, inst, main_tomb, .{ pl_op.operand, extra.operand, .none }); + return analyzeOperands(a, pass, data, inst, .{ pl_op.operand, extra.operand, .none }); }, .memset, .memcpy, => { const pl_op = inst_datas[inst].pl_op; const extra = a.air.extraData(Air.Bin, pl_op.payload).data; - return trackOperands(a, new_set, inst, main_tomb, .{ pl_op.operand, extra.lhs, extra.rhs }); + return analyzeOperands(a, pass, data, inst, .{ pl_op.operand, extra.lhs, extra.rhs }); }, - .br => { - const br = inst_datas[inst].br; - return trackOperands(a, new_set, inst, main_tomb, .{ br.operand, .none, .none }); - }, + .br => return analyzeInstBr(a, pass, data, inst), + .assembly => { const extra = a.air.extraData(Air.Asm, inst_datas[inst].ty_pl.payload); var extra_i: usize = extra.end; @@ -1121,912 +1250,875 @@ fn analyzeInst( const inputs = @ptrCast([]const Air.Inst.Ref, a.air.extra[extra_i..][0..extra.data.inputs_len]); extra_i += inputs.len; - simple: { + const num_operands = simple: { var buf = [1]Air.Inst.Ref{.none} ** (bpi - 1); var buf_index: usize = 0; for (outputs) |output| { if (output != .none) { - if (buf_index >= buf.len) break :simple; - buf[buf_index] = output; + if (buf_index < buf.len) buf[buf_index] = output; buf_index += 1; } } - if (buf_index + inputs.len > buf.len) break :simple; + if (buf_index + inputs.len > buf.len) { + break :simple buf_index + inputs.len; + } std.mem.copy(Air.Inst.Ref, buf[buf_index..], inputs); - return trackOperands(a, new_set, inst, main_tomb, buf); - } - var extra_tombs: ExtraTombs = .{ - .analysis = a, - .new_set = new_set, - .inst = inst, - .main_tomb = main_tomb, + return analyzeOperands(a, pass, data, inst, buf); }; - defer extra_tombs.deinit(); - for (outputs) |output| { - if (output != .none) { - try extra_tombs.feed(output); + + var big = try AnalyzeBigOperands(pass).init(a, data, inst, num_operands); + defer big.deinit(); + var i: usize = inputs.len; + while (i > 0) { + i -= 1; + try big.feed(inputs[i]); + } + i = outputs.len; + while (i > 0) { + i -= 1; + if (outputs[i] != .none) { + try big.feed(outputs[i]); } } - for (inputs) |input| { - try extra_tombs.feed(input); + return big.finish(); + }, + + .block => return analyzeInstBlock(a, pass, data, inst), + .loop => return analyzeInstLoop(a, pass, data, inst), + + .@"try" => return analyzeInstCondBr(a, pass, data, inst, .@"try"), + .try_ptr => return analyzeInstCondBr(a, pass, data, inst, .try_ptr), + .cond_br => return analyzeInstCondBr(a, pass, data, inst, .cond_br), + .switch_br => return analyzeInstSwitchBr(a, pass, data, inst), + + .wasm_memory_grow => { + const pl_op = inst_datas[inst].pl_op; + return analyzeOperands(a, pass, data, inst, .{ pl_op.operand, .none, .none }); + }, + } +} + +/// Every instruction should hit this (after handling any nested bodies), in every pass. In the +/// initial pass, it is responsible for marking deaths of the (first three) operands and noticing +/// immediate deaths. +fn analyzeOperands( + a: *Analysis, + comptime pass: LivenessPass, + data: *LivenessPassData(pass), + inst: Air.Inst.Index, + operands: [bpi - 1]Air.Inst.Ref, +) Allocator.Error!void { + const gpa = a.gpa; + const inst_tags = a.air.instructions.items(.tag); + + switch (pass) { + .loop_analysis => { + _ = data.live_set.remove(inst); + + for (operands) |op_ref| { + const operand = Air.refToIndex(op_ref) orelse continue; + + // Don't compute any liveness for constants + switch (inst_tags[operand]) { + .constant, .const_ty => continue, + else => {}, + } + + _ = try data.live_set.put(gpa, operand, {}); } - return extra_tombs.finish(); }, - .block => { - const extra = a.air.extraData(Air.Block, inst_datas[inst].ty_pl.payload); - const body = a.air.extra[extra.end..][0..extra.data.body_len]; - try analyzeWithContext(a, new_set, body); - return trackOperands(a, new_set, inst, main_tomb, .{ .none, .none, .none }); - }, - .loop => { - const extra = a.air.extraData(Air.Block, inst_datas[inst].ty_pl.payload); - const body = a.air.extra[extra.end..][0..extra.data.body_len]; - var body_table: std.AutoHashMapUnmanaged(Air.Inst.Index, void) = .{}; - defer body_table.deinit(gpa); + .main_analysis => { + const usize_index = (inst * bpi) / @bitSizeOf(usize); - // Instructions outside the loop body cannot die within the loop, since further loop - // iterations may occur. Track deaths from the loop body - we'll remove all of these - // retroactively, and add them to our extra data. + var tomb_bits: Bpi = 0; - try analyzeWithContext(a, &body_table, body); + if (data.branch_deaths.remove(inst)) { + log.debug("[{}] %{}: resolved branch death to birth (immediate death)", .{ pass, inst }); + tomb_bits |= @as(Bpi, 1) << (bpi - 1); + assert(!data.live_set.contains(inst)); + } else if (data.live_set.remove(inst)) { + log.debug("[{}] %{}: removed from live set", .{ pass, inst }); + } else { + log.debug("[{}] %{}: immediate death", .{ pass, inst }); + tomb_bits |= @as(Bpi, 1) << (bpi - 1); + } - if (new_set) |ns| { - try ns.ensureUnusedCapacity(gpa, body_table.count()); - var it = body_table.keyIterator(); - while (it.next()) |key| { - _ = ns.putAssumeCapacity(key.*, {}); + // Note that it's important we iterate over the operands backwards, so that if a dying + // operand is used multiple times we mark its last use as its death. + var i = operands.len; + while (i > 0) { + i -= 1; + const op_ref = operands[i]; + const operand = Air.refToIndex(op_ref) orelse continue; + + // Don't compute any liveness for constants + switch (inst_tags[operand]) { + .constant, .const_ty => continue, + else => {}, + } + + const mask = @as(Bpi, 1) << @intCast(OperandInt, i); + + if ((try data.live_set.fetchPut(gpa, operand, {})) == null) { + log.debug("[{}] %{}: added %{} to live set (operand dies here)", .{ pass, inst, operand }); + tomb_bits |= mask; + if (data.branch_deaths.remove(operand)) { + log.debug("[{}] %{}: resolved branch death of %{} to this usage", .{ pass, inst, operand }); + } } } - try a.extra.ensureUnusedCapacity(gpa, std.meta.fields(Loop).len + body_table.count()); - const extra_index = a.addExtraAssumeCapacity(Loop{ - .death_count = body_table.count(), + a.tomb_bits[usize_index] |= @as(usize, tomb_bits) << + @intCast(Log2Int(usize), (inst % (@bitSizeOf(usize) / bpi)) * bpi); + }, + } +} + +/// Like `analyzeOperands`, but for an instruction which returns from a function, so should +/// effectively kill every remaining live value other than its operands. +fn analyzeFuncEnd( + a: *Analysis, + comptime pass: LivenessPass, + data: *LivenessPassData(pass), + inst: Air.Inst.Index, + operands: [bpi - 1]Air.Inst.Ref, +) Allocator.Error!void { + switch (pass) { + .loop_analysis => { + // No operands need to be alive if we're returning from the function, so we don't need + // to touch `breaks` here even though this is sort of like a break to the top level. + }, + + .main_analysis => { + const gpa = a.gpa; + + // Note that we preserve previous branch deaths - anything that needs to die in our + // "parent" branch also needs to die for us. + + try data.branch_deaths.ensureUnusedCapacity(gpa, data.live_set.count()); + var it = data.live_set.keyIterator(); + while (it.next()) |key| { + const alive = key.*; + data.branch_deaths.putAssumeCapacity(alive, {}); + } + data.live_set.clearRetainingCapacity(); + }, + } + + return analyzeOperands(a, pass, data, inst, operands); +} + +fn analyzeInstBr( + a: *Analysis, + comptime pass: LivenessPass, + data: *LivenessPassData(pass), + inst: Air.Inst.Index, +) !void { + const inst_datas = a.air.instructions.items(.data); + const br = inst_datas[inst].br; + const gpa = a.gpa; + + switch (pass) { + .loop_analysis => { + try data.breaks.put(gpa, br.block_inst, {}); + }, + + .main_analysis => { + const block_scope = data.block_scopes.get(br.block_inst).?; // we should always be breaking from an enclosing block + + // We mostly preserve previous branch deaths - anything that should die for our + // enclosing branch should die for us too. However, if our break target requires such an + // operand to be alive, it's actually not something we want to kill, since its "last + // use" (i.e. the point at which it should die) is outside of our scope. + var it = block_scope.live_set.keyIterator(); + while (it.next()) |key| { + const alive = key.*; + _ = data.branch_deaths.remove(alive); + } + log.debug("[{}] %{}: preserved branch deaths are {}", .{ pass, inst, fmtInstSet(&data.branch_deaths) }); + + // Anything that's currently alive but our target doesn't need becomes a branch death. + it = data.live_set.keyIterator(); + while (it.next()) |key| { + const alive = key.*; + if (!block_scope.live_set.contains(alive)) { + _ = try data.branch_deaths.put(gpa, alive, {}); + log.debug("[{}] %{}: added branch death of {}", .{ pass, inst, alive }); + } + } + const new_live_set = try block_scope.live_set.clone(gpa); + data.live_set.deinit(gpa); + data.live_set = new_live_set; + }, + } + + return analyzeOperands(a, pass, data, inst, .{ br.operand, .none, .none }); +} + +fn analyzeInstBlock( + a: *Analysis, + comptime pass: LivenessPass, + data: *LivenessPassData(pass), + inst: Air.Inst.Index, +) !void { + const inst_datas = a.air.instructions.items(.data); + const ty_pl = inst_datas[inst].ty_pl; + const extra = a.air.extraData(Air.Block, ty_pl.payload); + const body = a.air.extra[extra.end..][0..extra.data.body_len]; + + const gpa = a.gpa; + + // We actually want to do `analyzeOperands` *first*, since our result logically doesn't + // exist until the block body ends (and we're iterating backwards) + try analyzeOperands(a, pass, data, inst, .{ .none, .none, .none }); + + switch (pass) { + .loop_analysis => { + try analyzeBody(a, pass, data, body); + _ = data.breaks.remove(inst); + }, + + .main_analysis => { + log.debug("[{}] %{}: block live set is {}", .{ pass, inst, fmtInstSet(&data.live_set) }); + try data.block_scopes.put(gpa, inst, .{ + .live_set = try data.live_set.clone(gpa), }); - { - var it = body_table.keyIterator(); - while (it.next()) |key| { - a.extra.appendAssumeCapacity(key.*); - } + defer { + log.debug("[{}] %{}: popped block scope", .{ pass, inst }); + var scope = data.block_scopes.fetchRemove(inst).?.value; + scope.live_set.deinit(gpa); } + + log.debug("[{}] %{}: pushed new block scope", .{ pass, inst }); + try analyzeBody(a, pass, data, body); + + // If the block is noreturn, block deaths not only aren't useful, they're impossible to + // find: there could be more stuff alive after the block than before it! + if (!a.air.getRefType(ty_pl.ty).isNoReturn()) { + // The block kills the difference in the live sets + const block_scope = data.block_scopes.get(inst).?; + const num_deaths = data.live_set.count() - block_scope.live_set.count(); + + try a.extra.ensureUnusedCapacity(gpa, num_deaths + std.meta.fields(Block).len); + const extra_index = a.addExtraAssumeCapacity(Block{ + .death_count = num_deaths, + }); + + var measured_num: u32 = 0; + var it = data.live_set.keyIterator(); + while (it.next()) |key| { + const alive = key.*; + if (!block_scope.live_set.contains(alive)) { + // Dies in block + a.extra.appendAssumeCapacity(alive); + measured_num += 1; + } + } + assert(measured_num == num_deaths); // post-live-set should be a subset of pre-live-set + try a.special.put(gpa, inst, extra_index); + log.debug("[{}] %{}: block deaths are {}", .{ + pass, + inst, + fmtInstList(a.extra.items[extra_index + 1 ..][0..num_deaths]), + }); + } + }, + } +} + +fn analyzeInstLoop( + a: *Analysis, + comptime pass: LivenessPass, + data: *LivenessPassData(pass), + inst: Air.Inst.Index, +) !void { + const inst_datas = a.air.instructions.items(.data); + const extra = a.air.extraData(Air.Block, inst_datas[inst].ty_pl.payload); + const body = a.air.extra[extra.end..][0..extra.data.body_len]; + const gpa = a.gpa; + + try analyzeOperands(a, pass, data, inst, .{ .none, .none, .none }); + + switch (pass) { + .loop_analysis => { + var old_breaks = data.breaks.move(); + defer old_breaks.deinit(gpa); + + var old_live = data.live_set.move(); + defer old_live.deinit(gpa); + + try analyzeBody(a, pass, data, body); + + const num_breaks = data.breaks.count(); + try a.extra.ensureUnusedCapacity(gpa, 1 + num_breaks); + + const extra_index = @intCast(u32, a.extra.items.len); + a.extra.appendAssumeCapacity(num_breaks); + + var it = data.breaks.keyIterator(); + while (it.next()) |key| { + const block_inst = key.*; + a.extra.appendAssumeCapacity(block_inst); + } + log.debug("[{}] %{}: includes breaks to {}", .{ pass, inst, fmtInstSet(&data.breaks) }); + + // Now we put the live operands from the loop body in too + const num_live = data.live_set.count(); + try a.extra.ensureUnusedCapacity(gpa, 1 + num_live); + + a.extra.appendAssumeCapacity(num_live); + it = data.live_set.keyIterator(); + while (it.next()) |key| { + const alive = key.*; + a.extra.appendAssumeCapacity(alive); + } + log.debug("[{}] %{}: maintain liveness of {}", .{ pass, inst, fmtInstSet(&data.live_set) }); + try a.special.put(gpa, inst, extra_index); - // We'll remove invalid deaths in a separate pass after main liveness analysis. See - // removeDeaths for more details. + // Add back operands which were previously alive + it = old_live.keyIterator(); + while (it.next()) |key| { + const alive = key.*; + try data.live_set.put(gpa, alive, {}); + } - return; // Loop has no operands and it is always unreferenced. + // And the same for breaks + it = old_breaks.keyIterator(); + while (it.next()) |key| { + const block_inst = key.*; + try data.breaks.put(gpa, block_inst, {}); + } }, - .@"try" => { - const pl_op = inst_datas[inst].pl_op; - const extra = a.air.extraData(Air.Try, pl_op.payload); - const body = a.air.extra[extra.end..][0..extra.data.body_len]; - try analyzeWithContext(a, new_set, body); - return trackOperands(a, new_set, inst, main_tomb, .{ pl_op.operand, .none, .none }); - }, - .try_ptr => { - const extra = a.air.extraData(Air.TryPtr, inst_datas[inst].ty_pl.payload); - const body = a.air.extra[extra.end..][0..extra.data.body_len]; - try analyzeWithContext(a, new_set, body); - return trackOperands(a, new_set, inst, main_tomb, .{ extra.data.ptr, .none, .none }); - }, - .cond_br => { - // Each death that occurs inside one branch, but not the other, needs - // to be added as a death immediately upon entering the other branch. - const inst_data = inst_datas[inst].pl_op; - const condition = inst_data.operand; - const extra = a.air.extraData(Air.CondBr, inst_data.payload); - const then_body = a.air.extra[extra.end..][0..extra.data.then_body_len]; - const else_body = a.air.extra[extra.end + then_body.len ..][0..extra.data.else_body_len]; - var then_table: std.AutoHashMapUnmanaged(Air.Inst.Index, void) = .{}; - defer then_table.deinit(gpa); - try analyzeWithContext(a, &then_table, then_body); + .main_analysis => { + const extra_idx = a.special.fetchRemove(inst).?.value; // remove because this data does not exist after analysis - // Reset the table back to its state from before the branch. - { - var it = then_table.keyIterator(); + const num_breaks = data.old_extra.items[extra_idx]; + const breaks = data.old_extra.items[extra_idx + 1 ..][0..num_breaks]; + + const num_loop_live = data.old_extra.items[extra_idx + num_breaks + 1]; + const loop_live = data.old_extra.items[extra_idx + num_breaks + 2 ..][0..num_loop_live]; + + // This is necessarily not in the same control flow branch, because loops are noreturn + data.live_set.clearRetainingCapacity(); + + try data.live_set.ensureUnusedCapacity(gpa, @intCast(u32, loop_live.len)); + for (loop_live) |alive| { + data.live_set.putAssumeCapacity(alive, {}); + // If the loop requires a branch death operand to be alive, it's not something we + // want to kill: its "last use" (i.e. the point at which it should die) is the loop + // body itself. + _ = data.branch_deaths.remove(alive); + } + + log.debug("[{}] %{}: block live set is {}", .{ pass, inst, fmtInstSet(&data.live_set) }); + + for (breaks) |block_inst| { + // We might break to this block, so include every operand that the block needs alive + const block_scope = data.block_scopes.get(block_inst).?; + + var it = block_scope.live_set.keyIterator(); while (it.next()) |key| { - assert(table.remove(key.*)); + const alive = key.*; + try data.live_set.put(gpa, alive, {}); } } - var else_table: std.AutoHashMapUnmanaged(Air.Inst.Index, void) = .{}; - defer else_table.deinit(gpa); - try analyzeWithContext(a, &else_table, else_body); + try analyzeBody(a, pass, data, body); + }, + } +} - var then_entry_deaths = std.ArrayList(Air.Inst.Index).init(gpa); - defer then_entry_deaths.deinit(); - var else_entry_deaths = std.ArrayList(Air.Inst.Index).init(gpa); - defer else_entry_deaths.deinit(); +/// Despite its name, this function is used for analysis of not only `cond_br` instructions, but +/// also `try` and `try_ptr`, which are highly related. The `inst_type` parameter indicates which +/// type of instruction `inst` points to. +fn analyzeInstCondBr( + a: *Analysis, + comptime pass: LivenessPass, + data: *LivenessPassData(pass), + inst: Air.Inst.Index, + comptime inst_type: enum { cond_br, @"try", try_ptr }, +) !void { + const inst_datas = a.air.instructions.items(.data); + const gpa = a.gpa; + const extra = switch (inst_type) { + .cond_br => a.air.extraData(Air.CondBr, inst_datas[inst].pl_op.payload), + .@"try" => a.air.extraData(Air.Try, inst_datas[inst].pl_op.payload), + .try_ptr => a.air.extraData(Air.TryPtr, inst_datas[inst].ty_pl.payload), + }; + + const condition = switch (inst_type) { + .cond_br, .@"try" => inst_datas[inst].pl_op.operand, + .try_ptr => extra.data.ptr, + }; + + const then_body = switch (inst_type) { + .cond_br => a.air.extra[extra.end..][0..extra.data.then_body_len], + else => {}, // we won't use this + }; + + const else_body = switch (inst_type) { + .cond_br => a.air.extra[extra.end + then_body.len ..][0..extra.data.else_body_len], + .@"try", .try_ptr => a.air.extra[extra.end..][0..extra.data.body_len], + }; + + switch (pass) { + .loop_analysis => { + switch (inst_type) { + .cond_br => try analyzeBody(a, pass, data, then_body), + .@"try", .try_ptr => {}, + } + try analyzeBody(a, pass, data, else_body); + }, + + .main_analysis => { + var then_info: ControlBranchInfo = switch (inst_type) { + .cond_br => try analyzeBodyResetBranch(a, pass, data, then_body), + .@"try", .try_ptr => blk: { + var branch_deaths = try data.branch_deaths.clone(gpa); + errdefer branch_deaths.deinit(gpa); + var live_set = try data.live_set.clone(gpa); + errdefer live_set.deinit(gpa); + break :blk .{ + .branch_deaths = branch_deaths, + .live_set = live_set, + }; + }, + }; + defer then_info.branch_deaths.deinit(gpa); + defer then_info.live_set.deinit(gpa); + + // If this is a `try`, the "then body" (rest of the branch) might have referenced our + // result. If so, we want to avoid this value being considered live while analyzing the + // else branch. + switch (inst_type) { + .cond_br => {}, + .@"try", .try_ptr => _ = data.live_set.remove(inst), + } + + try analyzeBody(a, pass, data, else_body); + var else_info: ControlBranchInfo = .{ + .branch_deaths = data.branch_deaths.move(), + .live_set = data.live_set.move(), + }; + defer else_info.branch_deaths.deinit(gpa); + defer else_info.live_set.deinit(gpa); + + // Any queued deaths shared between both branches can be queued for us instead { - var it = else_table.keyIterator(); + var it = then_info.branch_deaths.keyIterator(); while (it.next()) |key| { - const else_death = key.*; - if (!then_table.contains(else_death)) { - try then_entry_deaths.append(else_death); + const death = key.*; + if (else_info.branch_deaths.remove(death)) { + // We'll remove it from then_deaths below + try data.branch_deaths.put(gpa, death, {}); } } - } - // This loop is the same, except it's for the then branch, and it additionally - // has to put its items back into the table to undo the reset. - { - var it = then_table.keyIterator(); + log.debug("[{}] %{}: bubbled deaths {}", .{ pass, inst, fmtInstSet(&data.branch_deaths) }); + it = data.branch_deaths.keyIterator(); while (it.next()) |key| { - const then_death = key.*; - if (!else_table.contains(then_death)) { - try else_entry_deaths.append(then_death); - } - try table.put(gpa, then_death, {}); + const death = key.*; + assert(then_info.branch_deaths.remove(death)); } } - // Now we have to correctly populate new_set. - if (new_set) |ns| { - try ns.ensureUnusedCapacity(gpa, @intCast(u32, then_table.count() + else_table.count())); - var it = then_table.keyIterator(); - while (it.next()) |key| { - _ = ns.putAssumeCapacity(key.*, {}); - } - it = else_table.keyIterator(); - while (it.next()) |key| { - _ = ns.putAssumeCapacity(key.*, {}); - } - } - const then_death_count = @intCast(u32, then_entry_deaths.items.len); - const else_death_count = @intCast(u32, else_entry_deaths.items.len); - try a.extra.ensureUnusedCapacity(gpa, std.meta.fields(Air.CondBr).len + - then_death_count + else_death_count); + log.debug("[{}] %{}: remaining 'then' branch deaths are {}", .{ pass, inst, fmtInstSet(&then_info.branch_deaths) }); + log.debug("[{}] %{}: remaining 'else' branch deaths are {}", .{ pass, inst, fmtInstSet(&else_info.branch_deaths) }); + + // Deaths that occur in one branch but not another need to be made to occur at the start + // of the other branch. + + var then_mirrored_deaths: std.ArrayListUnmanaged(Air.Inst.Index) = .{}; + defer then_mirrored_deaths.deinit(gpa); + + var else_mirrored_deaths: std.ArrayListUnmanaged(Air.Inst.Index) = .{}; + defer else_mirrored_deaths.deinit(gpa); + + // Note: this invalidates `else_info.live_set`, but expands `then_info.live_set` to + // be their union + { + var it = then_info.live_set.keyIterator(); + while (it.next()) |key| { + const death = key.*; + if (else_info.live_set.remove(death)) continue; // removing makes the loop below faster + if (else_info.branch_deaths.contains(death)) continue; + + // If this is a `try`, the "then body" (rest of the branch) might have + // referenced our result. We want to avoid killing this value in the else branch + // if that's the case, since it only exists in the (fake) then branch. + switch (inst_type) { + .cond_br => {}, + .@"try", .try_ptr => if (death == inst) continue, + } + + try else_mirrored_deaths.append(gpa, death); + } + // Since we removed common stuff above, `else_info.live_set` is now only operands + // which are *only* alive in the else branch + it = else_info.live_set.keyIterator(); + while (it.next()) |key| { + const death = key.*; + if (!then_info.branch_deaths.contains(death)) { + try then_mirrored_deaths.append(gpa, death); + } + // Make `then_info.live_set` contain the full live set (i.e. union of both) + try then_info.live_set.put(gpa, death, {}); + } + } + + log.debug("[{}] %{}: 'then' branch mirrored deaths are {}", .{ pass, inst, fmtInstList(then_mirrored_deaths.items) }); + log.debug("[{}] %{}: 'else' branch mirrored deaths are {}", .{ pass, inst, fmtInstList(else_mirrored_deaths.items) }); + + data.live_set.deinit(gpa); + data.live_set = then_info.live_set.move(); + + log.debug("[{}] %{}: new live set is {}", .{ pass, inst, fmtInstSet(&data.live_set) }); + + // Write the branch deaths to `extra` + const then_death_count = then_info.branch_deaths.count() + @intCast(u32, then_mirrored_deaths.items.len); + const else_death_count = else_info.branch_deaths.count() + @intCast(u32, else_mirrored_deaths.items.len); + + try a.extra.ensureUnusedCapacity(gpa, std.meta.fields(CondBr).len + then_death_count + else_death_count); const extra_index = a.addExtraAssumeCapacity(CondBr{ .then_death_count = then_death_count, .else_death_count = else_death_count, }); - a.extra.appendSliceAssumeCapacity(then_entry_deaths.items); - a.extra.appendSliceAssumeCapacity(else_entry_deaths.items); + a.extra.appendSliceAssumeCapacity(then_mirrored_deaths.items); + { + var it = then_info.branch_deaths.keyIterator(); + while (it.next()) |key| a.extra.appendAssumeCapacity(key.*); + } + a.extra.appendSliceAssumeCapacity(else_mirrored_deaths.items); + { + var it = else_info.branch_deaths.keyIterator(); + while (it.next()) |key| a.extra.appendAssumeCapacity(key.*); + } try a.special.put(gpa, inst, extra_index); - - // Continue on with the instruction analysis. The following code will find the condition - // instruction, and the deaths flag for the CondBr instruction will indicate whether the - // condition's lifetime ends immediately before entering any branch. - return trackOperands(a, new_set, inst, main_tomb, .{ condition, .none, .none }); }, - .switch_br => { - const pl_op = inst_datas[inst].pl_op; - const condition = pl_op.operand; - const switch_br = a.air.extraData(Air.SwitchBr, pl_op.payload); + } - const Table = std.AutoHashMapUnmanaged(Air.Inst.Index, void); - const case_tables = try gpa.alloc(Table, switch_br.data.cases_len + 1); // +1 for else - defer gpa.free(case_tables); + try analyzeOperands(a, pass, data, inst, .{ condition, .none, .none }); +} - std.mem.set(Table, case_tables, .{}); - defer for (case_tables) |*ct| ct.deinit(gpa); +fn analyzeInstSwitchBr( + a: *Analysis, + comptime pass: LivenessPass, + data: *LivenessPassData(pass), + inst: Air.Inst.Index, +) !void { + const inst_datas = a.air.instructions.items(.data); + const pl_op = inst_datas[inst].pl_op; + const condition = pl_op.operand; + const switch_br = a.air.extraData(Air.SwitchBr, pl_op.payload); + const gpa = a.gpa; + const ncases = switch_br.data.cases_len; + switch (pass) { + .loop_analysis => { var air_extra_index: usize = switch_br.end; - for (case_tables[0..switch_br.data.cases_len]) |*case_table| { + for (0..ncases) |_| { const case = a.air.extraData(Air.SwitchBr.Case, air_extra_index); const case_body = a.air.extra[case.end + case.data.items_len ..][0..case.data.body_len]; air_extra_index = case.end + case.data.items_len + case_body.len; - try analyzeWithContext(a, case_table, case_body); - - // Reset the table back to its state from before the case. - var it = case_table.keyIterator(); - while (it.next()) |key| { - assert(table.remove(key.*)); - } + try analyzeBody(a, pass, data, case_body); } { // else - const else_table = &case_tables[case_tables.len - 1]; const else_body = a.air.extra[air_extra_index..][0..switch_br.data.else_body_len]; - try analyzeWithContext(a, else_table, else_body); + try analyzeBody(a, pass, data, else_body); + } + }, - // Reset the table back to its state from before the case. - var it = else_table.keyIterator(); + .main_analysis => { + // This is, all in all, just a messier version of the `cond_br` logic. If you're trying + // to understand it, I encourage looking at `analyzeInstCondBr` first. + + const DeathSet = std.AutoHashMapUnmanaged(Air.Inst.Index, void); + const DeathList = std.ArrayListUnmanaged(Air.Inst.Index); + + var case_infos = try gpa.alloc(ControlBranchInfo, ncases + 1); // +1 for else + defer gpa.free(case_infos); + + std.mem.set(ControlBranchInfo, case_infos, .{}); + defer for (case_infos) |*info| { + info.branch_deaths.deinit(gpa); + info.live_set.deinit(gpa); + }; + + var air_extra_index: usize = switch_br.end; + for (case_infos[0..ncases]) |*info| { + const case = a.air.extraData(Air.SwitchBr.Case, air_extra_index); + const case_body = a.air.extra[case.end + case.data.items_len ..][0..case.data.body_len]; + air_extra_index = case.end + case.data.items_len + case_body.len; + info.* = try analyzeBodyResetBranch(a, pass, data, case_body); + } + { // else + const else_body = a.air.extra[air_extra_index..][0..switch_br.data.else_body_len]; + try analyzeBody(a, pass, data, else_body); + case_infos[ncases] = .{ + .branch_deaths = data.branch_deaths.move(), + .live_set = data.live_set.move(), + }; + } + + // Queued deaths common to all cases can be bubbled up + { + // We can't remove from the set we're iterating over, so we'll store the shared deaths here + // temporarily to remove them + var shared_deaths: DeathSet = .{}; + defer shared_deaths.deinit(gpa); + + var it = case_infos[0].branch_deaths.keyIterator(); while (it.next()) |key| { - assert(table.remove(key.*)); + const death = key.*; + for (case_infos[1..]) |*info| { + if (!info.branch_deaths.contains(death)) break; + } else try shared_deaths.put(gpa, death, {}); + } + + log.debug("[{}] %{}: bubbled deaths {}", .{ pass, inst, fmtInstSet(&shared_deaths) }); + + try data.branch_deaths.ensureUnusedCapacity(gpa, shared_deaths.count()); + it = shared_deaths.keyIterator(); + while (it.next()) |key| { + const death = key.*; + data.branch_deaths.putAssumeCapacity(death, {}); + for (case_infos) |*info| { + _ = info.branch_deaths.remove(death); + } + } + + for (case_infos, 0..) |*info, i| { + log.debug("[{}] %{}: case {} remaining branch deaths are {}", .{ pass, inst, i, fmtInstSet(&info.branch_deaths) }); } } - const List = std.ArrayListUnmanaged(Air.Inst.Index); - const case_deaths = try gpa.alloc(List, case_tables.len); // includes else - defer gpa.free(case_deaths); + const mirrored_deaths = try gpa.alloc(DeathList, ncases + 1); + defer gpa.free(mirrored_deaths); - std.mem.set(List, case_deaths, .{}); - defer for (case_deaths) |*cd| cd.deinit(gpa); + std.mem.set(DeathList, mirrored_deaths, .{}); + defer for (mirrored_deaths) |*md| md.deinit(gpa); - var total_deaths: u32 = 0; - for (case_tables, 0..) |*ct, i| { - total_deaths += ct.count(); - var it = ct.keyIterator(); - while (it.next()) |key| { - const case_death = key.*; - for (case_tables, 0..) |*ct_inner, j| { - if (i == j) continue; - if (!ct_inner.contains(case_death)) { - // instruction is not referenced in this case - try case_deaths[j].append(gpa, case_death); + { + var all_alive: DeathSet = .{}; + defer all_alive.deinit(gpa); + + for (case_infos) |*info| { + try all_alive.ensureUnusedCapacity(gpa, info.live_set.count()); + var it = info.live_set.keyIterator(); + while (it.next()) |key| { + const alive = key.*; + all_alive.putAssumeCapacity(alive, {}); + } + } + + for (mirrored_deaths, case_infos) |*mirrored, *info| { + var it = all_alive.keyIterator(); + while (it.next()) |key| { + const alive = key.*; + if (!info.live_set.contains(alive) and !info.branch_deaths.contains(alive)) { + // Should die at the start of this branch + try mirrored.append(gpa, alive); } } - // undo resetting the table - try table.put(gpa, case_death, {}); } + + for (mirrored_deaths, 0..) |mirrored, i| { + log.debug("[{}] %{}: case {} mirrored deaths are {}", .{ pass, inst, i, fmtInstList(mirrored.items) }); + } + + data.live_set.deinit(gpa); + data.live_set = all_alive.move(); + + log.debug("[{}] %{}: new live set is {}", .{ pass, inst, fmtInstSet(&data.live_set) }); } - // Now we have to correctly populate new_set. - if (new_set) |ns| { - try ns.ensureUnusedCapacity(gpa, total_deaths); - for (case_tables) |*ct| { - var it = ct.keyIterator(); - while (it.next()) |key| { - _ = ns.putAssumeCapacity(key.*, {}); - } - } - } + const else_death_count = case_infos[ncases].branch_deaths.count() + @intCast(u32, mirrored_deaths[ncases].items.len); - const else_death_count = @intCast(u32, case_deaths[case_deaths.len - 1].items.len); const extra_index = try a.addExtra(SwitchBr{ .else_death_count = else_death_count, }); - for (case_deaths[0 .. case_deaths.len - 1]) |*cd| { - const case_death_count = @intCast(u32, cd.items.len); - try a.extra.ensureUnusedCapacity(gpa, 1 + case_death_count + else_death_count); - a.extra.appendAssumeCapacity(case_death_count); - a.extra.appendSliceAssumeCapacity(cd.items); + for (mirrored_deaths[0..ncases], case_infos[0..ncases]) |mirrored, info| { + const num = info.branch_deaths.count() + @intCast(u32, mirrored.items.len); + try a.extra.ensureUnusedCapacity(gpa, num + 1); + a.extra.appendAssumeCapacity(num); + a.extra.appendSliceAssumeCapacity(mirrored.items); + { + var it = info.branch_deaths.keyIterator(); + while (it.next()) |key| a.extra.appendAssumeCapacity(key.*); + } + } + try a.extra.ensureUnusedCapacity(gpa, else_death_count); + a.extra.appendSliceAssumeCapacity(mirrored_deaths[ncases].items); + { + var it = case_infos[ncases].branch_deaths.keyIterator(); + while (it.next()) |key| a.extra.appendAssumeCapacity(key.*); } - a.extra.appendSliceAssumeCapacity(case_deaths[case_deaths.len - 1].items); try a.special.put(gpa, inst, extra_index); - - return trackOperands(a, new_set, inst, main_tomb, .{ condition, .none, .none }); - }, - .wasm_memory_grow => { - const pl_op = inst_datas[inst].pl_op; - return trackOperands(a, new_set, inst, main_tomb, .{ pl_op.operand, .none, .none }); }, } + + try analyzeOperands(a, pass, data, inst, .{ condition, .none, .none }); } -fn trackOperands( - a: *Analysis, - new_set: ?*std.AutoHashMapUnmanaged(Air.Inst.Index, void), - inst: Air.Inst.Index, - main_tomb: bool, - operands: [bpi - 1]Air.Inst.Ref, -) Allocator.Error!void { - const table = &a.table; - const gpa = a.gpa; +fn AnalyzeBigOperands(comptime pass: LivenessPass) type { + return struct { + a: *Analysis, + data: *LivenessPassData(pass), + inst: Air.Inst.Index, - var tomb_bits: Bpi = @boolToInt(main_tomb); - var i = operands.len; + operands_remaining: u32, + small: [bpi - 1]Air.Inst.Ref = .{.none} ** (bpi - 1), + extra_tombs: []u32, - while (i > 0) { - i -= 1; - tomb_bits <<= 1; - const op_int = @enumToInt(operands[i]); - if (op_int < Air.Inst.Ref.typed_value_map.len) continue; - const operand: Air.Inst.Index = op_int - @intCast(u32, Air.Inst.Ref.typed_value_map.len); - const prev = try table.fetchPut(gpa, operand, {}); - if (prev == null) { - // Death. - tomb_bits |= 1; - if (new_set) |ns| try ns.putNoClobber(gpa, operand, {}); - } - } - a.storeTombBits(inst, tomb_bits); -} + const Self = @This(); -const ExtraTombs = struct { - analysis: *Analysis, - new_set: ?*std.AutoHashMapUnmanaged(Air.Inst.Index, void), - inst: Air.Inst.Index, - main_tomb: bool, - bit_index: usize = 0, - tomb_bits: Bpi = 0, - big_tomb_bits: u32 = 0, - big_tomb_bits_extra: std.ArrayListUnmanaged(u32) = .{}, + fn init( + a: *Analysis, + data: *LivenessPassData(pass), + inst: Air.Inst.Index, + total_operands: usize, + ) !Self { + const extra_operands = @intCast(u32, total_operands) -| (bpi - 1); + const max_extra_tombs = (extra_operands + 30) / 31; - fn feed(et: *ExtraTombs, op_ref: Air.Inst.Ref) !void { - const this_bit_index = et.bit_index; - et.bit_index += 1; - const gpa = et.analysis.gpa; - const op_index = Air.refToIndex(op_ref) orelse return; - const prev = try et.analysis.table.fetchPut(gpa, op_index, {}); - if (prev == null) { - // Death. - if (et.new_set) |ns| try ns.putNoClobber(gpa, op_index, {}); - const available_tomb_bits = bpi - 1; - if (this_bit_index < available_tomb_bits) { - et.tomb_bits |= @as(Bpi, 1) << @intCast(OperandInt, this_bit_index); - } else { - const big_bit_index = this_bit_index - available_tomb_bits; - while (big_bit_index >= (et.big_tomb_bits_extra.items.len + 1) * 31) { - // We need another element in the extra array. - try et.big_tomb_bits_extra.append(gpa, et.big_tomb_bits); - et.big_tomb_bits = 0; - } else { - const final_bit_index = big_bit_index - et.big_tomb_bits_extra.items.len * 31; - et.big_tomb_bits |= @as(u32, 1) << @intCast(u5, final_bit_index); - } - } - } - } - - fn finish(et: *ExtraTombs) !void { - et.tomb_bits |= @as(Bpi, @boolToInt(et.main_tomb)) << (bpi - 1); - // Signal the terminal big_tomb_bits element. - et.big_tomb_bits |= @as(u32, 1) << 31; - - et.analysis.storeTombBits(et.inst, et.tomb_bits); - const extra_index = @intCast(u32, et.analysis.extra.items.len); - try et.analysis.extra.ensureUnusedCapacity(et.analysis.gpa, et.big_tomb_bits_extra.items.len + 1); - try et.analysis.special.put(et.analysis.gpa, et.inst, extra_index); - et.analysis.extra.appendSliceAssumeCapacity(et.big_tomb_bits_extra.items); - et.analysis.extra.appendAssumeCapacity(et.big_tomb_bits); - } - - fn deinit(et: *ExtraTombs) void { - et.big_tomb_bits_extra.deinit(et.analysis.gpa); - } -}; - -/// Remove any deaths invalidated by the deaths from an enclosing `loop`. Reshuffling deaths stored -/// in `extra` causes it to become non-dense, but that's fine - we won't remove too much data. -/// Making it dense would be a lot more work - it'd require recomputing every index in `special`. -fn removeDeaths( - a: *Analysis, - to_remove: *std.AutoHashMapUnmanaged(Air.Inst.Index, void), - body: []const Air.Inst.Index, -) error{OutOfMemory}!void { - for (body) |inst| { - try removeInstDeaths(a, to_remove, inst); - } -} - -fn removeInstDeaths( - a: *Analysis, - to_remove: *std.AutoHashMapUnmanaged(Air.Inst.Index, void), - inst: Air.Inst.Index, -) !void { - const inst_tags = a.air.instructions.items(.tag); - const inst_datas = a.air.instructions.items(.data); - - switch (inst_tags[inst]) { - .add, - .add_optimized, - .addwrap, - .addwrap_optimized, - .add_sat, - .sub, - .sub_optimized, - .subwrap, - .subwrap_optimized, - .sub_sat, - .mul, - .mul_optimized, - .mulwrap, - .mulwrap_optimized, - .mul_sat, - .div_float, - .div_float_optimized, - .div_trunc, - .div_trunc_optimized, - .div_floor, - .div_floor_optimized, - .div_exact, - .div_exact_optimized, - .rem, - .rem_optimized, - .mod, - .mod_optimized, - .bit_and, - .bit_or, - .xor, - .cmp_lt, - .cmp_lt_optimized, - .cmp_lte, - .cmp_lte_optimized, - .cmp_eq, - .cmp_eq_optimized, - .cmp_gte, - .cmp_gte_optimized, - .cmp_gt, - .cmp_gt_optimized, - .cmp_neq, - .cmp_neq_optimized, - .bool_and, - .bool_or, - .store, - .array_elem_val, - .slice_elem_val, - .ptr_elem_val, - .shl, - .shl_exact, - .shl_sat, - .shr, - .shr_exact, - .atomic_store_unordered, - .atomic_store_monotonic, - .atomic_store_release, - .atomic_store_seq_cst, - .set_union_tag, - .min, - .max, - => { - const o = inst_datas[inst].bin_op; - removeOperandDeaths(a, to_remove, inst, .{ o.lhs, o.rhs, .none }); - }, - - .vector_store_elem => { - const o = inst_datas[inst].vector_store_elem; - const extra = a.air.extraData(Air.Bin, o.payload).data; - removeOperandDeaths(a, to_remove, inst, .{ o.vector_ptr, extra.lhs, extra.rhs }); - }, - - .arg, - .alloc, - .ret_ptr, - .constant, - .const_ty, - .trap, - .breakpoint, - .dbg_stmt, - .dbg_inline_begin, - .dbg_inline_end, - .dbg_block_begin, - .dbg_block_end, - .unreach, - .fence, - .ret_addr, - .frame_addr, - .wasm_memory_size, - .err_return_trace, - .save_err_return_trace_index, - .c_va_start, - .work_item_id, - .work_group_size, - .work_group_id, - => {}, - - .not, - .bitcast, - .load, - .fpext, - .fptrunc, - .intcast, - .trunc, - .optional_payload, - .optional_payload_ptr, - .optional_payload_ptr_set, - .errunion_payload_ptr_set, - .wrap_optional, - .unwrap_errunion_payload, - .unwrap_errunion_err, - .unwrap_errunion_payload_ptr, - .unwrap_errunion_err_ptr, - .wrap_errunion_payload, - .wrap_errunion_err, - .slice_ptr, - .slice_len, - .ptr_slice_len_ptr, - .ptr_slice_ptr_ptr, - .struct_field_ptr_index_0, - .struct_field_ptr_index_1, - .struct_field_ptr_index_2, - .struct_field_ptr_index_3, - .array_to_slice, - .float_to_int, - .float_to_int_optimized, - .int_to_float, - .get_union_tag, - .clz, - .ctz, - .popcount, - .byte_swap, - .bit_reverse, - .splat, - .error_set_has_value, - .addrspace_cast, - .c_va_arg, - .c_va_copy, - => { - const o = inst_datas[inst].ty_op; - removeOperandDeaths(a, to_remove, inst, .{ o.operand, .none, .none }); - }, - - .is_null, - .is_non_null, - .is_null_ptr, - .is_non_null_ptr, - .is_err, - .is_non_err, - .is_err_ptr, - .is_non_err_ptr, - .ptrtoint, - .bool_to_int, - .ret, - .ret_load, - .is_named_enum_value, - .tag_name, - .error_name, - .sqrt, - .sin, - .cos, - .tan, - .exp, - .exp2, - .log, - .log2, - .log10, - .fabs, - .floor, - .ceil, - .round, - .trunc_float, - .neg, - .neg_optimized, - .cmp_lt_errors_len, - .set_err_return_trace, - .c_va_end, - => { - const operand = inst_datas[inst].un_op; - removeOperandDeaths(a, to_remove, inst, .{ operand, .none, .none }); - }, - - .add_with_overflow, - .sub_with_overflow, - .mul_with_overflow, - .shl_with_overflow, - .ptr_add, - .ptr_sub, - .ptr_elem_ptr, - .slice_elem_ptr, - .slice, - => { - const ty_pl = inst_datas[inst].ty_pl; - const extra = a.air.extraData(Air.Bin, ty_pl.payload).data; - removeOperandDeaths(a, to_remove, inst, .{ extra.lhs, extra.rhs, .none }); - }, - - .dbg_var_ptr, - .dbg_var_val, - => { - const operand = inst_datas[inst].pl_op.operand; - removeOperandDeaths(a, to_remove, inst, .{ operand, .none, .none }); - }, - - .prefetch => { - const prefetch = inst_datas[inst].prefetch; - removeOperandDeaths(a, to_remove, inst, .{ prefetch.ptr, .none, .none }); - }, - - .call, .call_always_tail, .call_never_tail, .call_never_inline => { - const inst_data = inst_datas[inst].pl_op; - const callee = inst_data.operand; - const extra = a.air.extraData(Air.Call, inst_data.payload); - const args = @ptrCast([]const Air.Inst.Ref, a.air.extra[extra.end..][0..extra.data.args_len]); - - var death_remover = BigTombDeathRemover.init(a, to_remove, inst); - death_remover.feed(callee); - for (args) |operand| { - death_remover.feed(operand); - } - death_remover.finish(); - }, - .select => { - const pl_op = inst_datas[inst].pl_op; - const extra = a.air.extraData(Air.Bin, pl_op.payload).data; - removeOperandDeaths(a, to_remove, inst, .{ pl_op.operand, extra.lhs, extra.rhs }); - }, - .shuffle => { - const extra = a.air.extraData(Air.Shuffle, inst_datas[inst].ty_pl.payload).data; - removeOperandDeaths(a, to_remove, inst, .{ extra.a, extra.b, .none }); - }, - .reduce, .reduce_optimized => { - const reduce = inst_datas[inst].reduce; - removeOperandDeaths(a, to_remove, inst, .{ reduce.operand, .none, .none }); - }, - .cmp_vector, .cmp_vector_optimized => { - const extra = a.air.extraData(Air.VectorCmp, inst_datas[inst].ty_pl.payload).data; - removeOperandDeaths(a, to_remove, inst, .{ extra.lhs, extra.rhs, .none }); - }, - .aggregate_init => { - const ty_pl = inst_datas[inst].ty_pl; - const aggregate_ty = a.air.getRefType(ty_pl.ty); - const len = @intCast(usize, aggregate_ty.arrayLen()); - const elements = @ptrCast([]const Air.Inst.Ref, a.air.extra[ty_pl.payload..][0..len]); - - var death_remover = BigTombDeathRemover.init(a, to_remove, inst); - for (elements) |elem| { - death_remover.feed(elem); - } - death_remover.finish(); - }, - .union_init => { - const extra = a.air.extraData(Air.UnionInit, inst_datas[inst].ty_pl.payload).data; - removeOperandDeaths(a, to_remove, inst, .{ extra.init, .none, .none }); - }, - .struct_field_ptr, .struct_field_val => { - const extra = a.air.extraData(Air.StructField, inst_datas[inst].ty_pl.payload).data; - removeOperandDeaths(a, to_remove, inst, .{ extra.struct_operand, .none, .none }); - }, - .field_parent_ptr => { - const extra = a.air.extraData(Air.FieldParentPtr, inst_datas[inst].ty_pl.payload).data; - removeOperandDeaths(a, to_remove, inst, .{ extra.field_ptr, .none, .none }); - }, - .cmpxchg_strong, .cmpxchg_weak => { - const extra = a.air.extraData(Air.Cmpxchg, inst_datas[inst].ty_pl.payload).data; - removeOperandDeaths(a, to_remove, inst, .{ extra.ptr, extra.expected_value, extra.new_value }); - }, - .mul_add => { - const pl_op = inst_datas[inst].pl_op; - const extra = a.air.extraData(Air.Bin, pl_op.payload).data; - removeOperandDeaths(a, to_remove, inst, .{ extra.lhs, extra.rhs, pl_op.operand }); - }, - .atomic_load => { - const ptr = inst_datas[inst].atomic_load.ptr; - removeOperandDeaths(a, to_remove, inst, .{ ptr, .none, .none }); - }, - .atomic_rmw => { - const pl_op = inst_datas[inst].pl_op; - const extra = a.air.extraData(Air.AtomicRmw, pl_op.payload).data; - removeOperandDeaths(a, to_remove, inst, .{ pl_op.operand, extra.operand, .none }); - }, - .memset, - .memcpy, - => { - const pl_op = inst_datas[inst].pl_op; - const extra = a.air.extraData(Air.Bin, pl_op.payload).data; - removeOperandDeaths(a, to_remove, inst, .{ pl_op.operand, extra.lhs, extra.rhs }); - }, - - .br => { - const br = inst_datas[inst].br; - removeOperandDeaths(a, to_remove, inst, .{ br.operand, .none, .none }); - }, - .assembly => { - const extra = a.air.extraData(Air.Asm, inst_datas[inst].ty_pl.payload); - var extra_i: usize = extra.end; - const outputs = @ptrCast([]const Air.Inst.Ref, a.air.extra[extra_i..][0..extra.data.outputs_len]); - extra_i += outputs.len; - const inputs = @ptrCast([]const Air.Inst.Ref, a.air.extra[extra_i..][0..extra.data.inputs_len]); - extra_i += inputs.len; - - var death_remover = BigTombDeathRemover.init(a, to_remove, inst); - for (outputs) |output| { - if (output != .none) { - death_remover.feed(output); - } - } - for (inputs) |input| { - death_remover.feed(input); - } - death_remover.finish(); - }, - .block => { - const extra = a.air.extraData(Air.Block, inst_datas[inst].ty_pl.payload); - const body = a.air.extra[extra.end..][0..extra.data.body_len]; - try removeDeaths(a, to_remove, body); - }, - .loop => { - const extra = a.air.extraData(Air.Block, inst_datas[inst].ty_pl.payload); - const body = a.air.extra[extra.end..][0..extra.data.body_len]; - - const liveness_extra_idx = a.special.get(inst) orelse { - try removeDeaths(a, to_remove, body); - return; + const extra_tombs: []u32 = switch (pass) { + .loop_analysis => &.{}, + .main_analysis => try a.gpa.alloc(u32, max_extra_tombs), }; + errdefer a.gpa.free(extra_tombs); - const death_count = a.extra.items[liveness_extra_idx]; - var deaths = a.extra.items[liveness_extra_idx + 1 ..][0..death_count]; + std.mem.set(u32, extra_tombs, 0); - // Remove any deaths in `to_remove` from this loop's deaths - deaths.len = removeExtraDeaths(to_remove, deaths); - a.extra.items[liveness_extra_idx] = @intCast(u32, deaths.len); + return .{ + .a = a, + .data = data, + .inst = inst, + .operands_remaining = @intCast(u32, total_operands), + .extra_tombs = extra_tombs, + }; + } - // Temporarily add any deaths of ours to `to_remove` - try to_remove.ensureUnusedCapacity(a.gpa, @intCast(u32, deaths.len)); - for (deaths) |d| { - to_remove.putAssumeCapacity(d, {}); - } - try removeDeaths(a, to_remove, body); - for (deaths) |d| { - _ = to_remove.remove(d); - } - }, - .@"try" => { - const pl_op = inst_datas[inst].pl_op; - const extra = a.air.extraData(Air.Try, pl_op.payload); - const body = a.air.extra[extra.end..][0..extra.data.body_len]; - try removeDeaths(a, to_remove, body); - removeOperandDeaths(a, to_remove, inst, .{ pl_op.operand, .none, .none }); - }, - .try_ptr => { - const extra = a.air.extraData(Air.TryPtr, inst_datas[inst].ty_pl.payload); - const body = a.air.extra[extra.end..][0..extra.data.body_len]; - try removeDeaths(a, to_remove, body); - removeOperandDeaths(a, to_remove, inst, .{ extra.data.ptr, .none, .none }); - }, - .cond_br => { - const inst_data = inst_datas[inst].pl_op; - const condition = inst_data.operand; - const extra = a.air.extraData(Air.CondBr, inst_data.payload); - const then_body = a.air.extra[extra.end..][0..extra.data.then_body_len]; - const else_body = a.air.extra[extra.end + then_body.len ..][0..extra.data.else_body_len]; + /// Must be called with operands in reverse order. + fn feed(big: *Self, op_ref: Air.Inst.Ref) !void { + // Note that after this, `operands_remaining` becomes the index of the current operand + big.operands_remaining -= 1; - if (a.special.get(inst)) |liveness_extra_idx| { - const then_death_count = a.extra.items[liveness_extra_idx + 0]; - const else_death_count = a.extra.items[liveness_extra_idx + 1]; - var then_deaths = a.extra.items[liveness_extra_idx + 2 ..][0..then_death_count]; - var else_deaths = a.extra.items[liveness_extra_idx + 2 + then_death_count ..][0..else_death_count]; - - const new_then_death_count = removeExtraDeaths(to_remove, then_deaths); - const new_else_death_count = removeExtraDeaths(to_remove, else_deaths); - - a.extra.items[liveness_extra_idx + 0] = new_then_death_count; - a.extra.items[liveness_extra_idx + 1] = new_else_death_count; - - if (new_then_death_count < then_death_count) { - // `else` deaths need to be moved earlier in `extra` - const src = a.extra.items[liveness_extra_idx + 2 + then_death_count ..]; - const dest = a.extra.items[liveness_extra_idx + 2 + new_then_death_count ..]; - std.mem.copy(u32, dest, src[0..new_else_death_count]); - } + if (big.operands_remaining < bpi - 1) { + big.small[big.operands_remaining] = op_ref; + return; } - try removeDeaths(a, to_remove, then_body); - try removeDeaths(a, to_remove, else_body); + const operand = Air.refToIndex(op_ref) orelse return; - removeOperandDeaths(a, to_remove, inst, .{ condition, .none, .none }); - }, - .switch_br => { - const pl_op = inst_datas[inst].pl_op; - const condition = pl_op.operand; - const switch_br = a.air.extraData(Air.SwitchBr, pl_op.payload); - - var air_extra_index: usize = switch_br.end; - for (0..switch_br.data.cases_len) |_| { - const case = a.air.extraData(Air.SwitchBr.Case, air_extra_index); - const case_body = a.air.extra[case.end + case.data.items_len ..][0..case.data.body_len]; - air_extra_index = case.end + case.data.items_len + case_body.len; - try removeDeaths(a, to_remove, case_body); - } - { // else - const else_body = a.air.extra[air_extra_index..][0..switch_br.data.else_body_len]; - try removeDeaths(a, to_remove, else_body); + // Don't compute any liveness for constants + const inst_tags = big.a.air.instructions.items(.tag); + switch (inst_tags[operand]) { + .constant, .const_ty => return, + else => {}, } - if (a.special.get(inst)) |liveness_extra_idx| { - const else_death_count = a.extra.items[liveness_extra_idx]; - var read_idx = liveness_extra_idx + 1; - var write_idx = read_idx; // write_idx <= read_idx always - for (0..switch_br.data.cases_len) |_| { - const case_death_count = a.extra.items[read_idx]; - const case_deaths = a.extra.items[read_idx + 1 ..][0..case_death_count]; - const new_death_count = removeExtraDeaths(to_remove, case_deaths); - a.extra.items[write_idx] = new_death_count; - if (write_idx < read_idx) { - std.mem.copy(u32, a.extra.items[write_idx + 1 ..], a.extra.items[read_idx + 1 ..][0..new_death_count]); + const extra_byte = (big.operands_remaining - (bpi - 1)) / 31; + const extra_bit = @intCast(u5, big.operands_remaining - (bpi - 1) - extra_byte * 31); + + const gpa = big.a.gpa; + + switch (pass) { + .loop_analysis => { + _ = try big.data.live_set.put(gpa, operand, {}); + }, + + .main_analysis => { + if ((try big.data.live_set.fetchPut(gpa, operand, {})) == null) { + log.debug("[{}] %{}: added %{} to live set (operand dies here)", .{ pass, big.inst, operand }); + big.extra_tombs[extra_byte] |= @as(u32, 1) << extra_bit; + if (big.data.branch_deaths.remove(operand)) { + log.debug("[{}] %{}: resolved branch death of %{} to this usage", .{ pass, big.inst, operand }); + } } - read_idx += 1 + case_death_count; - write_idx += 1 + new_death_count; - } - const else_deaths = a.extra.items[read_idx..][0..else_death_count]; - const new_else_death_count = removeExtraDeaths(to_remove, else_deaths); - a.extra.items[liveness_extra_idx] = new_else_death_count; - if (write_idx < read_idx) { - std.mem.copy(u32, a.extra.items[write_idx..], a.extra.items[read_idx..][0..new_else_death_count]); - } + }, + } + } + + fn finish(big: *Self) !void { + const gpa = big.a.gpa; + + std.debug.assert(big.operands_remaining == 0); + + switch (pass) { + .loop_analysis => {}, + + .main_analysis => { + // Note that the MSB is set on the final tomb to indicate the terminal element. This + // allows for an optimisation where we only add as many extra tombs as are needed to + // represent the dying operands. Each pass modifies operand bits and so needs to write + // back, so let's figure out how many extra tombs we really need. Note that we always + // keep at least one. + var num: usize = big.extra_tombs.len; + while (num > 1) { + if (@truncate(u31, big.extra_tombs[num - 1]) != 0) { + // Some operand dies here + break; + } + num -= 1; + } + // Mark final tomb + big.extra_tombs[num - 1] |= @as(u32, 1) << 31; + + const extra_tombs = big.extra_tombs[0..num]; + + const extra_index = @intCast(u32, big.a.extra.items.len); + try big.a.extra.appendSlice(gpa, extra_tombs); + try big.a.special.put(gpa, big.inst, extra_index); + }, } - removeOperandDeaths(a, to_remove, inst, .{ condition, .none, .none }); - }, - .wasm_memory_grow => { - const pl_op = inst_datas[inst].pl_op; - removeOperandDeaths(a, to_remove, inst, .{ pl_op.operand, .none, .none }); - }, - } -} - -fn removeOperandDeaths( - a: *Analysis, - to_remove: *const std.AutoHashMapUnmanaged(Air.Inst.Index, void), - inst: Air.Inst.Index, - operands: [bpi - 1]Air.Inst.Ref, -) void { - const usize_index = (inst * bpi) / @bitSizeOf(usize); - - const cur_tomb = @truncate(Bpi, a.tomb_bits[usize_index] >> - @intCast(Log2Int(usize), (inst % (@bitSizeOf(usize) / bpi)) * bpi)); - - var toggle_bits: Bpi = 0; - - for (operands, 0..) |op_ref, i| { - const mask = @as(Bpi, 1) << @intCast(OperandInt, i); - const op_int = @enumToInt(op_ref); - if (op_int < Air.Inst.Ref.typed_value_map.len) continue; - const operand: Air.Inst.Index = op_int - @intCast(u32, Air.Inst.Ref.typed_value_map.len); - if ((cur_tomb & mask) != 0 and to_remove.contains(operand)) { - log.debug("remove death of %{} in %{}", .{ operand, inst }); - toggle_bits ^= mask; + try analyzeOperands(big.a, pass, big.data, big.inst, big.small); } - } - a.tomb_bits[usize_index] ^= @as(usize, toggle_bits) << - @intCast(Log2Int(usize), (inst % (@bitSizeOf(usize) / bpi)) * bpi); -} - -fn removeExtraDeaths( - to_remove: *const std.AutoHashMapUnmanaged(Air.Inst.Index, void), - deaths: []Air.Inst.Index, -) u32 { - var new_len = @intCast(u32, deaths.len); - var i: usize = 0; - while (i < new_len) { - if (to_remove.contains(deaths[i])) { - log.debug("remove extra death of %{}", .{deaths[i]}); - deaths[i] = deaths[new_len - 1]; - new_len -= 1; - } else { - i += 1; + fn deinit(big: *Self) void { + big.a.gpa.free(big.extra_tombs); } - } - return new_len; + }; } -const BigTombDeathRemover = struct { - a: *Analysis, - to_remove: *const std.AutoHashMapUnmanaged(Air.Inst.Index, void), - inst: Air.Inst.Index, +fn fmtInstSet(set: *const std.AutoHashMapUnmanaged(Air.Inst.Index, void)) FmtInstSet { + return .{ .set = set }; +} - operands: [bpi - 1]Air.Inst.Ref = .{.none} ** (bpi - 1), - next_oper: OperandInt = 0, +const FmtInstSet = struct { + set: *const std.AutoHashMapUnmanaged(Air.Inst.Index, void), - bit_index: u32 = 0, - // Initialized once we finish the small tomb operands: see `feed` - extra_start: u32 = undefined, - extra_offset: u32 = 0, - - fn init(a: *Analysis, to_remove: *const std.AutoHashMapUnmanaged(Air.Inst.Index, void), inst: Air.Inst.Index) BigTombDeathRemover { - return .{ - .a = a, - .to_remove = to_remove, - .inst = inst, - }; - } - - fn feed(dr: *BigTombDeathRemover, operand: Air.Inst.Ref) void { - if (dr.next_oper < bpi - 1) { - dr.operands[dr.next_oper] = operand; - dr.next_oper += 1; - if (dr.next_oper == bpi - 1) { - removeOperandDeaths(dr.a, dr.to_remove, dr.inst, dr.operands); - if (dr.a.special.get(dr.inst)) |idx| dr.extra_start = idx; - } + pub fn format(val: FmtInstSet, comptime _: []const u8, _: std.fmt.FormatOptions, w: anytype) !void { + if (val.set.count() == 0) { + try w.writeAll("[no instructions]"); return; } - - defer dr.bit_index += 1; - - const op_int = @enumToInt(operand); - if (op_int < Air.Inst.Ref.typed_value_map.len) return; - - const op_inst: Air.Inst.Index = op_int - @intCast(u32, Air.Inst.Ref.typed_value_map.len); - - while (dr.bit_index - dr.extra_offset * 31 >= 31) { - dr.extra_offset += 1; - } - const dies = @truncate(u1, dr.a.extra.items[dr.extra_start + dr.extra_offset] >> - @intCast(u5, dr.bit_index - dr.extra_offset * 31)) != 0; - - if (dies and dr.to_remove.contains(op_inst)) { - log.debug("remove big death of %{}", .{op_inst}); - dr.a.extra.items[dr.extra_start + dr.extra_offset] ^= - (@as(u32, 1) << @intCast(u5, dr.bit_index - dr.extra_offset * 31)); - } - } - - fn finish(dr: *BigTombDeathRemover) void { - if (dr.next_oper < bpi) { - removeOperandDeaths(dr.a, dr.to_remove, dr.inst, dr.operands); + var it = val.set.keyIterator(); + try w.print("%{}", .{it.next().?.*}); + while (it.next()) |key| { + try w.print(" %{}", .{key.*}); + } + } +}; + +fn fmtInstList(list: []const Air.Inst.Index) FmtInstList { + return .{ .list = list }; +} + +const FmtInstList = struct { + list: []const Air.Inst.Index, + + pub fn format(val: FmtInstList, comptime _: []const u8, _: std.fmt.FormatOptions, w: anytype) !void { + if (val.list.len == 0) { + try w.writeAll("[no instructions]"); + return; + } + try w.print("%{}", .{val.list[0]}); + for (val.list[1..]) |inst| { + try w.print(" %{}", .{inst}); } } }; diff --git a/src/arch/aarch64/CodeGen.zig b/src/arch/aarch64/CodeGen.zig index e08386dfcb..f2116689e9 100644 --- a/src/arch/aarch64/CodeGen.zig +++ b/src/arch/aarch64/CodeGen.zig @@ -5000,17 +5000,11 @@ fn airLoop(self: *Self, inst: Air.Inst.Index) !void { const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; const loop = self.air.extraData(Air.Block, ty_pl.payload); const body = self.air.extra[loop.end..][0..loop.data.body_len]; - const liveness_loop = self.liveness.getLoop(inst); const start_index = @intCast(u32, self.mir_instructions.len); try self.genBody(body); try self.jump(start_index); - try self.ensureProcessDeathCapacity(liveness_loop.deaths.len); - for (liveness_loop.deaths) |operand| { - self.processDeath(operand); - } - return self.finishAirBookkeeping(); } diff --git a/src/arch/arm/CodeGen.zig b/src/arch/arm/CodeGen.zig index 1acf5a5164..70144551d1 100644 --- a/src/arch/arm/CodeGen.zig +++ b/src/arch/arm/CodeGen.zig @@ -4923,17 +4923,11 @@ fn airLoop(self: *Self, inst: Air.Inst.Index) !void { const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; const loop = self.air.extraData(Air.Block, ty_pl.payload); const body = self.air.extra[loop.end..][0..loop.data.body_len]; - const liveness_loop = self.liveness.getLoop(inst); const start_index = @intCast(Mir.Inst.Index, self.mir_instructions.len); try self.genBody(body); try self.jump(start_index); - try self.ensureProcessDeathCapacity(liveness_loop.deaths.len); - for (liveness_loop.deaths) |operand| { - self.processDeath(operand); - } - return self.finishAirBookkeeping(); } diff --git a/src/arch/sparc64/CodeGen.zig b/src/arch/sparc64/CodeGen.zig index 5405b212a2..197234c13b 100644 --- a/src/arch/sparc64/CodeGen.zig +++ b/src/arch/sparc64/CodeGen.zig @@ -1750,17 +1750,11 @@ fn airLoop(self: *Self, inst: Air.Inst.Index) !void { const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; const loop = self.air.extraData(Air.Block, ty_pl.payload); const body = self.air.extra[loop.end .. loop.end + loop.data.body_len]; - const liveness_loop = self.liveness.getLoop(inst); const start = @intCast(u32, self.mir_instructions.len); try self.genBody(body); try self.jump(start); - try self.ensureProcessDeathCapacity(liveness_loop.deaths.len); - for (liveness_loop.deaths) |operand| { - self.processDeath(operand); - } - return self.finishAirBookkeeping(); } diff --git a/src/arch/wasm/CodeGen.zig b/src/arch/wasm/CodeGen.zig index 83af640b82..960b0ebf15 100644 --- a/src/arch/wasm/CodeGen.zig +++ b/src/arch/wasm/CodeGen.zig @@ -3183,7 +3183,6 @@ fn airLoop(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { const ty_pl = func.air.instructions.items(.data)[inst].ty_pl; const loop = func.air.extraData(Air.Block, ty_pl.payload); const body = func.air.extra[loop.end..][0..loop.data.body_len]; - const liveness_loop = func.liveness.getLoop(inst); // result type of loop is always 'noreturn', meaning we can always // emit the wasm type 'block_empty'. @@ -3194,11 +3193,6 @@ fn airLoop(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { try func.addLabel(.br, 0); try func.endBlock(); - try func.currentBranch().values.ensureUnusedCapacity(func.gpa, @intCast(u32, liveness_loop.deaths.len)); - for (liveness_loop.deaths) |death| { - func.processDeath(Air.indexToRef(death)); - } - func.finishAir(inst, .none, &.{}); } diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 0a59389839..33ff45d27d 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -6439,7 +6439,6 @@ fn airLoop(self: *Self, inst: Air.Inst.Index) !void { const loop = self.air.extraData(Air.Block, ty_pl.payload); const body = self.air.extra[loop.end..][0..loop.data.body_len]; const jmp_target = @intCast(u32, self.mir_instructions.len); - const liveness_loop = self.liveness.getLoop(inst); { try self.branch_stack.append(.{}); @@ -6464,11 +6463,6 @@ fn airLoop(self: *Self, inst: Air.Inst.Index) !void { _ = try self.asmJmpReloc(jmp_target); - try self.ensureProcessDeathCapacity(liveness_loop.deaths.len); - for (liveness_loop.deaths) |operand| { - self.processDeath(operand); - } - return self.finishAirBookkeeping(); } diff --git a/src/codegen/c.zig b/src/codegen/c.zig index f659827f09..9a89574b38 100644 --- a/src/codegen/c.zig +++ b/src/codegen/c.zig @@ -4637,17 +4637,12 @@ fn airLoop(f: *Function, inst: Air.Inst.Index) !CValue { const ty_pl = f.air.instructions.items(.data)[inst].ty_pl; const loop = f.air.extraData(Air.Block, ty_pl.payload); const body = f.air.extra[loop.end..][0..loop.data.body_len]; - const liveness_loop = f.liveness.getLoop(inst); const writer = f.object.writer(); try writer.writeAll("for (;;) "); try genBody(f, body); try writer.writeByte('\n'); - for (liveness_loop.deaths) |operand| { - try die(f, inst, Air.indexToRef(operand)); - } - return .none; } diff --git a/src/print_air.zig b/src/print_air.zig index 2970d43925..103ba465bb 100644 --- a/src/print_air.zig +++ b/src/print_air.zig @@ -405,7 +405,6 @@ const Writer = struct { const ty_pl = w.air.instructions.items(.data)[inst].ty_pl; const extra = w.air.extraData(Air.Block, ty_pl.payload); const body = w.air.extra[extra.end..][0..extra.data.body_len]; - const liveness_loop = w.liveness.getLoop(inst); try w.writeType(s, w.air.getRefType(ty_pl.ty)); if (w.skip_body) return s.writeAll(", ..."); @@ -413,14 +412,6 @@ const Writer = struct { const old_indent = w.indent; w.indent += 2; try w.writeBody(s, body); - if (liveness_loop.deaths.len != 0) { - try s.writeByteNTimes(' ', w.indent); - for (liveness_loop.deaths, 0..) |operand, i| { - if (i != 0) try s.writeAll(" "); - try s.print("%{d}!", .{operand}); - } - try s.writeAll("\n"); - } w.indent = old_indent; try s.writeByteNTimes(' ', w.indent); try s.writeAll("}");