diff --git a/ci/x86_64-linux-debug.sh b/ci/x86_64-linux-debug.sh index 7204fa29f1..f849e9fca0 100755 --- a/ci/x86_64-linux-debug.sh +++ b/ci/x86_64-linux-debug.sh @@ -12,7 +12,7 @@ CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.14.0-dev.1622+2ac543388" PREFIX="$HOME/deps/$CACHE_BASENAME" ZIG="$PREFIX/bin/zig" -export PATH="$HOME/deps/wasmtime-v10.0.2-$ARCH-linux:$HOME/deps/qemu-linux-x86_64-9.1.0/bin:$HOME/local/bin:$PATH" +export PATH="$HOME/deps/wasmtime-v10.0.2-$ARCH-linux:$HOME/deps/qemu-linux-x86_64-9.2.0-rc1/bin:$HOME/local/bin:$PATH" # Make the `zig version` number consistent. # This will affect the cmake command below. @@ -64,7 +64,7 @@ stage3-debug/bin/zig build \ stage3-debug/bin/zig build test docs \ --maxrss 21000000000 \ - -Dlldb=$HOME/deps/lldb-zig/Debug-6ece8bda1/bin/lldb \ + -Dlldb=$HOME/deps/lldb-zig/Debug-bfeada333/bin/lldb \ -fqemu \ -fwasmtime \ -Dstatic-llvm \ diff --git a/ci/x86_64-linux-release.sh b/ci/x86_64-linux-release.sh index b51b6f12d6..4ab6b4810c 100755 --- a/ci/x86_64-linux-release.sh +++ b/ci/x86_64-linux-release.sh @@ -12,7 +12,7 @@ CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.14.0-dev.1622+2ac543388" PREFIX="$HOME/deps/$CACHE_BASENAME" ZIG="$PREFIX/bin/zig" -export PATH="$HOME/deps/wasmtime-v10.0.2-$ARCH-linux:$HOME/deps/qemu-linux-x86_64-9.1.0/bin:$HOME/local/bin:$PATH" +export PATH="$HOME/deps/wasmtime-v10.0.2-$ARCH-linux:$HOME/deps/qemu-linux-x86_64-9.2.0-rc1/bin:$HOME/local/bin:$PATH" # Make the `zig version` number consistent. # This will affect the cmake command below. @@ -64,7 +64,7 @@ stage3-release/bin/zig build \ stage3-release/bin/zig build test docs \ --maxrss 21000000000 \ - -Dlldb=$HOME/deps/lldb-zig/Release-6ece8bda1/bin/lldb \ + -Dlldb=$HOME/deps/lldb-zig/Release-bfeada333/bin/lldb \ -fqemu \ -fwasmtime \ -Dstatic-llvm \ diff --git a/lib/compiler_rt.zig b/lib/compiler_rt.zig index 02d3f75c5b..82aeb7f88e 100644 --- a/lib/compiler_rt.zig +++ b/lib/compiler_rt.zig @@ -220,6 +220,7 @@ comptime { _ = @import("compiler_rt/aulldiv.zig"); _ = @import("compiler_rt/aullrem.zig"); _ = @import("compiler_rt/clear_cache.zig"); + _ = @import("compiler_rt/hexagon.zig"); if (@import("builtin").object_format != .c) { _ = @import("compiler_rt/atomics.zig"); diff --git a/lib/compiler_rt/count0bits.zig b/lib/compiler_rt/count0bits.zig index 60da0390da..0045f5741f 100644 --- a/lib/compiler_rt/count0bits.zig +++ b/lib/compiler_rt/count0bits.zig @@ -73,13 +73,13 @@ fn __clzsi2_thumb1() callconv(.Naked) void { \\ subs r1, #4 \\ movs r0, r2 \\ 1: - \\ ldr r3, =LUT + \\ ldr r3, .lut \\ ldrb r0, [r3, r0] \\ subs r0, r1, r0 \\ bx lr \\ .p2align 2 \\ // Number of bits set in the 0-15 range - \\ LUT: + \\ .lut: \\ .byte 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4 ); diff --git a/lib/compiler_rt/hexagon.zig b/lib/compiler_rt/hexagon.zig new file mode 100644 index 0000000000..de7fd96491 --- /dev/null +++ b/lib/compiler_rt/hexagon.zig @@ -0,0 +1,1787 @@ +const builtin = @import("builtin"); +const common = @import("./common.zig"); + +fn __hexagon_divsi3() callconv(.naked) noreturn { + asm volatile ( + \\ { + \\ p0 = cmp.ge(r0,#0) + \\ p1 = cmp.ge(r1,#0) + \\ r1 = abs(r0) + \\ r2 = abs(r1) + \\ } + \\ { + \\ r3 = cl0(r1) + \\ r4 = cl0(r2) + \\ r5 = sub(r1,r2) + \\ p2 = cmp.gtu(r2,r1) + \\ } + \\ { + \\ r0 = #0 + \\ p1 = xor(p0,p1) + \\ p0 = cmp.gtu(r2,r5) + \\ if (p2) jumpr r31 + \\ } + \\ + \\ { + \\ r0 = mux(p1,#-1,#1) + \\ if (p0) jumpr r31 + \\ r4 = sub(r4,r3) + \\ r3 = #1 + \\ } + \\ { + \\ r0 = #0 + \\ r3:2 = vlslw(r3:2,r4) + \\ loop0(1f,r4) + \\ } + \\ .falign + \\ 1: + \\ { + \\ 
p0 = cmp.gtu(r2,r1) + \\ if (!p0.new) r1 = sub(r1,r2) + \\ if (!p0.new) r0 = add(r0,r3) + \\ r3:2 = vlsrw(r3:2,#1) + \\ }:endloop0 + \\ { + \\ p0 = cmp.gtu(r2,r1) + \\ if (!p0.new) r0 = add(r0,r3) + \\ if (!p1) jumpr r31 + \\ } + \\ { + \\ r0 = neg(r0) + \\ jumpr r31 + \\ } + ); +} + +fn __hexagon_umodsi3() callconv(.naked) noreturn { + asm volatile ( + \\ { + \\ r2 = cl0(r0) + \\ r3 = cl0(r1) + \\ p0 = cmp.gtu(r1,r0) + \\ } + \\ { + \\ r2 = sub(r3,r2) + \\ if (p0) jumpr r31 + \\ } + \\ { + \\ loop0(1f,r2) + \\ p1 = cmp.eq(r2,#0) + \\ r2 = lsl(r1,r2) + \\ } + \\ .falign + \\ 1: + \\ { + \\ p0 = cmp.gtu(r2,r0) + \\ if (!p0.new) r0 = sub(r0,r2) + \\ r2 = lsr(r2,#1) + \\ if (p1) r1 = #0 + \\ }:endloop0 + \\ { + \\ p0 = cmp.gtu(r2,r0) + \\ if (!p0.new) r0 = sub(r0,r1) + \\ jumpr r31 + \\ } + ); +} + +fn __hexagon_sqrtf() callconv(.naked) noreturn { + asm volatile ( + \\ { + \\ r3,p0 = sfinvsqrta(r0) + \\ r5 = sffixupr(r0) + \\ r4 = ##0x3f000000 + \\ r1:0 = combine(#0,#0) + \\ } + \\ { + \\ r0 += sfmpy(r3,r5):lib + \\ r1 += sfmpy(r3,r4):lib + \\ r2 = r4 + \\ r3 = r5 + \\ } + \\ { + \\ r2 -= sfmpy(r0,r1):lib + \\ p1 = sfclass(r5,#1) + \\ + \\ } + \\ { + \\ r0 += sfmpy(r0,r2):lib + \\ r1 += sfmpy(r1,r2):lib + \\ r2 = r4 + \\ r3 = r5 + \\ } + \\ { + \\ r2 -= sfmpy(r0,r1):lib + \\ r3 -= sfmpy(r0,r0):lib + \\ } + \\ { + \\ r0 += sfmpy(r1,r3):lib + \\ r1 += sfmpy(r1,r2):lib + \\ r2 = r4 + \\ r3 = r5 + \\ } + \\ { + \\ + \\ r3 -= sfmpy(r0,r0):lib + \\ if (p1) r0 = or(r0,r5) + \\ } + \\ { + \\ r0 += sfmpy(r1,r3,p0):scale + \\ jumpr r31 + \\ } + ); +} + +fn __hexagon_moddi3() callconv(.naked) noreturn { + asm volatile ( + \\ { + \\ p3 = tstbit(r1,#31) + \\ } + \\ { + \\ r1:0 = abs(r1:0) + \\ r3:2 = abs(r3:2) + \\ } + \\ { + \\ r6 = cl0(r1:0) + \\ r7 = cl0(r3:2) + \\ r5:4 = r3:2 + \\ r3:2 = r1:0 + \\ } + \\ { + \\ r10 = sub(r7,r6) + \\ r1:0 = #0 + \\ r15:14 = #1 + \\ } + \\ { + \\ r11 = add(r10,#1) + \\ r13:12 = lsl(r5:4,r10) + \\ r15:14 = lsl(r15:14,r10) + \\ } + \\ { + \\ p0 = cmp.gtu(r5:4,r3:2) + \\ loop0(1f,r11) + \\ } + \\ { + \\ if (p0) jump .hexagon_moddi3_return + \\ } + \\ .falign + \\ 1: + \\ { + \\ p0 = cmp.gtu(r13:12,r3:2) + \\ } + \\ { + \\ r7:6 = sub(r3:2, r13:12) + \\ r9:8 = add(r1:0, r15:14) + \\ } + \\ { + \\ r1:0 = vmux(p0, r1:0, r9:8) + \\ r3:2 = vmux(p0, r3:2, r7:6) + \\ } + \\ { + \\ r15:14 = lsr(r15:14, #1) + \\ r13:12 = lsr(r13:12, #1) + \\ }:endloop0 + \\ + \\ .hexagon_moddi3_return: + \\ { + \\ r1:0 = neg(r3:2) + \\ } + \\ { + \\ r1:0 = vmux(p3,r1:0,r3:2) + \\ jumpr r31 + \\ } + ); +} + +fn __hexagon_divdi3() callconv(.naked) noreturn { + asm volatile ( + \\ { + \\ p2 = tstbit(r1,#31) + \\ p3 = tstbit(r3,#31) + \\ } + \\ { + \\ r1:0 = abs(r1:0) + \\ r3:2 = abs(r3:2) + \\ } + \\ { + \\ r6 = cl0(r1:0) + \\ r7 = cl0(r3:2) + \\ r5:4 = r3:2 + \\ r3:2 = r1:0 + \\ } + \\ { + \\ p3 = xor(p2,p3) + \\ r10 = sub(r7,r6) + \\ r1:0 = #0 + \\ r15:14 = #1 + \\ } + \\ { + \\ r11 = add(r10,#1) + \\ r13:12 = lsl(r5:4,r10) + \\ r15:14 = lsl(r15:14,r10) + \\ } + \\ { + \\ p0 = cmp.gtu(r5:4,r3:2) + \\ loop0(1f,r11) + \\ } + \\ { + \\ if (p0) jump .hexagon_divdi3_return + \\ } + \\ .falign + \\ 1: + \\ { + \\ p0 = cmp.gtu(r13:12,r3:2) + \\ } + \\ { + \\ r7:6 = sub(r3:2, r13:12) + \\ r9:8 = add(r1:0, r15:14) + \\ } + \\ { + \\ r1:0 = vmux(p0, r1:0, r9:8) + \\ r3:2 = vmux(p0, r3:2, r7:6) + \\ } + \\ { + \\ r15:14 = lsr(r15:14, #1) + \\ r13:12 = lsr(r13:12, #1) + \\ }:endloop0 + \\ + \\ .hexagon_divdi3_return: + \\ { + \\ r3:2 = neg(r1:0) + \\ } + \\ { + \\ r1:0 = vmux(p3,r3:2,r1:0) + \\ jumpr r31 + \\ } 
+ ); +} + +fn __hexagon_divsf3() callconv(.naked) noreturn { + asm volatile ( + \\ { + \\ r2,p0 = sfrecipa(r0,r1) + \\ r4 = sffixupd(r0,r1) + \\ r3 = ##0x3f800000 + \\ } + \\ { + \\ r5 = sffixupn(r0,r1) + \\ r3 -= sfmpy(r4,r2):lib + \\ r6 = ##0x80000000 + \\ r7 = r3 + \\ } + \\ { + \\ r2 += sfmpy(r3,r2):lib + \\ r3 = r7 + \\ r6 = r5 + \\ r0 = and(r6,r5) + \\ } + \\ { + \\ r3 -= sfmpy(r4,r2):lib + \\ r0 += sfmpy(r5,r2):lib + \\ } + \\ { + \\ r2 += sfmpy(r3,r2):lib + \\ r6 -= sfmpy(r0,r4):lib + \\ } + \\ { + \\ r0 += sfmpy(r6,r2):lib + \\ } + \\ { + \\ r5 -= sfmpy(r0,r4):lib + \\ } + \\ { + \\ r0 += sfmpy(r5,r2,p0):scale + \\ jumpr r31 + \\ } + ); +} + +fn __hexagon_udivdi3() callconv(.naked) noreturn { + asm volatile ( + \\ { + \\ r6 = cl0(r1:0) + \\ r7 = cl0(r3:2) + \\ r5:4 = r3:2 + \\ r3:2 = r1:0 + \\ } + \\ { + \\ r10 = sub(r7,r6) + \\ r1:0 = #0 + \\ r15:14 = #1 + \\ } + \\ { + \\ r11 = add(r10,#1) + \\ r13:12 = lsl(r5:4,r10) + \\ r15:14 = lsl(r15:14,r10) + \\ } + \\ { + \\ p0 = cmp.gtu(r5:4,r3:2) + \\ loop0(1f,r11) + \\ } + \\ { + \\ if (p0) jumpr r31 + \\ } + \\ .falign + \\ 1: + \\ { + \\ p0 = cmp.gtu(r13:12,r3:2) + \\ } + \\ { + \\ r7:6 = sub(r3:2, r13:12) + \\ r9:8 = add(r1:0, r15:14) + \\ } + \\ { + \\ r1:0 = vmux(p0, r1:0, r9:8) + \\ r3:2 = vmux(p0, r3:2, r7:6) + \\ } + \\ { + \\ r15:14 = lsr(r15:14, #1) + \\ r13:12 = lsr(r13:12, #1) + \\ }:endloop0 + \\ { + \\ jumpr r31 + \\ } + ); +} + +fn __hexagon_umoddi3() callconv(.naked) noreturn { + asm volatile ( + \\ { + \\ r6 = cl0(r1:0) + \\ r7 = cl0(r3:2) + \\ r5:4 = r3:2 + \\ r3:2 = r1:0 + \\ } + \\ { + \\ r10 = sub(r7,r6) + \\ r1:0 = #0 + \\ r15:14 = #1 + \\ } + \\ { + \\ r11 = add(r10,#1) + \\ r13:12 = lsl(r5:4,r10) + \\ r15:14 = lsl(r15:14,r10) + \\ } + \\ { + \\ p0 = cmp.gtu(r5:4,r3:2) + \\ loop0(1f,r11) + \\ } + \\ { + \\ if (p0) jump .hexagon_umoddi3_return + \\ } + \\ .falign + \\ 1: + \\ { + \\ p0 = cmp.gtu(r13:12,r3:2) + \\ } + \\ { + \\ r7:6 = sub(r3:2, r13:12) + \\ r9:8 = add(r1:0, r15:14) + \\ } + \\ { + \\ r1:0 = vmux(p0, r1:0, r9:8) + \\ r3:2 = vmux(p0, r3:2, r7:6) + \\ } + \\ { + \\ r15:14 = lsr(r15:14, #1) + \\ r13:12 = lsr(r13:12, #1) + \\ }:endloop0 + \\ + \\ .hexagon_umoddi3_return: + \\ { + \\ r1:0 = r3:2 + \\ jumpr r31 + \\ } + ); +} + +fn __hexagon_modsi3() callconv(.naked) noreturn { + asm volatile ( + \\ { + \\ p2 = cmp.ge(r0,#0) + \\ r2 = abs(r0) + \\ r1 = abs(r1) + \\ } + \\ { + \\ r3 = cl0(r2) + \\ r4 = cl0(r1) + \\ p0 = cmp.gtu(r1,r2) + \\ } + \\ { + \\ r3 = sub(r4,r3) + \\ if (p0) jumpr r31 + \\ } + \\ { + \\ p1 = cmp.eq(r3,#0) + \\ loop0(1f,r3) + \\ r0 = r2 + \\ r2 = lsl(r1,r3) + \\ } + \\ .falign + \\ 1: + \\ { + \\ p0 = cmp.gtu(r2,r0) + \\ if (!p0.new) r0 = sub(r0,r2) + \\ r2 = lsr(r2,#1) + \\ if (p1) r1 = #0 + \\ }:endloop0 + \\ { + \\ p0 = cmp.gtu(r2,r0) + \\ if (!p0.new) r0 = sub(r0,r1) + \\ if (p2) jumpr r31 + \\ } + \\ { + \\ r0 = neg(r0) + \\ jumpr r31 + \\ } + ); +} + +fn __hexagon_memcpy_likely_aligned_min32bytes_mult8bytes() callconv(.naked) noreturn { + asm volatile ( + \\ { + \\ p0 = bitsclr(r1,#7) + \\ p0 = bitsclr(r0,#7) + \\ if (p0.new) r5:4 = memd(r1) + \\ r3 = #-3 + \\ } + \\ { + \\ if (!p0) jump .Lmemcpy_call + \\ if (p0) memd(r0++#8) = r5:4 + \\ if (p0) r5:4 = memd(r1+#8) + \\ r3 += lsr(r2,#3) + \\ } + \\ { + \\ memd(r0++#8) = r5:4 + \\ r5:4 = memd(r1+#16) + \\ r1 = add(r1,#24) + \\ loop0(1f,r3) + \\ } + \\ .falign + \\ 1: + \\ { + \\ memd(r0++#8) = r5:4 + \\ r5:4 = memd(r1++#8) + \\ }:endloop0 + \\ { + \\ memd(r0) = r5:4 + \\ r0 -= add(r2,#-8) + \\ jumpr r31 + \\ } + \\ 
.Lmemcpy_call: + \\ jump memcpy@PLT + ); +} + +fn __hexagon_udivsi3() callconv(.naked) noreturn { + asm volatile ( + \\ { + \\ r2 = cl0(r0) + \\ r3 = cl0(r1) + \\ r5:4 = combine(#1,#0) + \\ p0 = cmp.gtu(r1,r0) + \\ } + \\ { + \\ r6 = sub(r3,r2) + \\ r4 = r1 + \\ r1:0 = combine(r0,r4) + \\ if (p0) jumpr r31 + \\ } + \\ { + \\ r3:2 = vlslw(r5:4,r6) + \\ loop0(1f,r6) + \\ } + \\ .falign + \\ 1: + \\ { + \\ p0 = cmp.gtu(r2,r1) + \\ if (!p0.new) r1 = sub(r1,r2) + \\ if (!p0.new) r0 = add(r0,r3) + \\ r3:2 = vlsrw(r3:2,#1) + \\ }:endloop0 + \\ { + \\ p0 = cmp.gtu(r2,r1) + \\ if (!p0.new) r0 = add(r0,r3) + \\ jumpr r31 + \\ } + ); +} + +fn __hexagon_adddf3() align(32) callconv(.naked) noreturn { + asm volatile ( + \\ { + \\ r4 = extractu(r1,#11,#20) + \\ r5 = extractu(r3,#11,#20) + \\ r13:12 = combine(##0x20000000,#0) + \\ } + \\ { + \\ p3 = dfclass(r1:0,#2) + \\ p3 = dfclass(r3:2,#2) + \\ r9:8 = r13:12 + \\ p2 = cmp.gtu(r5,r4) + \\ } + \\ { + \\ if (!p3) jump .Ladd_abnormal + \\ if (p2) r1:0 = r3:2 + \\ if (p2) r3:2 = r1:0 + \\ if (p2) r5:4 = combine(r4,r5) + \\ } + \\ { + \\ r13:12 = insert(r1:0,#52,#11 -2) + \\ r9:8 = insert(r3:2,#52,#11 -2) + \\ r15 = sub(r4,r5) + \\ r7:6 = combine(#62,#1) + \\ } + \\ + \\ + \\ + \\ + \\ + \\ .Ladd_continue: + \\ { + \\ r15 = min(r15,r7) + \\ + \\ r11:10 = neg(r13:12) + \\ p2 = cmp.gt(r1,#-1) + \\ r14 = #0 + \\ } + \\ { + \\ if (!p2) r13:12 = r11:10 + \\ r11:10 = extractu(r9:8,r15:14) + \\ r9:8 = ASR(r9:8,r15) + \\ + \\ + \\ + \\ + \\ r15:14 = #0 + \\ } + \\ { + \\ p1 = cmp.eq(r11:10,r15:14) + \\ if (!p1.new) r8 = or(r8,r6) + \\ r5 = add(r4,#-1024 -60) + \\ p3 = cmp.gt(r3,#-1) + \\ } + \\ { + \\ r13:12 = add(r13:12,r9:8) + \\ r11:10 = sub(r13:12,r9:8) + \\ r7:6 = combine(#54,##2045) + \\ } + \\ { + \\ p0 = cmp.gtu(r4,r7) + \\ p0 = !cmp.gtu(r4,r6) + \\ if (!p0.new) jump:nt .Ladd_ovf_unf + \\ if (!p3) r13:12 = r11:10 + \\ } + \\ { + \\ r1:0 = convert_d2df(r13:12) + \\ p0 = cmp.eq(r13,#0) + \\ p0 = cmp.eq(r12,#0) + \\ if (p0.new) jump:nt .Ladd_zero + \\ } + \\ { + \\ r1 += asl(r5,#20) + \\ jumpr r31 + \\ } + \\ + \\ .falign + \\ .Ladd_zero: + \\ + \\ + \\ { + \\ r28 = USR + \\ r1:0 = #0 + \\ r3 = #1 + \\ } + \\ { + \\ r28 = extractu(r28,#2,#22) + \\ r3 = asl(r3,#31) + \\ } + \\ { + \\ p0 = cmp.eq(r28,#2) + \\ if (p0.new) r1 = xor(r1,r3) + \\ jumpr r31 + \\ } + \\ .falign + \\ .Ladd_ovf_unf: + \\ { + \\ r1:0 = convert_d2df(r13:12) + \\ p0 = cmp.eq(r13,#0) + \\ p0 = cmp.eq(r12,#0) + \\ if (p0.new) jump:nt .Ladd_zero + \\ } + \\ { + \\ r28 = extractu(r1,#11,#20) + \\ r1 += asl(r5,#20) + \\ } + \\ { + \\ r5 = add(r5,r28) + \\ r3:2 = combine(##0x00100000,#0) + \\ } + \\ { + \\ p0 = cmp.gt(r5,##1024 +1024 -2) + \\ if (p0.new) jump:nt .Ladd_ovf + \\ } + \\ { + \\ p0 = cmp.gt(r5,#0) + \\ if (p0.new) jumpr:t r31 + \\ r28 = sub(#1,r5) + \\ } + \\ { + \\ r3:2 = insert(r1:0,#52,#0) + \\ r1:0 = r13:12 + \\ } + \\ { + \\ r3:2 = lsr(r3:2,r28) + \\ } + \\ { + \\ r1:0 = insert(r3:2,#63,#0) + \\ jumpr r31 + \\ } + \\ .falign + \\ .Ladd_ovf: + \\ + \\ { + \\ r1:0 = r13:12 + \\ r28 = USR + \\ r13:12 = combine(##0x7fefffff,#-1) + \\ } + \\ { + \\ r5 = extractu(r28,#2,#22) + \\ r28 = or(r28,#0x28) + \\ r9:8 = combine(##0x7ff00000,#0) + \\ } + \\ { + \\ USR = r28 + \\ r5 ^= lsr(r1,#31) + \\ r28 = r5 + \\ } + \\ { + \\ p0 = !cmp.eq(r28,#1) + \\ p0 = !cmp.eq(r5,#2) + \\ if (p0.new) r13:12 = r9:8 + \\ } + \\ { + \\ r1:0 = insert(r13:12,#63,#0) + \\ } + \\ { + \\ p0 = dfcmp.eq(r1:0,r1:0) + \\ jumpr r31 + \\ } + \\ + \\ .Ladd_abnormal: + \\ { + \\ r13:12 = extractu(r1:0,#63,#0) + \\ r9:8 = 
extractu(r3:2,#63,#0) + \\ } + \\ { + \\ p3 = cmp.gtu(r13:12,r9:8) + \\ if (!p3.new) r1:0 = r3:2 + \\ if (!p3.new) r3:2 = r1:0 + \\ } + \\ { + \\ + \\ p0 = dfclass(r1:0,#0x0f) + \\ if (!p0.new) jump:nt .Linvalid_nan_add + \\ if (!p3) r13:12 = r9:8 + \\ if (!p3) r9:8 = r13:12 + \\ } + \\ { + \\ + \\ + \\ p1 = dfclass(r1:0,#0x08) + \\ if (p1.new) jump:nt .Linf_add + \\ } + \\ { + \\ p2 = dfclass(r3:2,#0x01) + \\ if (p2.new) jump:nt .LB_zero + \\ r13:12 = #0 + \\ } + \\ + \\ { + \\ p0 = dfclass(r1:0,#4) + \\ if (p0.new) jump:nt .Ladd_two_subnormal + \\ r13:12 = combine(##0x20000000,#0) + \\ } + \\ { + \\ r4 = extractu(r1,#11,#20) + \\ r5 = #1 + \\ + \\ r9:8 = asl(r9:8,#11 -2) + \\ } + \\ + \\ + \\ + \\ { + \\ r13:12 = insert(r1:0,#52,#11 -2) + \\ r15 = sub(r4,r5) + \\ r7:6 = combine(#62,#1) + \\ jump .Ladd_continue + \\ } + \\ + \\ .Ladd_two_subnormal: + \\ { + \\ r13:12 = extractu(r1:0,#63,#0) + \\ r9:8 = extractu(r3:2,#63,#0) + \\ } + \\ { + \\ r13:12 = neg(r13:12) + \\ r9:8 = neg(r9:8) + \\ p0 = cmp.gt(r1,#-1) + \\ p1 = cmp.gt(r3,#-1) + \\ } + \\ { + \\ if (p0) r13:12 = r1:0 + \\ if (p1) r9:8 = r3:2 + \\ } + \\ { + \\ r13:12 = add(r13:12,r9:8) + \\ } + \\ { + \\ r9:8 = neg(r13:12) + \\ p0 = cmp.gt(r13,#-1) + \\ r3:2 = #0 + \\ } + \\ { + \\ if (!p0) r1:0 = r9:8 + \\ if (p0) r1:0 = r13:12 + \\ r3 = ##0x80000000 + \\ } + \\ { + \\ if (!p0) r1 = or(r1,r3) + \\ p0 = dfcmp.eq(r1:0,r3:2) + \\ if (p0.new) jump:nt .Lzero_plus_zero + \\ } + \\ { + \\ jumpr r31 + \\ } + \\ + \\ .Linvalid_nan_add: + \\ { + \\ r28 = convert_df2sf(r1:0) + \\ p0 = dfclass(r3:2,#0x0f) + \\ if (p0.new) r3:2 = r1:0 + \\ } + \\ { + \\ r2 = convert_df2sf(r3:2) + \\ r1:0 = #-1 + \\ jumpr r31 + \\ } + \\ .falign + \\ .LB_zero: + \\ { + \\ p0 = dfcmp.eq(r13:12,r1:0) + \\ if (!p0.new) jumpr:t r31 + \\ } + \\ + \\ + \\ + \\ + \\ .Lzero_plus_zero: + \\ { + \\ p0 = cmp.eq(r1:0,r3:2) + \\ if (p0.new) jumpr:t r31 + \\ } + \\ { + \\ r28 = USR + \\ } + \\ { + \\ r28 = extractu(r28,#2,#22) + \\ r1:0 = #0 + \\ } + \\ { + \\ p0 = cmp.eq(r28,#2) + \\ if (p0.new) r1 = ##0x80000000 + \\ jumpr r31 + \\ } + \\ .Linf_add: + \\ + \\ { + \\ p0 = !cmp.eq(r1,r3) + \\ p0 = dfclass(r3:2,#8) + \\ if (!p0.new) jumpr:t r31 + \\ } + \\ { + \\ r2 = ##0x7f800001 + \\ } + \\ { + \\ r1:0 = convert_sf2df(r2) + \\ jumpr r31 + \\ } + ); +} + +fn __hexagon_subdf3() align(32) callconv(.naked) noreturn { + asm volatile ( + \\ { + \\ r3 = togglebit(r3,#31) + \\ jump ##__hexagon_adddf3 + \\ } + ); +} + +fn __hexagon_divdf3() align(32) callconv(.naked) noreturn { + asm volatile ( + \\ { + \\ p2 = dfclass(r1:0,#0x02) + \\ p2 = dfclass(r3:2,#0x02) + \\ r13:12 = combine(r3,r1) + \\ r28 = xor(r1,r3) + \\ } + \\ { + \\ if (!p2) jump .Ldiv_abnormal + \\ r7:6 = extractu(r3:2,#23,#52 -23) + \\ r8 = ##0x3f800001 + \\ } + \\ { + \\ r9 = or(r8,r6) + \\ r13 = extractu(r13,#11,#52 -32) + \\ r12 = extractu(r12,#11,#52 -32) + \\ p3 = cmp.gt(r28,#-1) + \\ } + \\ + \\ + \\ .Ldenorm_continue: + \\ { + \\ r11,p0 = sfrecipa(r8,r9) + \\ r10 = and(r8,#-2) + \\ r28 = #1 + \\ r12 = sub(r12,r13) + \\ } + \\ + \\ + \\ { + \\ r10 -= sfmpy(r11,r9):lib + \\ r1 = insert(r28,#11 +1,#52 -32) + \\ r13 = ##0x00800000 << 3 + \\ } + \\ { + \\ r11 += sfmpy(r11,r10):lib + \\ r3 = insert(r28,#11 +1,#52 -32) + \\ r10 = and(r8,#-2) + \\ } + \\ { + \\ r10 -= sfmpy(r11,r9):lib + \\ r5 = #-0x3ff +1 + \\ r4 = #0x3ff -1 + \\ } + \\ { + \\ r11 += sfmpy(r11,r10):lib + \\ p1 = cmp.gt(r12,r5) + \\ p1 = !cmp.gt(r12,r4) + \\ } + \\ { + \\ r13 = insert(r11,#23,#3) + \\ r5:4 = #0 + \\ r12 = add(r12,#-61) + \\ } + \\ + \\ 
+ \\ + \\ + \\ { + \\ r13 = add(r13,#((-3) << 3)) + \\ } + \\ { r7:6 = mpyu(r13,r1); r1:0 = asl(r1:0,# ( 15 )); }; { r6 = # 0; r1:0 -= mpyu(r7,r2); r15:14 = mpyu(r7,r3); }; { r5:4 += ASL(r7:6, # ( 14 )); r1:0 -= asl(r15:14, # 32); } + \\ { r7:6 = mpyu(r13,r1); r1:0 = asl(r1:0,# ( 15 )); }; { r6 = # 0; r1:0 -= mpyu(r7,r2); r15:14 = mpyu(r7,r3); }; { r5:4 += ASR(r7:6, # ( 1 )); r1:0 -= asl(r15:14, # 32); } + \\ { r7:6 = mpyu(r13,r1); r1:0 = asl(r1:0,# ( 15 )); }; { r6 = # 0; r1:0 -= mpyu(r7,r2); r15:14 = mpyu(r7,r3); }; { r5:4 += ASR(r7:6, # ( 16 )); r1:0 -= asl(r15:14, # 32); } + \\ { r7:6 = mpyu(r13,r1); r1:0 = asl(r1:0,# ( 15 )); }; { r6 = # 0; r1:0 -= mpyu(r7,r2); r15:14 = mpyu(r7,r3); }; { r5:4 += ASR(r7:6, # ( 31 )); r1:0 -= asl(r15:14, # 32); r7:6=# ( 0 ); } + \\ + \\ + \\ + \\ + \\ + \\ + \\ + \\ { + \\ + \\ r15:14 = sub(r1:0,r3:2) + \\ p0 = cmp.gtu(r3:2,r1:0) + \\ + \\ if (!p0.new) r6 = #2 + \\ } + \\ { + \\ r5:4 = add(r5:4,r7:6) + \\ if (!p0) r1:0 = r15:14 + \\ r15:14 = #0 + \\ } + \\ { + \\ p0 = cmp.eq(r1:0,r15:14) + \\ if (!p0.new) r4 = or(r4,r28) + \\ } + \\ { + \\ r7:6 = neg(r5:4) + \\ } + \\ { + \\ if (!p3) r5:4 = r7:6 + \\ } + \\ { + \\ r1:0 = convert_d2df(r5:4) + \\ if (!p1) jump .Ldiv_ovf_unf + \\ } + \\ { + \\ r1 += asl(r12,#52 -32) + \\ jumpr r31 + \\ } + \\ + \\ .Ldiv_ovf_unf: + \\ { + \\ r1 += asl(r12,#52 -32) + \\ r13 = extractu(r1,#11,#52 -32) + \\ } + \\ { + \\ r7:6 = abs(r5:4) + \\ r12 = add(r12,r13) + \\ } + \\ { + \\ p0 = cmp.gt(r12,##0x3ff +0x3ff) + \\ if (p0.new) jump:nt .Ldiv_ovf + \\ } + \\ { + \\ p0 = cmp.gt(r12,#0) + \\ if (p0.new) jump:nt .Ldiv_possible_unf + \\ } + \\ { + \\ r13 = add(clb(r7:6),#-1) + \\ r12 = sub(#7,r12) + \\ r10 = USR + \\ r11 = #63 + \\ } + \\ { + \\ r13 = min(r12,r11) + \\ r11 = or(r10,#0x030) + \\ r7:6 = asl(r7:6,r13) + \\ r12 = #0 + \\ } + \\ { + \\ r15:14 = extractu(r7:6,r13:12) + \\ r7:6 = lsr(r7:6,r13) + \\ r3:2 = #1 + \\ } + \\ { + \\ p0 = cmp.gtu(r3:2,r15:14) + \\ if (!p0.new) r6 = or(r2,r6) + \\ r7 = setbit(r7,#52 -32+4) + \\ } + \\ { + \\ r5:4 = neg(r7:6) + \\ p0 = bitsclr(r6,#(1<<4)-1) + \\ if (!p0.new) r10 = r11 + \\ } + \\ { + \\ USR = r10 + \\ if (p3) r5:4 = r7:6 + \\ r10 = #-0x3ff -(52 +4) + \\ } + \\ { + \\ r1:0 = convert_d2df(r5:4) + \\ } + \\ { + \\ r1 += asl(r10,#52 -32) + \\ jumpr r31 + \\ } + \\ + \\ + \\ .Ldiv_possible_unf: + \\ + \\ + \\ { + \\ r3:2 = extractu(r1:0,#63,#0) + \\ r15:14 = combine(##0x00100000,#0) + \\ r10 = #0x7FFF + \\ } + \\ { + \\ p0 = dfcmp.eq(r15:14,r3:2) + \\ p0 = bitsset(r7,r10) + \\ } + \\ + \\ + \\ + \\ + \\ + \\ + \\ { + \\ if (!p0) jumpr r31 + \\ r10 = USR + \\ } + \\ + \\ { + \\ r10 = or(r10,#0x30) + \\ } + \\ { + \\ USR = r10 + \\ } + \\ { + \\ p0 = dfcmp.eq(r1:0,r1:0) + \\ jumpr r31 + \\ } + \\ + \\ .Ldiv_ovf: + \\ + \\ + \\ + \\ { + \\ r10 = USR + \\ r3:2 = combine(##0x7fefffff,#-1) + \\ r1 = mux(p3,#0,#-1) + \\ } + \\ { + \\ r7:6 = combine(##0x7ff00000,#0) + \\ r5 = extractu(r10,#2,#22) + \\ r10 = or(r10,#0x28) + \\ } + \\ { + \\ USR = r10 + \\ r5 ^= lsr(r1,#31) + \\ r4 = r5 + \\ } + \\ { + \\ p0 = !cmp.eq(r4,#1) + \\ p0 = !cmp.eq(r5,#2) + \\ if (p0.new) r3:2 = r7:6 + \\ p0 = dfcmp.eq(r3:2,r3:2) + \\ } + \\ { + \\ r1:0 = insert(r3:2,#63,#0) + \\ jumpr r31 + \\ } + \\ + \\ + \\ + \\ + \\ + \\ + \\ + \\ .Ldiv_abnormal: + \\ { + \\ p0 = dfclass(r1:0,#0x0F) + \\ p0 = dfclass(r3:2,#0x0F) + \\ p3 = cmp.gt(r28,#-1) + \\ } + \\ { + \\ p1 = dfclass(r1:0,#0x08) + \\ p1 = dfclass(r3:2,#0x08) + \\ } + \\ { + \\ p2 = dfclass(r1:0,#0x01) + \\ p2 = dfclass(r3:2,#0x01) + \\ } + \\ { + \\ if (!p0) 
jump .Ldiv_nan + \\ if (p1) jump .Ldiv_invalid + \\ } + \\ { + \\ if (p2) jump .Ldiv_invalid + \\ } + \\ { + \\ p2 = dfclass(r1:0,#(0x0F ^ 0x01)) + \\ p2 = dfclass(r3:2,#(0x0F ^ 0x08)) + \\ } + \\ { + \\ p1 = dfclass(r1:0,#(0x0F ^ 0x08)) + \\ p1 = dfclass(r3:2,#(0x0F ^ 0x01)) + \\ } + \\ { + \\ if (!p2) jump .Ldiv_zero_result + \\ if (!p1) jump .Ldiv_inf_result + \\ } + \\ + \\ + \\ + \\ + \\ + \\ { + \\ p0 = dfclass(r1:0,#0x02) + \\ p1 = dfclass(r3:2,#0x02) + \\ r10 = ##0x00100000 + \\ } + \\ { + \\ r13:12 = combine(r3,r1) + \\ r1 = insert(r10,#11 +1,#52 -32) + \\ r3 = insert(r10,#11 +1,#52 -32) + \\ } + \\ { + \\ if (p0) r1 = or(r1,r10) + \\ if (p1) r3 = or(r3,r10) + \\ } + \\ { + \\ r5 = add(clb(r1:0),#-11) + \\ r4 = add(clb(r3:2),#-11) + \\ r10 = #1 + \\ } + \\ { + \\ r12 = extractu(r12,#11,#52 -32) + \\ r13 = extractu(r13,#11,#52 -32) + \\ } + \\ { + \\ r1:0 = asl(r1:0,r5) + \\ r3:2 = asl(r3:2,r4) + \\ if (!p0) r12 = sub(r10,r5) + \\ if (!p1) r13 = sub(r10,r4) + \\ } + \\ { + \\ r7:6 = extractu(r3:2,#23,#52 -23) + \\ } + \\ { + \\ r9 = or(r8,r6) + \\ jump .Ldenorm_continue + \\ } + \\ + \\ .Ldiv_zero_result: + \\ { + \\ r1 = xor(r1,r3) + \\ r3:2 = #0 + \\ } + \\ { + \\ r1:0 = insert(r3:2,#63,#0) + \\ jumpr r31 + \\ } + \\ .Ldiv_inf_result: + \\ { + \\ p2 = dfclass(r3:2,#0x01) + \\ p2 = dfclass(r1:0,#(0x0F ^ 0x08)) + \\ } + \\ { + \\ r10 = USR + \\ if (!p2) jump 1f + \\ r1 = xor(r1,r3) + \\ } + \\ { + \\ r10 = or(r10,#0x04) + \\ } + \\ { + \\ USR = r10 + \\ } + \\ 1: + \\ { + \\ r3:2 = combine(##0x7ff00000,#0) + \\ p0 = dfcmp.uo(r3:2,r3:2) + \\ } + \\ { + \\ r1:0 = insert(r3:2,#63,#0) + \\ jumpr r31 + \\ } + \\ .Ldiv_nan: + \\ { + \\ p0 = dfclass(r1:0,#0x10) + \\ p1 = dfclass(r3:2,#0x10) + \\ if (!p0.new) r1:0 = r3:2 + \\ if (!p1.new) r3:2 = r1:0 + \\ } + \\ { + \\ r5 = convert_df2sf(r1:0) + \\ r4 = convert_df2sf(r3:2) + \\ } + \\ { + \\ r1:0 = #-1 + \\ jumpr r31 + \\ } + \\ + \\ .Ldiv_invalid: + \\ { + \\ r10 = ##0x7f800001 + \\ } + \\ { + \\ r1:0 = convert_sf2df(r10) + \\ jumpr r31 + \\ } + ); +} + +fn __hexagon_muldf3() align(32) callconv(.naked) noreturn { + asm volatile ( + \\ { + \\ p0 = dfclass(r1:0,#2) + \\ p0 = dfclass(r3:2,#2) + \\ r13:12 = combine(##0x40000000,#0) + \\ } + \\ { + \\ r13:12 = insert(r1:0,#52,#11 -1) + \\ r5:4 = asl(r3:2,#11 -1) + \\ r28 = #-1024 + \\ r9:8 = #1 + \\ } + \\ { + \\ r7:6 = mpyu(r4,r13) + \\ r5:4 = insert(r9:8,#2,#62) + \\ } + \\ + \\ + \\ + \\ + \\ { + \\ r15:14 = mpyu(r12,r4) + \\ r7:6 += mpyu(r12,r5) + \\ } + \\ { + \\ r7:6 += lsr(r15:14,#32) + \\ r11:10 = mpyu(r13,r5) + \\ r5:4 = combine(##1024 +1024 -4,#0) + \\ } + \\ { + \\ r11:10 += lsr(r7:6,#32) + \\ if (!p0) jump .Lmul_abnormal + \\ p1 = cmp.eq(r14,#0) + \\ p1 = cmp.eq(r6,#0) + \\ } + \\ { + \\ if (!p1) r10 = or(r10,r8) + \\ r6 = extractu(r1,#11,#20) + \\ r7 = extractu(r3,#11,#20) + \\ } + \\ { + \\ r15:14 = neg(r11:10) + \\ r6 += add(r28,r7) + \\ r28 = xor(r1,r3) + \\ } + \\ { + \\ if (!p2.new) r11:10 = r15:14 + \\ p2 = cmp.gt(r28,#-1) + \\ p0 = !cmp.gt(r6,r5) + \\ p0 = cmp.gt(r6,r4) + \\ if (!p0.new) jump:nt .Lmul_ovf_unf + \\ } + \\ { + \\ r1:0 = convert_d2df(r11:10) + \\ r6 = add(r6,#-1024 -58) + \\ } + \\ { + \\ r1 += asl(r6,#20) + \\ jumpr r31 + \\ } + \\ + \\ .falign + \\ .Lmul_possible_unf: + \\ { + \\ p0 = cmp.eq(r0,#0) + \\ p0 = bitsclr(r1,r4) + \\ if (!p0.new) jumpr:t r31 + \\ r5 = #0x7fff + \\ } + \\ { + \\ p0 = bitsset(r13,r5) + \\ r4 = USR + \\ r5 = #0x030 + \\ } + \\ { + \\ if (p0) r4 = or(r4,r5) + \\ } + \\ { + \\ USR = r4 + \\ } + \\ { + \\ p0 = dfcmp.eq(r1:0,r1:0) + \\ 
jumpr r31 + \\ } + \\ .falign + \\ .Lmul_ovf_unf: + \\ { + \\ r1:0 = convert_d2df(r11:10) + \\ r13:12 = abs(r11:10) + \\ r7 = add(r6,#-1024 -58) + \\ } + \\ { + \\ r1 += asl(r7,#20) + \\ r7 = extractu(r1,#11,#20) + \\ r4 = ##0x7FEFFFFF + \\ } + \\ { + \\ r7 += add(r6,##-1024 -58) + \\ + \\ r5 = #0 + \\ } + \\ { + \\ p0 = cmp.gt(r7,##1024 +1024 -2) + \\ if (p0.new) jump:nt .Lmul_ovf + \\ } + \\ { + \\ p0 = cmp.gt(r7,#0) + \\ if (p0.new) jump:nt .Lmul_possible_unf + \\ r5 = sub(r6,r5) + \\ r28 = #63 + \\ } + \\ { + \\ r4 = #0 + \\ r5 = sub(#5,r5) + \\ } + \\ { + \\ p3 = cmp.gt(r11,#-1) + \\ r5 = min(r5,r28) + \\ r11:10 = r13:12 + \\ } + \\ { + \\ r28 = USR + \\ r15:14 = extractu(r11:10,r5:4) + \\ } + \\ { + \\ r11:10 = asr(r11:10,r5) + \\ r4 = #0x0030 + \\ r1 = insert(r9,#11,#20) + \\ } + \\ { + \\ p0 = cmp.gtu(r9:8,r15:14) + \\ if (!p0.new) r10 = or(r10,r8) + \\ r11 = setbit(r11,#20 +3) + \\ } + \\ { + \\ r15:14 = neg(r11:10) + \\ p1 = bitsclr(r10,#0x7) + \\ if (!p1.new) r28 = or(r4,r28) + \\ } + \\ { + \\ if (!p3) r11:10 = r15:14 + \\ USR = r28 + \\ } + \\ { + \\ r1:0 = convert_d2df(r11:10) + \\ p0 = dfcmp.eq(r1:0,r1:0) + \\ } + \\ { + \\ r1 = insert(r9,#11 -1,#20 +1) + \\ jumpr r31 + \\ } + \\ .falign + \\ .Lmul_ovf: + \\ + \\ { + \\ r28 = USR + \\ r13:12 = combine(##0x7fefffff,#-1) + \\ r1:0 = r11:10 + \\ } + \\ { + \\ r14 = extractu(r28,#2,#22) + \\ r28 = or(r28,#0x28) + \\ r5:4 = combine(##0x7ff00000,#0) + \\ } + \\ { + \\ USR = r28 + \\ r14 ^= lsr(r1,#31) + \\ r28 = r14 + \\ } + \\ { + \\ p0 = !cmp.eq(r28,#1) + \\ p0 = !cmp.eq(r14,#2) + \\ if (p0.new) r13:12 = r5:4 + \\ p0 = dfcmp.eq(r1:0,r1:0) + \\ } + \\ { + \\ r1:0 = insert(r13:12,#63,#0) + \\ jumpr r31 + \\ } + \\ + \\ .Lmul_abnormal: + \\ { + \\ r13:12 = extractu(r1:0,#63,#0) + \\ r5:4 = extractu(r3:2,#63,#0) + \\ } + \\ { + \\ p3 = cmp.gtu(r13:12,r5:4) + \\ if (!p3.new) r1:0 = r3:2 + \\ if (!p3.new) r3:2 = r1:0 + \\ } + \\ { + \\ + \\ p0 = dfclass(r1:0,#0x0f) + \\ if (!p0.new) jump:nt .Linvalid_nan + \\ if (!p3) r13:12 = r5:4 + \\ if (!p3) r5:4 = r13:12 + \\ } + \\ { + \\ + \\ p1 = dfclass(r1:0,#0x08) + \\ p1 = dfclass(r3:2,#0x0e) + \\ } + \\ { + \\ + \\ + \\ p0 = dfclass(r1:0,#0x08) + \\ p0 = dfclass(r3:2,#0x01) + \\ } + \\ { + \\ if (p1) jump .Ltrue_inf + \\ p2 = dfclass(r3:2,#0x01) + \\ } + \\ { + \\ if (p0) jump .Linvalid_zeroinf + \\ if (p2) jump .Ltrue_zero + \\ r28 = ##0x7c000000 + \\ } + \\ + \\ + \\ + \\ + \\ + \\ { + \\ p0 = bitsclr(r1,r28) + \\ if (p0.new) jump:nt .Lmul_tiny + \\ } + \\ { + \\ r28 = cl0(r5:4) + \\ } + \\ { + \\ r28 = add(r28,#-11) + \\ } + \\ { + \\ r5:4 = asl(r5:4,r28) + \\ } + \\ { + \\ r3:2 = insert(r5:4,#63,#0) + \\ r1 -= asl(r28,#20) + \\ } + \\ jump __hexagon_muldf3 + \\ .Lmul_tiny: + \\ { + \\ r28 = USR + \\ r1:0 = xor(r1:0,r3:2) + \\ } + \\ { + \\ r28 = or(r28,#0x30) + \\ r1:0 = insert(r9:8,#63,#0) + \\ r5 = extractu(r28,#2,#22) + \\ } + \\ { + \\ USR = r28 + \\ p0 = cmp.gt(r5,#1) + \\ if (!p0.new) r0 = #0 + \\ r5 ^= lsr(r1,#31) + \\ } + \\ { + \\ p0 = cmp.eq(r5,#3) + \\ if (!p0.new) r0 = #0 + \\ jumpr r31 + \\ } + \\ .Linvalid_zeroinf: + \\ { + \\ r28 = USR + \\ } + \\ { + \\ r1:0 = #-1 + \\ r28 = or(r28,#2) + \\ } + \\ { + \\ USR = r28 + \\ } + \\ { + \\ p0 = dfcmp.uo(r1:0,r1:0) + \\ jumpr r31 + \\ } + \\ .Linvalid_nan: + \\ { + \\ p0 = dfclass(r3:2,#0x0f) + \\ r28 = convert_df2sf(r1:0) + \\ if (p0.new) r3:2 = r1:0 + \\ } + \\ { + \\ r2 = convert_df2sf(r3:2) + \\ r1:0 = #-1 + \\ jumpr r31 + \\ } + \\ .falign + \\ .Ltrue_zero: + \\ { + \\ r1:0 = r3:2 + \\ r3:2 = r1:0 + \\ } + \\ .Ltrue_inf: + 
\\ { + \\ r3 = extract(r3,#1,#31) + \\ } + \\ { + \\ r1 ^= asl(r3,#31) + \\ jumpr r31 + \\ } + ); +} + +fn __hexagon_sqrtdf2() align(32) callconv(.naked) noreturn { + asm volatile ( + \\ { + \\ r15:14 = extractu(r1:0,#23 +1,#52 -23) + \\ r28 = extractu(r1,#11,#52 -32) + \\ r5:4 = combine(##0x3f000004,#1) + \\ } + \\ { + \\ p2 = dfclass(r1:0,#0x02) + \\ p2 = cmp.gt(r1,#-1) + \\ if (!p2.new) jump:nt .Lsqrt_abnormal + \\ r9 = or(r5,r14) + \\ } + \\ + \\ .Ldenormal_restart: + \\ { + \\ r11:10 = r1:0 + \\ r7,p0 = sfinvsqrta(r9) + \\ r5 = and(r5,#-16) + \\ r3:2 = #0 + \\ } + \\ { + \\ r3 += sfmpy(r7,r9):lib + \\ r2 += sfmpy(r7,r5):lib + \\ r6 = r5 + \\ + \\ + \\ r9 = and(r28,#1) + \\ } + \\ { + \\ r6 -= sfmpy(r3,r2):lib + \\ r11 = insert(r4,#11 +1,#52 -32) + \\ p1 = cmp.gtu(r9,#0) + \\ } + \\ { + \\ r3 += sfmpy(r3,r6):lib + \\ r2 += sfmpy(r2,r6):lib + \\ r6 = r5 + \\ r9 = mux(p1,#8,#9) + \\ } + \\ { + \\ r6 -= sfmpy(r3,r2):lib + \\ r11:10 = asl(r11:10,r9) + \\ r9 = mux(p1,#3,#2) + \\ } + \\ { + \\ r2 += sfmpy(r2,r6):lib + \\ + \\ r15:14 = asl(r11:10,r9) + \\ } + \\ { + \\ r2 = and(r2,##0x007fffff) + \\ } + \\ { + \\ r2 = add(r2,##0x00800000 - 3) + \\ r9 = mux(p1,#7,#8) + \\ } + \\ { + \\ r8 = asl(r2,r9) + \\ r9 = mux(p1,#15-(1+1),#15-(1+0)) + \\ } + \\ { + \\ r13:12 = mpyu(r8,r15) + \\ } + \\ { + \\ r1:0 = asl(r11:10,#15) + \\ r15:14 = mpyu(r13,r13) + \\ p1 = cmp.eq(r0,r0) + \\ } + \\ { + \\ r1:0 -= asl(r15:14,#15) + \\ r15:14 = mpyu(r13,r12) + \\ p2 = cmp.eq(r0,r0) + \\ } + \\ { + \\ r1:0 -= lsr(r15:14,#16) + \\ p3 = cmp.eq(r0,r0) + \\ } + \\ { + \\ r1:0 = mpyu(r1,r8) + \\ } + \\ { + \\ r13:12 += lsr(r1:0,r9) + \\ r9 = add(r9,#16) + \\ r1:0 = asl(r11:10,#31) + \\ } + \\ + \\ { + \\ r15:14 = mpyu(r13,r13) + \\ r1:0 -= mpyu(r13,r12) + \\ } + \\ { + \\ r1:0 -= asl(r15:14,#31) + \\ r15:14 = mpyu(r12,r12) + \\ } + \\ { + \\ r1:0 -= lsr(r15:14,#33) + \\ } + \\ { + \\ r1:0 = mpyu(r1,r8) + \\ } + \\ { + \\ r13:12 += lsr(r1:0,r9) + \\ r9 = add(r9,#16) + \\ r1:0 = asl(r11:10,#47) + \\ } + \\ + \\ { + \\ r15:14 = mpyu(r13,r13) + \\ } + \\ { + \\ r1:0 -= asl(r15:14,#47) + \\ r15:14 = mpyu(r13,r12) + \\ } + \\ { + \\ r1:0 -= asl(r15:14,#16) + \\ r15:14 = mpyu(r12,r12) + \\ } + \\ { + \\ r1:0 -= lsr(r15:14,#17) + \\ } + \\ { + \\ r1:0 = mpyu(r1,r8) + \\ } + \\ { + \\ r13:12 += lsr(r1:0,r9) + \\ } + \\ { + \\ r3:2 = mpyu(r13,r12) + \\ r5:4 = mpyu(r12,r12) + \\ r15:14 = #0 + \\ r1:0 = #0 + \\ } + \\ { + \\ r3:2 += lsr(r5:4,#33) + \\ r5:4 += asl(r3:2,#33) + \\ p1 = cmp.eq(r0,r0) + \\ } + \\ { + \\ r7:6 = mpyu(r13,r13) + \\ r1:0 = sub(r1:0,r5:4,p1):carry + \\ r9:8 = #1 + \\ } + \\ { + \\ r7:6 += lsr(r3:2,#31) + \\ r9:8 += asl(r13:12,#1) + \\ } + \\ + \\ + \\ + \\ + \\ + \\ { + \\ r15:14 = sub(r11:10,r7:6,p1):carry + \\ r5:4 = sub(r1:0,r9:8,p2):carry + \\ + \\ + \\ + \\ + \\ r7:6 = #1 + \\ r11:10 = #0 + \\ } + \\ { + \\ r3:2 = sub(r15:14,r11:10,p2):carry + \\ r7:6 = add(r13:12,r7:6) + \\ r28 = add(r28,#-0x3ff) + \\ } + \\ { + \\ + \\ if (p2) r13:12 = r7:6 + \\ if (p2) r1:0 = r5:4 + \\ if (p2) r15:14 = r3:2 + \\ } + \\ { + \\ r5:4 = sub(r1:0,r9:8,p3):carry + \\ r7:6 = #1 + \\ r28 = asr(r28,#1) + \\ } + \\ { + \\ r3:2 = sub(r15:14,r11:10,p3):carry + \\ r7:6 = add(r13:12,r7:6) + \\ } + \\ { + \\ if (p3) r13:12 = r7:6 + \\ if (p3) r1:0 = r5:4 + \\ + \\ + \\ + \\ + \\ + \\ r2 = #1 + \\ } + \\ { + \\ p0 = cmp.eq(r1:0,r11:10) + \\ if (!p0.new) r12 = or(r12,r2) + \\ r3 = cl0(r13:12) + \\ r28 = add(r28,#-63) + \\ } + \\ + \\ + \\ + \\ { + \\ r1:0 = convert_ud2df(r13:12) + \\ r28 = add(r28,r3) + \\ } + \\ { + \\ r1 += 
asl(r28,#52 -32) + \\ jumpr r31 + \\ } + \\ .Lsqrt_abnormal: + \\ { + \\ p0 = dfclass(r1:0,#0x01) + \\ if (p0.new) jumpr:t r31 + \\ } + \\ { + \\ p0 = dfclass(r1:0,#0x10) + \\ if (p0.new) jump:nt .Lsqrt_nan + \\ } + \\ { + \\ p0 = cmp.gt(r1,#-1) + \\ if (!p0.new) jump:nt .Lsqrt_invalid_neg + \\ if (!p0.new) r28 = ##0x7F800001 + \\ } + \\ { + \\ p0 = dfclass(r1:0,#0x08) + \\ if (p0.new) jumpr:nt r31 + \\ } + \\ + \\ + \\ { + \\ r1:0 = extractu(r1:0,#52,#0) + \\ } + \\ { + \\ r28 = add(clb(r1:0),#-11) + \\ } + \\ { + \\ r1:0 = asl(r1:0,r28) + \\ r28 = sub(#1,r28) + \\ } + \\ { + \\ r1 = insert(r28,#1,#52 -32) + \\ } + \\ { + \\ r3:2 = extractu(r1:0,#23 +1,#52 -23) + \\ r5 = ##0x3f000004 + \\ } + \\ { + \\ r9 = or(r5,r2) + \\ r5 = and(r5,#-16) + \\ jump .Ldenormal_restart + \\ } + \\ .Lsqrt_nan: + \\ { + \\ r28 = convert_df2sf(r1:0) + \\ r1:0 = #-1 + \\ jumpr r31 + \\ } + \\ .Lsqrt_invalid_neg: + \\ { + \\ r1:0 = convert_sf2df(r28) + \\ jumpr r31 + \\ } + ); +} + +comptime { + if (builtin.cpu.arch == .hexagon) { + @export(&__hexagon_adddf3, .{ .name = "__hexagon_adddf3", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__hexagon_adddf3, .{ .name = "__hexagon_fast_adddf3", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__hexagon_subdf3, .{ .name = "__hexagon_subdf3", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__hexagon_subdf3, .{ .name = "__hexagon_fast_subdf3", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__hexagon_divdf3, .{ .name = "__hexagon_divdf3", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__hexagon_divdf3, .{ .name = "__hexagon_fast_divdf3", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__hexagon_muldf3, .{ .name = "__hexagon_muldf3", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__hexagon_muldf3, .{ .name = "__hexagon_fast_muldf3", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__hexagon_sqrtdf2, .{ .name = "__hexagon_sqrtdf2", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__hexagon_sqrtdf2, .{ .name = "__hexagon_fast2_sqrtdf2", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__hexagon_sqrtdf2, .{ .name = "__hexagon_sqrt", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__hexagon_divsf3, .{ .name = "__hexagon_divsf3", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__hexagon_divsf3, .{ .name = "__hexagon_fast_divsf3", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__hexagon_divsi3, .{ .name = "__hexagon_divsi3", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__hexagon_umodsi3, .{ .name = "__hexagon_umodsi3", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__hexagon_sqrtf, .{ .name = "__hexagon_sqrtf", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__hexagon_sqrtf, .{ .name = "__hexagon_fast2_sqrtf", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__hexagon_moddi3, .{ .name = "__hexagon_moddi3", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__hexagon_divdi3, .{ .name = "__hexagon_divdi3", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__hexagon_udivdi3, .{ .name = "__hexagon_udivdi3", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__hexagon_umoddi3, 
.{ .name = "__hexagon_umoddi3", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__hexagon_modsi3, .{ .name = "__hexagon_modsi3", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes, .{ .name = "__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__hexagon_udivsi3, .{ .name = "__hexagon_udivsi3", .linkage = common.linkage, .visibility = common.visibility }); + } +} diff --git a/lib/libc/musl/arch/i386/crt_arch.h b/lib/libc/musl/arch/i386/crt_arch.h index 43c8477a81..1a80fce353 100644 --- a/lib/libc/musl/arch/i386/crt_arch.h +++ b/lib/libc/musl/arch/i386/crt_arch.h @@ -3,6 +3,7 @@ __asm__( ".weak _DYNAMIC \n" ".hidden _DYNAMIC \n" ".global " START "\n" +".type " START ",%function \n" START ":\n" " xor %ebp,%ebp \n" " mov %esp,%eax \n" diff --git a/lib/libc/musl/arch/x86_64/crt_arch.h b/lib/libc/musl/arch/x86_64/crt_arch.h index 3eec61bdcd..b1c9c4761d 100644 --- a/lib/libc/musl/arch/x86_64/crt_arch.h +++ b/lib/libc/musl/arch/x86_64/crt_arch.h @@ -1,6 +1,7 @@ __asm__( ".text \n" ".global " START " \n" +".type " START ",%function \n" START ": \n" " xor %rbp,%rbp \n" " mov %rsp,%rdi \n" diff --git a/lib/libc/musl/libc.S b/lib/libc/musl/libc.S index cb8b590a9e..36875b8a7b 100644 --- a/lib/libc/musl/libc.S +++ b/lib/libc/musl/libc.S @@ -7,6 +7,13 @@ #define PTR_SIZE_BYTES 4 #define PTR2_SIZE_BYTES 8 #endif + +#ifdef TIME32 +#define WEAKTIME64 .globl +#else +#define WEAKTIME64 .weak +#endif + .bss .weak ___environ .type ___environ, %object; @@ -168,18 +175,64 @@ _IO_putc: .weak _IO_putc_unlocked .type _IO_putc_unlocked, %function; _IO_putc_unlocked: -#if !defined(ARCH_riscv64) && !defined(ARCH_mips) && !defined(ARCH_mips64) && !defined(ARCH_x86_64) && !defined(ARCH_powerpc) && !defined(ARCH_powerpc64) && !defined(ARCH_aarch64) && !defined(ARCH_riscv32) && !defined(ARCH_loongarch64) +#ifdef ARCH_i386 .globl ___tls_get_addr .type ___tls_get_addr, %function; ___tls_get_addr: #endif -#ifdef PTR32 +#ifdef TIME32 .globl __adjtime64 .type __adjtime64, %function; __adjtime64: .globl __adjtimex_time64 .type __adjtimex_time64, %function; __adjtimex_time64: +#endif +#ifdef ARCH_arm +.globl __aeabi_atexit +.type __aeabi_atexit, %function; +__aeabi_atexit: +.globl __aeabi_memclr +.type __aeabi_memclr, %function; +__aeabi_memclr: +.globl __aeabi_memclr4 +.type __aeabi_memclr4, %function; +__aeabi_memclr4: +.globl __aeabi_memclr8 +.type __aeabi_memclr8, %function; +__aeabi_memclr8: +.globl __aeabi_memcpy +.type __aeabi_memcpy, %function; +__aeabi_memcpy: +.globl __aeabi_memcpy4 +.type __aeabi_memcpy4, %function; +__aeabi_memcpy4: +.globl __aeabi_memcpy8 +.type __aeabi_memcpy8, %function; +__aeabi_memcpy8: +.globl __aeabi_memmove +.type __aeabi_memmove, %function; +__aeabi_memmove: +.globl __aeabi_memmove4 +.type __aeabi_memmove4, %function; +__aeabi_memmove4: +.globl __aeabi_memmove8 +.type __aeabi_memmove8, %function; +__aeabi_memmove8: +.globl __aeabi_memset +.type __aeabi_memset, %function; +__aeabi_memset: +.globl __aeabi_memset4 +.type __aeabi_memset4, %function; +__aeabi_memset4: +.globl __aeabi_memset8 +.type __aeabi_memset8, %function; +__aeabi_memset8: +.globl __aeabi_read_tp +.type __aeabi_read_tp, %function; +__aeabi_read_tp: +#endif +#ifdef TIME32 .globl __aio_suspend_time64 .type __aio_suspend_time64, %function; __aio_suspend_time64: @@ -187,12 +240,12 @@ __aio_suspend_time64: .globl __assert_fail .type __assert_fail, 
%function; __assert_fail: -#if !defined(ARCH_riscv64) && !defined(ARCH_x86) && !defined(ARCH_x86_64) && !defined(ARCH_powerpc) && !defined(ARCH_powerpc64) && !defined(ARCH_aarch64) && !defined(ARCH_riscv32) && !defined(ARCH_loongarch64) +#ifdef FAMILY_mips .globl __cachectl .type __cachectl, %function; __cachectl: #endif -#ifdef PTR32 +#ifdef TIME32 .globl __clock_adjtime64 .type __clock_adjtime64, %function; __clock_adjtime64: @@ -236,7 +289,7 @@ __cxa_atexit: .globl __cxa_finalize .type __cxa_finalize, %function; __cxa_finalize: -#ifdef PTR32 +#ifdef TIME32 .globl __difftime64 .type __difftime64, %function; __difftime64: @@ -247,7 +300,12 @@ __dls2b: .globl __dls3 .type __dls3, %function; __dls3: -#ifdef PTR32 +#ifdef FAMILY_mips +.globl __dlstart +.type __dlstart, %function; +__dlstart: +#endif +#ifdef TIME32 .globl __dlsym_time64 .type __dlsym_time64, %function; __dlsym_time64: @@ -312,7 +370,7 @@ __fseterr: .globl __fsetlocking .type __fsetlocking, %function; __fsetlocking: -#ifdef PTR32 +#ifdef TIME32 .weak __fstat_time64 .type __fstat_time64, %function; __fstat_time64: @@ -347,7 +405,7 @@ __fxstatat: .weak __getdelim .type __getdelim, %function; __getdelim: -#ifdef PTR32 +#ifdef TIME32 .globl __getitimer_time64 .type __getitimer_time64, %function; __getitimer_time64: @@ -364,6 +422,11 @@ __gmtime64: .type __gmtime64_r, %function; __gmtime64_r: #endif +#ifdef ARCH_arm +.globl __gnu_Unwind_Find_exidx +.type __gnu_Unwind_Find_exidx, %function; +__gnu_Unwind_Find_exidx: +#endif .globl __h_errno_location .type __h_errno_location, %function; __h_errno_location: @@ -490,7 +553,7 @@ __libc_current_sigrtmin: .globl __libc_start_main .type __libc_start_main, %function; __libc_start_main: -#ifdef PTR32 +#ifdef TIME32 .globl __localtime64 .type __localtime64, %function; __localtime64: @@ -498,12 +561,12 @@ __localtime64: .type __localtime64_r, %function; __localtime64_r: #endif -#if !defined(ARCH_mips) && !defined(ARCH_mips64) && !defined(ARCH_x86) && !defined(ARCH_x86_64) && !defined(ARCH_powerpc) && !defined(ARCH_powerpc64) && !defined(ARCH_aarch64) && !defined(ARCH_loongarch64) +#ifdef FAMILY_riscv .globl __longjmp .type __longjmp, %function; __longjmp: #endif -#ifdef PTR32 +#ifdef TIME32 .globl __lstat_time64 .type __lstat_time64, %function; __lstat_time64: @@ -514,7 +577,7 @@ __lutimes_time64: .globl __lxstat .type __lxstat, %function; __lxstat: -#ifdef PTR32 +#ifdef TIME32 .globl __mktime64 .type __mktime64, %function; __mktime64: @@ -547,7 +610,7 @@ __overflow: .weak __posix_getopt .type __posix_getopt, %function; __posix_getopt: -#ifdef PTR32 +#ifdef TIME32 .globl __ppoll_time64 .type __ppoll_time64, %function; __ppoll_time64: @@ -576,17 +639,7 @@ __recvmmsg_time64: .globl __res_state .type __res_state, %function; __res_state: -#if !defined(ARCH_mips) && !defined(ARCH_mips64) && !defined(ARCH_x86) && !defined(ARCH_x86_64) && !defined(ARCH_powerpc) && !defined(ARCH_powerpc64) && !defined(ARCH_aarch64) && !defined(ARCH_riscv32) && !defined(ARCH_loongarch64) -.globl __restore -.type __restore, %function; -__restore: -#endif -#if !defined(ARCH_mips) && !defined(ARCH_mips64) && !defined(ARCH_x86) && !defined(ARCH_x86_64) && !defined(ARCH_powerpc) && !defined(ARCH_powerpc64) && !defined(ARCH_aarch64) && !defined(ARCH_riscv32) && !defined(ARCH_loongarch64) -.globl __restore_rt -.type __restore_rt, %function; -__restore_rt: -#endif -#if !defined(ARCH_mips) && !defined(ARCH_mips64) && !defined(ARCH_x86) && !defined(ARCH_x86_64) && !defined(ARCH_powerpc) && !defined(ARCH_powerpc64) && 
!defined(ARCH_aarch64) && !defined(ARCH_loongarch64) +#ifdef FAMILY_riscv .globl __riscv_flush_icache .type __riscv_flush_icache, %function; __riscv_flush_icache: @@ -594,7 +647,7 @@ __riscv_flush_icache: .globl __sched_cpucount .type __sched_cpucount, %function; __sched_cpucount: -#ifdef PTR32 +#ifdef TIME32 .globl __sched_rr_get_interval_time64 .type __sched_rr_get_interval_time64, %function; __sched_rr_get_interval_time64: @@ -614,7 +667,7 @@ __setitimer_time64: .globl __setjmp .type __setjmp, %function; __setjmp: -#ifdef PTR32 +#ifdef TIME32 .globl __settimeofday_time64 .type __settimeofday_time64, %function; __settimeofday_time64: @@ -631,7 +684,7 @@ __signbitl: .globl __sigsetjmp .type __sigsetjmp, %function; __sigsetjmp: -#ifdef PTR32 +#ifdef TIME32 .globl __sigtimedwait_time64 .type __sigtimedwait_time64, %function; __sigtimedwait_time64: @@ -639,7 +692,7 @@ __sigtimedwait_time64: .globl __stack_chk_fail .type __stack_chk_fail, %function; __stack_chk_fail: -#ifdef PTR32 +#ifdef TIME32 .globl __stat_time64 .type __stat_time64, %function; __stat_time64: @@ -692,7 +745,7 @@ __strxfrm_l: .weak __sysv_signal .type __sysv_signal, %function; __sysv_signal: -#ifdef PTR32 +#ifdef TIME32 .globl __thrd_sleep_time64 .type __thrd_sleep_time64, %function; __thrd_sleep_time64: @@ -718,9 +771,16 @@ __timerfd_settime64: .type __timespec_get_time64, %function; __timespec_get_time64: #endif +#if !defined(ARCH_s390x) .globl __tls_get_addr .type __tls_get_addr, %function; __tls_get_addr: +#endif +#ifdef ARCH_s390x +.globl __tls_get_offset +.type __tls_get_offset, %function; +__tls_get_offset: +#endif .globl __tolower_l .type __tolower_l, %function; __tolower_l: @@ -743,7 +803,7 @@ __uflow: .globl __uselocale .type __uselocale, %function; __uselocale: -#ifdef PTR32 +#ifdef TIME32 .globl __utime64 .type __utime64, %function; __utime64: @@ -796,7 +856,7 @@ _dl_debug_state: .globl _dlstart .type _dlstart, %function; _dlstart: -#if !defined(ARCH_riscv64) && !defined(ARCH_mips) && !defined(ARCH_x86) && !defined(ARCH_x86_64) && !defined(ARCH_powerpc) && !defined(ARCH_powerpc64) && !defined(ARCH_aarch64) && !defined(ARCH_riscv32) && !defined(ARCH_loongarch64) +#ifdef FAMILY_mips .globl _dlstart_data .type _dlstart_data, %function; _dlstart_data: @@ -807,7 +867,7 @@ _exit: .weak _fini .type _fini, %function; _fini: -#if !defined(ARCH_riscv64) && !defined(ARCH_x86) && !defined(ARCH_x86_64) && !defined(ARCH_powerpc) && !defined(ARCH_powerpc64) && !defined(ARCH_aarch64) && !defined(ARCH_riscv32) && !defined(ARCH_loongarch64) +#ifdef FAMILY_mips .globl _flush_cache .type _flush_cache, %function; _flush_cache: @@ -908,7 +968,7 @@ aligned_alloc: .globl alphasort .type alphasort, %function; alphasort: -#if !defined(ARCH_riscv64) && !defined(ARCH_mips) && !defined(ARCH_mips64) && !defined(ARCH_powerpc) && !defined(ARCH_powerpc64) && !defined(ARCH_aarch64) && !defined(ARCH_riscv32) && !defined(ARCH_loongarch64) +#ifdef FAMILY_x86 .globl arch_prctl .type arch_prctl, %function; arch_prctl: @@ -1033,12 +1093,10 @@ cabsf: .globl cabsl .type cabsl, %function; cabsl: -#if !defined(ARCH_riscv64) && !defined(ARCH_x86) && !defined(ARCH_x86_64) && !defined(ARCH_powerpc) && !defined(ARCH_powerpc64) && !defined(ARCH_aarch64) && !defined(ARCH_riscv32) && !defined(ARCH_loongarch64) +#ifdef FAMILY_mips .weak cachectl .type cachectl, %function; cachectl: -#endif -#if !defined(ARCH_riscv64) && !defined(ARCH_x86) && !defined(ARCH_x86_64) && !defined(ARCH_powerpc) && !defined(ARCH_powerpc64) && !defined(ARCH_aarch64) && 
!defined(ARCH_riscv32) && !defined(ARCH_loongarch64) .weak cacheflush .type cacheflush, %function; cacheflush: @@ -1232,10 +1290,10 @@ clock_getcpuclockid: .globl clock_getres .type clock_getres, %function; clock_getres: -WEAK64 clock_gettime +WEAKTIME64 clock_gettime .type clock_gettime, %function; clock_gettime: -WEAK64 clock_nanosleep +WEAKTIME64 clock_nanosleep .type clock_nanosleep, %function; clock_nanosleep: .globl clock_settime @@ -2018,10 +2076,10 @@ fsetpos: .globl fsetxattr .type fsetxattr, %function; fsetxattr: -WEAK64 fstat +WEAKTIME64 fstat .type fstat, %function; fstat: -WEAK64 fstatat +WEAKTIME64 fstatat .type fstatat, %function; fstatat: .weak fstatfs @@ -2063,7 +2121,7 @@ futimens: .globl futimes .type futimes, %function; futimes: -WEAK64 futimesat +WEAKTIME64 futimesat .type futimesat, %function; futimesat: .globl fwide @@ -2408,7 +2466,7 @@ globfree: .globl gmtime .type gmtime, %function; gmtime: -WEAK64 gmtime_r +WEAKTIME64 gmtime_r .type gmtime_r, %function; gmtime_r: .globl grantpt @@ -2549,12 +2607,12 @@ insque: .globl ioctl .type ioctl, %function; ioctl: -#if !defined(ARCH_riscv64) && !defined(ARCH_mips64) && !defined(ARCH_aarch64) && !defined(ARCH_riscv32) && !defined(ARCH_loongarch64) +#if !defined(ARCH_aarch64) && !defined(ARCH_arm) && !defined(ARCH_loongarch64) && !defined(ARCH_mips64) && !defined(ARCH_mipsn32) && !defined(ARCH_riscv32) && !defined(ARCH_riscv64) && !defined(ARCH_s390x) .globl ioperm .type ioperm, %function; ioperm: #endif -#if !defined(ARCH_riscv64) && !defined(ARCH_mips64) && !defined(ARCH_aarch64) && !defined(ARCH_riscv32) && !defined(ARCH_loongarch64) +#if !defined(ARCH_aarch64) && !defined(ARCH_arm) && !defined(ARCH_loongarch64) && !defined(ARCH_mips64) && !defined(ARCH_mipsn32) && !defined(ARCH_riscv32) && !defined(ARCH_riscv64) && !defined(ARCH_s390x) .globl iopl .type iopl, %function; iopl: @@ -2853,7 +2911,7 @@ localeconv: .globl localtime .type localtime, %function; localtime: -WEAK64 localtime_r +WEAKTIME64 localtime_r .type localtime_r, %function; localtime_r: .globl lockf @@ -3552,7 +3610,7 @@ pthread_cond_init: .globl pthread_cond_signal .type pthread_cond_signal, %function; pthread_cond_signal: -WEAK64 pthread_cond_timedwait +WEAKTIME64 pthread_cond_timedwait .type pthread_cond_timedwait, %function; pthread_cond_timedwait: .globl pthread_cond_wait @@ -3642,7 +3700,7 @@ pthread_mutex_lock: .globl pthread_mutex_setprioceiling .type pthread_mutex_setprioceiling, %function; pthread_mutex_setprioceiling: -WEAK64 pthread_mutex_timedlock +WEAKTIME64 pthread_mutex_timedlock .type pthread_mutex_timedlock, %function; pthread_mutex_timedlock: .weak pthread_mutex_trylock @@ -3693,10 +3751,10 @@ pthread_rwlock_init: .weak pthread_rwlock_rdlock .type pthread_rwlock_rdlock, %function; pthread_rwlock_rdlock: -WEAK64 pthread_rwlock_timedrdlock +WEAKTIME64 pthread_rwlock_timedrdlock .type pthread_rwlock_timedrdlock, %function; pthread_rwlock_timedrdlock: -WEAK64 pthread_rwlock_timedwrlock +WEAKTIME64 pthread_rwlock_timedwrlock .type pthread_rwlock_timedwrlock, %function; pthread_rwlock_timedwrlock: .weak pthread_rwlock_tryrdlock @@ -3774,7 +3832,7 @@ pthread_spin_unlock: .weak pthread_testcancel .type pthread_testcancel, %function; pthread_testcancel: -WEAK64 pthread_timedjoin_np +WEAKTIME64 pthread_timedjoin_np .type pthread_timedjoin_np, %function; pthread_timedjoin_np: .weak pthread_tryjoin_np @@ -3999,7 +4057,7 @@ rintf: .globl rintl .type rintl, %function; rintl: -#if !defined(ARCH_mips) && !defined(ARCH_mips64) && !defined(ARCH_x86) && 
!defined(ARCH_x86_64) && !defined(ARCH_powerpc) && !defined(ARCH_powerpc64) && !defined(ARCH_aarch64) && !defined(ARCH_loongarch64) +#ifdef FAMILY_riscv .weak riscv_flush_icache .type riscv_flush_icache, %function; riscv_flush_icache: diff --git a/lib/libc/musl/src/signal/riscv32/restore.s b/lib/libc/musl/src/signal/riscv32/restore.s index 40012c757a..5a0af6959d 100644 --- a/lib/libc/musl/src/signal/riscv32/restore.s +++ b/lib/libc/musl/src/signal/riscv32/restore.s @@ -1,7 +1,9 @@ .global __restore +.hidden __restore .type __restore, %function __restore: .global __restore_rt +.hidden __restore_rt .type __restore_rt, %function __restore_rt: li a7, 139 # SYS_rt_sigreturn diff --git a/lib/libc/musl/src/signal/riscv64/restore.s b/lib/libc/musl/src/signal/riscv64/restore.s index 40012c757a..5a0af6959d 100644 --- a/lib/libc/musl/src/signal/riscv64/restore.s +++ b/lib/libc/musl/src/signal/riscv64/restore.s @@ -1,7 +1,9 @@ .global __restore +.hidden __restore .type __restore, %function __restore: .global __restore_rt +.hidden __restore_rt .type __restore_rt, %function __restore_rt: li a7, 139 # SYS_rt_sigreturn diff --git a/lib/libc/musl/src/thread/s390x/__tls_get_offset.s b/lib/libc/musl/src/thread/s390x/__tls_get_offset.s index 8ee92de8ea..2e0913ccb0 100644 --- a/lib/libc/musl/src/thread/s390x/__tls_get_offset.s +++ b/lib/libc/musl/src/thread/s390x/__tls_get_offset.s @@ -5,6 +5,7 @@ __tls_get_offset: aghi %r15, -160 la %r2, 0(%r2, %r12) +.hidden __tls_get_addr brasl %r14, __tls_get_addr ear %r1, %a0 diff --git a/lib/std/crypto.zig b/lib/std/crypto.zig index aa524fa2c2..7b167a467a 100644 --- a/lib/std/crypto.zig +++ b/lib/std/crypto.zig @@ -7,10 +7,23 @@ pub const timing_safe = @import("crypto/timing_safe.zig"); /// Authenticated Encryption with Associated Data pub const aead = struct { pub const aegis = struct { - pub const Aegis128L = @import("crypto/aegis.zig").Aegis128L; - pub const Aegis128L_256 = @import("crypto/aegis.zig").Aegis128L_256; - pub const Aegis256 = @import("crypto/aegis.zig").Aegis256; - pub const Aegis256_256 = @import("crypto/aegis.zig").Aegis256_256; + const variants = @import("crypto/aegis.zig"); + + pub const Aegis128X4 = variants.Aegis128X4; + pub const Aegis128X2 = variants.Aegis128X2; + pub const Aegis128L = variants.Aegis128L; + + pub const Aegis256X4 = variants.Aegis256X4; + pub const Aegis256X2 = variants.Aegis256X2; + pub const Aegis256 = variants.Aegis256; + + pub const Aegis128X4_256 = variants.Aegis128X4_256; + pub const Aegis128X2_256 = variants.Aegis128X2_256; + pub const Aegis128L_256 = variants.Aegis128L_256; + + pub const Aegis256X4_256 = variants.Aegis256X4_256; + pub const Aegis256X2_256 = variants.Aegis256X2_256; + pub const Aegis256_256 = variants.Aegis256_256; }; pub const aes_gcm = struct { @@ -44,10 +57,22 @@ pub const auth = struct { pub const hmac = @import("crypto/hmac.zig"); pub const siphash = @import("crypto/siphash.zig"); pub const aegis = struct { - pub const Aegis128LMac = @import("crypto/aegis.zig").Aegis128LMac; - pub const Aegis128LMac_128 = @import("crypto/aegis.zig").Aegis128LMac_128; - pub const Aegis256Mac = @import("crypto/aegis.zig").Aegis256Mac; - pub const Aegis256Mac_128 = @import("crypto/aegis.zig").Aegis256Mac_128; + const variants = @import("crypto/aegis.zig"); + pub const Aegis128X4Mac = variants.Aegis128X4Mac; + pub const Aegis128X2Mac = variants.Aegis128X2Mac; + pub const Aegis128LMac = variants.Aegis128LMac; + + pub const Aegis256X4Mac = variants.Aegis256X4Mac; + pub const Aegis256X2Mac = variants.Aegis256X2Mac; + pub const 
Aegis256Mac = variants.Aegis256Mac;
+
+        pub const Aegis128X4Mac_128 = variants.Aegis128X4Mac_128;
+        pub const Aegis128X2Mac_128 = variants.Aegis128X2Mac_128;
+        pub const Aegis128LMac_128 = variants.Aegis128LMac_128;
+
+        pub const Aegis256X4Mac_128 = variants.Aegis256X4Mac_128;
+        pub const Aegis256X2Mac_128 = variants.Aegis256X2Mac_128;
+        pub const Aegis256Mac_128 = variants.Aegis256Mac_128;
     };
     pub const cmac = @import("crypto/cmac.zig");
 };
diff --git a/lib/std/crypto/aegis.zig b/lib/std/crypto/aegis.zig
index 67cc13c8c0..be6a655850 100644
--- a/lib/std/crypto/aegis.zig
+++ b/lib/std/crypto/aegis.zig
@@ -1,16 +1,21 @@
 //! AEGIS is a very fast authenticated encryption system built on top of the core AES function.
 //!
-//! The AEGIS-128L variant has a 128 bit key, a 128 bit nonce, and processes 256 bit message blocks.
-//! The AEGIS-256 variant has a 256 bit key, a 256 bit nonce, and processes 128 bit message blocks.
+//! The AEGIS-128* variants have a 128 bit key and a 128 bit nonce.
+//! The AEGIS-256* variants have a 256 bit key and a 256 bit nonce.
+//! All of them can compute 128 and 256 bit authentication tags.
 //!
 //! The AEGIS cipher family offers performance that significantly exceeds that of AES-GCM with
 //! hardware support for parallelizable AES block encryption.
 //!
-//! Unlike with AES-GCM, nonces can be safely chosen at random with no practical limit when using AEGIS-256.
-//! AEGIS-128L also allows for more messages to be safely encrypted when using random nonces.
+//! On high-end Intel CPUs with AVX-512 support, AEGIS-128X4 and AEGIS-256X4 are the fastest options.
+//! On other modern server, desktop and mobile CPUs, AEGIS-128X2 and AEGIS-256X2 are usually the fastest options.
+//! AEGIS-128L and AEGIS-256 perform well on a broad range of platforms, including WebAssembly.
 //!
-//! AEGIS is believed to be key-committing, making it a safer choice than most other AEADs
-//! when the key has low entropy, or can be controlled by an attacker.
+//! Unlike with AES-GCM, nonces can be safely chosen at random with no practical limit when using AEGIS-256*.
+//! AEGIS-128* also allows for more messages to be safely encrypted when using random nonces.
+//!
+//! Unless the associated data can be fully controlled by an adversary, AEGIS is believed to be key-committing,
+//! making it a safer choice than most other AEADs when the key has low entropy, or can be controlled by an attacker.
 //!
 //! Finally, leaking the state does not leak the key.
 //!
@@ -20,305 +25,202 @@
 const std = @import("std");
 const crypto = std.crypto;
 const mem = std.mem;
 const assert = std.debug.assert;
-const AesBlock = crypto.core.aes.Block;
 const AuthenticationError = crypto.errors.AuthenticationError;
 
-/// AEGIS-128L with a 128-bit authentication tag.
-pub const Aegis128L = Aegis128LGeneric(128);
+/// AEGIS-128X4 with a 128 bit tag
+pub const Aegis128X4 = Aegis128XGeneric(4, 128);
+/// AEGIS-128X2 with a 128 bit tag
+pub const Aegis128X2 = Aegis128XGeneric(2, 128);
+/// AEGIS-128L with a 128 bit tag
+pub const Aegis128L = Aegis128XGeneric(1, 128);
 
-/// AEGIS-128L with a 256-bit authentication tag.
-pub const Aegis128L_256 = Aegis128LGeneric(256);
+/// AEGIS-256X4 with a 128 bit tag
+pub const Aegis256X4 = Aegis256XGeneric(4, 128);
+/// AEGIS-256X2 with a 128 bit tag
+pub const Aegis256X2 = Aegis256XGeneric(2, 128);
+/// AEGIS-256 with a 128 bit tag
+pub const Aegis256 = Aegis256XGeneric(1, 128);
 
-/// AEGIS-256 with a 128-bit authentication tag.
-pub const Aegis256 = Aegis256Generic(128); +/// AEGIS-128X4 with a 256 bit tag +pub const Aegis128X4_256 = Aegis128XGeneric(4, 256); +/// AEGIS-128X2 with a 256 bit tag +pub const Aegis128X2_256 = Aegis128XGeneric(2, 256); +/// AEGIS-128L with a 256 bit tag +pub const Aegis128L_256 = Aegis128XGeneric(1, 256); -/// AEGIS-256 with a 256-bit authentication tag. -pub const Aegis256_256 = Aegis256Generic(256); - -const State128L = struct { - blocks: [8]AesBlock, - - fn init(key: [16]u8, nonce: [16]u8) State128L { - const c1 = AesBlock.fromBytes(&[16]u8{ 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1, 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd }); - const c2 = AesBlock.fromBytes(&[16]u8{ 0x0, 0x1, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d, 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62 }); - const key_block = AesBlock.fromBytes(&key); - const nonce_block = AesBlock.fromBytes(&nonce); - const blocks = [8]AesBlock{ - key_block.xorBlocks(nonce_block), - c1, - c2, - c1, - key_block.xorBlocks(nonce_block), - key_block.xorBlocks(c2), - key_block.xorBlocks(c1), - key_block.xorBlocks(c2), - }; - var state = State128L{ .blocks = blocks }; - var i: usize = 0; - while (i < 10) : (i += 1) { - state.update(nonce_block, key_block); - } - return state; - } - - inline fn update(state: *State128L, d1: AesBlock, d2: AesBlock) void { - const blocks = &state.blocks; - const tmp = blocks[7]; - comptime var i: usize = 7; - inline while (i > 0) : (i -= 1) { - blocks[i] = blocks[i - 1].encrypt(blocks[i]); - } - blocks[0] = tmp.encrypt(blocks[0]); - blocks[0] = blocks[0].xorBlocks(d1); - blocks[4] = blocks[4].xorBlocks(d2); - } - - fn absorb(state: *State128L, src: *const [32]u8) void { - const msg0 = AesBlock.fromBytes(src[0..16]); - const msg1 = AesBlock.fromBytes(src[16..32]); - state.update(msg0, msg1); - } - - fn enc(state: *State128L, dst: *[32]u8, src: *const [32]u8) void { - const blocks = &state.blocks; - const msg0 = AesBlock.fromBytes(src[0..16]); - const msg1 = AesBlock.fromBytes(src[16..32]); - var tmp0 = msg0.xorBlocks(blocks[6]).xorBlocks(blocks[1]); - var tmp1 = msg1.xorBlocks(blocks[2]).xorBlocks(blocks[5]); - tmp0 = tmp0.xorBlocks(blocks[2].andBlocks(blocks[3])); - tmp1 = tmp1.xorBlocks(blocks[6].andBlocks(blocks[7])); - dst[0..16].* = tmp0.toBytes(); - dst[16..32].* = tmp1.toBytes(); - state.update(msg0, msg1); - } - - fn dec(state: *State128L, dst: *[32]u8, src: *const [32]u8) void { - const blocks = &state.blocks; - var msg0 = AesBlock.fromBytes(src[0..16]).xorBlocks(blocks[6]).xorBlocks(blocks[1]); - var msg1 = AesBlock.fromBytes(src[16..32]).xorBlocks(blocks[2]).xorBlocks(blocks[5]); - msg0 = msg0.xorBlocks(blocks[2].andBlocks(blocks[3])); - msg1 = msg1.xorBlocks(blocks[6].andBlocks(blocks[7])); - dst[0..16].* = msg0.toBytes(); - dst[16..32].* = msg1.toBytes(); - state.update(msg0, msg1); - } - - fn mac(state: *State128L, comptime tag_bits: u9, adlen: usize, mlen: usize) [tag_bits / 8]u8 { - const blocks = &state.blocks; - var sizes: [16]u8 = undefined; - mem.writeInt(u64, sizes[0..8], @as(u64, adlen) * 8, .little); - mem.writeInt(u64, sizes[8..16], @as(u64, mlen) * 8, .little); - const tmp = AesBlock.fromBytes(&sizes).xorBlocks(blocks[2]); - var i: usize = 0; - while (i < 7) : (i += 1) { - state.update(tmp, tmp); - } - return switch (tag_bits) { - 128 => blocks[0].xorBlocks(blocks[1]).xorBlocks(blocks[2]).xorBlocks(blocks[3]) - .xorBlocks(blocks[4]).xorBlocks(blocks[5]).xorBlocks(blocks[6]).toBytes(), - 256 => tag: { - const t1 = 
blocks[0].xorBlocks(blocks[1]).xorBlocks(blocks[2]).xorBlocks(blocks[3]); - const t2 = blocks[4].xorBlocks(blocks[5]).xorBlocks(blocks[6]).xorBlocks(blocks[7]); - break :tag t1.toBytes() ++ t2.toBytes(); - }, - else => unreachable, - }; - } -}; - -fn Aegis128LGeneric(comptime tag_bits: u9) type { - comptime assert(tag_bits == 128 or tag_bits == 256); // tag must be 128 or 256 bits +/// AEGIS-256X4 with a 256 bit tag +pub const Aegis256X4_256 = Aegis256XGeneric(4, 256); +/// AEGIS-256X2 with a 256 bit tag +pub const Aegis256X2_256 = Aegis256XGeneric(2, 256); +/// AEGIS-256 with a 256 bit tag +pub const Aegis256_256 = Aegis256XGeneric(1, 256); +fn State128X(comptime degree: u7) type { return struct { - pub const tag_length = tag_bits / 8; - pub const nonce_length = 16; - pub const key_length = 16; - pub const block_length = 32; + const AesBlockVec = crypto.core.aes.BlockVec(degree); + const State = @This(); - const State = State128L; + blocks: [8]AesBlockVec, - /// c: ciphertext: output buffer should be of size m.len - /// tag: authentication tag: output MAC - /// m: message - /// ad: Associated Data - /// npub: public nonce - /// k: private key - pub fn encrypt(c: []u8, tag: *[tag_length]u8, m: []const u8, ad: []const u8, npub: [nonce_length]u8, key: [key_length]u8) void { - assert(c.len == m.len); - var state = State128L.init(key, npub); - var src: [32]u8 align(16) = undefined; - var dst: [32]u8 align(16) = undefined; - var i: usize = 0; - while (i + 32 <= ad.len) : (i += 32) { - state.absorb(ad[i..][0..32]); + const aes_block_length = AesBlockVec.block_length; + const rate = aes_block_length * 2; + const alignment = AesBlockVec.native_word_size; + + fn init(key: [16]u8, nonce: [16]u8) State { + const c1 = AesBlockVec.fromBytes(&[16]u8{ 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1, 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd } ** degree); + const c2 = AesBlockVec.fromBytes(&[16]u8{ 0x0, 0x1, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d, 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62 } ** degree); + const key_block = AesBlockVec.fromBytes(&(key ** degree)); + const nonce_block = AesBlockVec.fromBytes(&(nonce ** degree)); + const blocks = [8]AesBlockVec{ + key_block.xorBlocks(nonce_block), + c1, + c2, + c1, + key_block.xorBlocks(nonce_block), + key_block.xorBlocks(c2), + key_block.xorBlocks(c1), + key_block.xorBlocks(c2), + }; + var state = State{ .blocks = blocks }; + if (degree > 1) { + const context_block = ctx: { + var contexts_bytes = [_]u8{0} ** aes_block_length; + for (0..degree) |i| { + contexts_bytes[i * 16] = @intCast(i); + contexts_bytes[i * 16 + 1] = @intCast(degree - 1); + } + break :ctx AesBlockVec.fromBytes(&contexts_bytes); + }; + for (0..10) |_| { + state.blocks[3] = state.blocks[3].xorBlocks(context_block); + state.blocks[7] = state.blocks[7].xorBlocks(context_block); + state.update(nonce_block, key_block); + } + } else { + for (0..10) |_| { + state.update(nonce_block, key_block); + } } - if (ad.len % 32 != 0) { - @memset(src[0..], 0); - @memcpy(src[0 .. ad.len % 32], ad[i..][0 .. ad.len % 32]); - state.absorb(&src); - } - i = 0; - while (i + 32 <= m.len) : (i += 32) { - state.enc(c[i..][0..32], m[i..][0..32]); - } - if (m.len % 32 != 0) { - @memset(src[0..], 0); - @memcpy(src[0 .. m.len % 32], m[i..][0 .. m.len % 32]); - state.enc(&dst, &src); - @memcpy(c[i..][0 .. m.len % 32], dst[0 .. 
m.len % 32]); - } - tag.* = state.mac(tag_bits, ad.len, m.len); + return state; } - /// `m`: Message - /// `c`: Ciphertext - /// `tag`: Authentication tag - /// `ad`: Associated data - /// `npub`: Public nonce - /// `k`: Private key - /// Asserts `c.len == m.len`. - /// - /// Contents of `m` are undefined if an error is returned. - pub fn decrypt(m: []u8, c: []const u8, tag: [tag_length]u8, ad: []const u8, npub: [nonce_length]u8, key: [key_length]u8) AuthenticationError!void { - assert(c.len == m.len); - var state = State128L.init(key, npub); - var src: [32]u8 align(16) = undefined; - var dst: [32]u8 align(16) = undefined; - var i: usize = 0; - while (i + 32 <= ad.len) : (i += 32) { - state.absorb(ad[i..][0..32]); + inline fn update(state: *State, d1: AesBlockVec, d2: AesBlockVec) void { + const blocks = &state.blocks; + const tmp = blocks[7]; + comptime var i: usize = 7; + inline while (i > 0) : (i -= 1) { + blocks[i] = blocks[i - 1].encrypt(blocks[i]); } - if (ad.len % 32 != 0) { - @memset(src[0..], 0); - @memcpy(src[0 .. ad.len % 32], ad[i..][0 .. ad.len % 32]); - state.absorb(&src); + blocks[0] = tmp.encrypt(blocks[0]); + blocks[0] = blocks[0].xorBlocks(d1); + blocks[4] = blocks[4].xorBlocks(d2); + } + + fn absorb(state: *State, src: *const [rate]u8) void { + const msg0 = AesBlockVec.fromBytes(src[0..aes_block_length]); + const msg1 = AesBlockVec.fromBytes(src[aes_block_length..rate]); + state.update(msg0, msg1); + } + + fn enc(state: *State, dst: *[rate]u8, src: *const [rate]u8) void { + const blocks = &state.blocks; + const msg0 = AesBlockVec.fromBytes(src[0..aes_block_length]); + const msg1 = AesBlockVec.fromBytes(src[aes_block_length..rate]); + var tmp0 = msg0.xorBlocks(blocks[6]).xorBlocks(blocks[1]); + var tmp1 = msg1.xorBlocks(blocks[2]).xorBlocks(blocks[5]); + tmp0 = tmp0.xorBlocks(blocks[2].andBlocks(blocks[3])); + tmp1 = tmp1.xorBlocks(blocks[6].andBlocks(blocks[7])); + dst[0..aes_block_length].* = tmp0.toBytes(); + dst[aes_block_length..rate].* = tmp1.toBytes(); + state.update(msg0, msg1); + } + + fn dec(state: *State, dst: *[rate]u8, src: *const [rate]u8) void { + const blocks = &state.blocks; + var msg0 = AesBlockVec.fromBytes(src[0..aes_block_length]).xorBlocks(blocks[6]).xorBlocks(blocks[1]); + var msg1 = AesBlockVec.fromBytes(src[aes_block_length..rate]).xorBlocks(blocks[2]).xorBlocks(blocks[5]); + msg0 = msg0.xorBlocks(blocks[2].andBlocks(blocks[3])); + msg1 = msg1.xorBlocks(blocks[6].andBlocks(blocks[7])); + dst[0..aes_block_length].* = msg0.toBytes(); + dst[aes_block_length..rate].* = msg1.toBytes(); + state.update(msg0, msg1); + } + + fn decLast(state: *State, dst: []u8, src: []const u8) void { + const blocks = &state.blocks; + const z0 = blocks[6].xorBlocks(blocks[1]).xorBlocks(blocks[2].andBlocks(blocks[3])); + const z1 = blocks[2].xorBlocks(blocks[5]).xorBlocks(blocks[6].andBlocks(blocks[7])); + var pad = [_]u8{0} ** rate; + pad[0..aes_block_length].* = z0.toBytes(); + pad[aes_block_length..].* = z1.toBytes(); + for (pad[0..src.len], src) |*p, x| p.* ^= x; + @memcpy(dst, pad[0..src.len]); + @memset(pad[src.len..], 0); + const msg0 = AesBlockVec.fromBytes(pad[0..aes_block_length]); + const msg1 = AesBlockVec.fromBytes(pad[aes_block_length..rate]); + state.update(msg0, msg1); + } + + fn mac(state: *State, comptime tag_bits: u9, adlen: usize, mlen: usize) [tag_bits / 8]u8 { + const blocks = &state.blocks; + var sizes: [aes_block_length]u8 = undefined; + mem.writeInt(u64, sizes[0..8], @as(u64, adlen) * 8, .little); + mem.writeInt(u64, sizes[8..16], @as(u64, mlen) * 
8, .little); + for (1..degree) |i| { + @memcpy(sizes[i * 16 ..][0..16], sizes[0..16]); } - i = 0; - while (i + 32 <= m.len) : (i += 32) { - state.dec(m[i..][0..32], c[i..][0..32]); + const tmp = AesBlockVec.fromBytes(&sizes).xorBlocks(blocks[2]); + for (0..7) |_| { + state.update(tmp, tmp); } - if (m.len % 32 != 0) { - @memset(src[0..], 0); - @memcpy(src[0 .. m.len % 32], c[i..][0 .. m.len % 32]); - state.dec(&dst, &src); - @memcpy(m[i..][0 .. m.len % 32], dst[0 .. m.len % 32]); - @memset(dst[0 .. m.len % 32], 0); - const blocks = &state.blocks; - blocks[0] = blocks[0].xorBlocks(AesBlock.fromBytes(dst[0..16])); - blocks[4] = blocks[4].xorBlocks(AesBlock.fromBytes(dst[16..32])); - } - var computed_tag = state.mac(tag_bits, ad.len, m.len); - const verify = crypto.timing_safe.eql([tag_length]u8, computed_tag, tag); - if (!verify) { - crypto.secureZero(u8, &computed_tag); - @memset(m, undefined); - return error.AuthenticationFailed; + switch (tag_bits) { + 128 => { + const tag_multi = blocks[0].xorBlocks(blocks[1]).xorBlocks(blocks[2]).xorBlocks(blocks[3]).xorBlocks(blocks[4]).xorBlocks(blocks[5]).xorBlocks(blocks[6]).toBytes(); + var tag = tag_multi[0..16].*; + for (1..degree) |d| { + for (0..16) |i| { + tag[i] ^= tag_multi[d * 16 + i]; + } + } + return tag; + }, + 256 => { + const tag_multi_1 = blocks[0].xorBlocks(blocks[1]).xorBlocks(blocks[2]).xorBlocks(blocks[3]).toBytes(); + const tag_multi_2 = blocks[4].xorBlocks(blocks[5]).xorBlocks(blocks[6]).xorBlocks(blocks[7]).toBytes(); + var tag = tag_multi_1[0..16].* ++ tag_multi_2[0..16].*; + for (1..degree) |d| { + for (0..16) |i| { + tag[i] ^= tag_multi_1[d * 16 + i]; + tag[i + 16] ^= tag_multi_2[d * 16 + i]; + } + } + return tag; + }, + else => unreachable, } } }; } -const State256 = struct { - blocks: [6]AesBlock, - - fn init(key: [32]u8, nonce: [32]u8) State256 { - const c1 = AesBlock.fromBytes(&[16]u8{ 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1, 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd }); - const c2 = AesBlock.fromBytes(&[16]u8{ 0x0, 0x1, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d, 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62 }); - const key_block1 = AesBlock.fromBytes(key[0..16]); - const key_block2 = AesBlock.fromBytes(key[16..32]); - const nonce_block1 = AesBlock.fromBytes(nonce[0..16]); - const nonce_block2 = AesBlock.fromBytes(nonce[16..32]); - const kxn1 = key_block1.xorBlocks(nonce_block1); - const kxn2 = key_block2.xorBlocks(nonce_block2); - const blocks = [6]AesBlock{ - kxn1, - kxn2, - c1, - c2, - key_block1.xorBlocks(c2), - key_block2.xorBlocks(c1), - }; - var state = State256{ .blocks = blocks }; - var i: usize = 0; - while (i < 4) : (i += 1) { - state.update(key_block1); - state.update(key_block2); - state.update(kxn1); - state.update(kxn2); - } - return state; - } - - inline fn update(state: *State256, d: AesBlock) void { - const blocks = &state.blocks; - const tmp = blocks[5].encrypt(blocks[0]); - comptime var i: usize = 5; - inline while (i > 0) : (i -= 1) { - blocks[i] = blocks[i - 1].encrypt(blocks[i]); - } - blocks[0] = tmp.xorBlocks(d); - } - - fn absorb(state: *State256, src: *const [16]u8) void { - const msg = AesBlock.fromBytes(src); - state.update(msg); - } - - fn enc(state: *State256, dst: *[16]u8, src: *const [16]u8) void { - const blocks = &state.blocks; - const msg = AesBlock.fromBytes(src); - var tmp = msg.xorBlocks(blocks[5]).xorBlocks(blocks[4]).xorBlocks(blocks[1]); - tmp = tmp.xorBlocks(blocks[2].andBlocks(blocks[3])); - dst.* = tmp.toBytes(); - state.update(msg); -
} - - fn dec(state: *State256, dst: *[16]u8, src: *const [16]u8) void { - const blocks = &state.blocks; - var msg = AesBlock.fromBytes(src).xorBlocks(blocks[5]).xorBlocks(blocks[4]).xorBlocks(blocks[1]); - msg = msg.xorBlocks(blocks[2].andBlocks(blocks[3])); - dst.* = msg.toBytes(); - state.update(msg); - } - - fn mac(state: *State256, comptime tag_bits: u9, adlen: usize, mlen: usize) [tag_bits / 8]u8 { - const blocks = &state.blocks; - var sizes: [16]u8 = undefined; - mem.writeInt(u64, sizes[0..8], @as(u64, adlen) * 8, .little); - mem.writeInt(u64, sizes[8..16], @as(u64, mlen) * 8, .little); - const tmp = AesBlock.fromBytes(&sizes).xorBlocks(blocks[3]); - var i: usize = 0; - while (i < 7) : (i += 1) { - state.update(tmp); - } - return switch (tag_bits) { - 128 => blocks[0].xorBlocks(blocks[1]).xorBlocks(blocks[2]).xorBlocks(blocks[3]) - .xorBlocks(blocks[4]).xorBlocks(blocks[5]).toBytes(), - 256 => tag: { - const t1 = blocks[0].xorBlocks(blocks[1]).xorBlocks(blocks[2]); - const t2 = blocks[3].xorBlocks(blocks[4]).xorBlocks(blocks[5]); - break :tag t1.toBytes() ++ t2.toBytes(); - }, - else => unreachable, - }; - } -}; - /// AEGIS is a very fast authenticated encryption system built on top of the core AES function. /// -/// The 256 bit variant of AEGIS has a 256 bit key, a 256 bit nonce, and processes 128 bit message blocks. +/// The 128-bit variants of AEGIS have a 128 bit key and a 128 bit nonce. /// /// https://datatracker.ietf.org/doc/draft-irtf-cfrg-aegis-aead/ -fn Aegis256Generic(comptime tag_bits: u9) type { +fn Aegis128XGeneric(comptime degree: u7, comptime tag_bits: u9) type { + comptime assert(degree > 0); // degree must be greater than 0 comptime assert(tag_bits == 128 or tag_bits == 256); // tag must be 128 or 256 bits return struct { - pub const tag_length = tag_bits / 8; - pub const nonce_length = 32; - pub const key_length = 32; - pub const block_length = 16; + const State = State128X(degree); - const State = State256; + pub const tag_length = tag_bits / 8; + pub const nonce_length = 16; + pub const key_length = 16; + pub const block_length = State.rate; + + const alignment = State.alignment; /// c: ciphertext: output buffer should be of size m.len /// tag: authentication tag: output MAC @@ -328,27 +230,27 @@ fn Aegis256Generic(comptime tag_bits: u9) type { /// k: private key pub fn encrypt(c: []u8, tag: *[tag_length]u8, m: []const u8, ad: []const u8, npub: [nonce_length]u8, key: [key_length]u8) void { assert(c.len == m.len); - var state = State256.init(key, npub); - var src: [16]u8 align(16) = undefined; - var dst: [16]u8 align(16) = undefined; + var state = State.init(key, npub); + var src: [block_length]u8 align(alignment) = undefined; + var dst: [block_length]u8 align(alignment) = undefined; var i: usize = 0; - while (i + 16 <= ad.len) : (i += 16) { - state.enc(&dst, ad[i..][0..16]); + while (i + block_length <= ad.len) : (i += block_length) { + state.absorb(ad[i..][0..block_length]); } - if (ad.len % 16 != 0) { + if (ad.len % block_length != 0) { @memset(src[0..], 0); - @memcpy(src[0 .. ad.len % 16], ad[i..][0 .. ad.len % 16]); - state.enc(&dst, &src); + @memcpy(src[0 .. ad.len % block_length], ad[i..][0 .. ad.len % block_length]); + state.absorb(&src); } i = 0; - while (i + 16 <= m.len) : (i += 16) { - state.enc(c[i..][0..16], m[i..][0..16]); + while (i + block_length <= m.len) : (i += block_length) { + state.enc(c[i..][0..block_length], m[i..][0..block_length]); } - if (m.len % 16 != 0) { + if (m.len % block_length != 0) { @memset(src[0..], 0); - @memcpy(src[0 ..
m.len % 16], m[i..][0 .. m.len % 16]); + @memcpy(src[0 .. m.len % block_length], m[i..][0 .. m.len % block_length]); state.enc(&dst, &src); - @memcpy(c[i..][0 .. m.len % 16], dst[0 .. m.len % 16]); + @memcpy(c[i..][0 .. m.len % block_length], dst[0 .. m.len % block_length]); } tag.* = state.mac(tag_bits, ad.len, m.len); } @@ -364,30 +266,23 @@ fn Aegis256Generic(comptime tag_bits: u9) type { /// Contents of `m` are undefined if an error is returned. pub fn decrypt(m: []u8, c: []const u8, tag: [tag_length]u8, ad: []const u8, npub: [nonce_length]u8, key: [key_length]u8) AuthenticationError!void { assert(c.len == m.len); - var state = State256.init(key, npub); - var src: [16]u8 align(16) = undefined; - var dst: [16]u8 align(16) = undefined; + var state = State.init(key, npub); + var src: [block_length]u8 align(alignment) = undefined; var i: usize = 0; - while (i + 16 <= ad.len) : (i += 16) { - state.enc(&dst, ad[i..][0..16]); + while (i + block_length <= ad.len) : (i += block_length) { + state.absorb(ad[i..][0..block_length]); } - if (ad.len % 16 != 0) { + if (ad.len % block_length != 0) { @memset(src[0..], 0); - @memcpy(src[0 .. ad.len % 16], ad[i..][0 .. ad.len % 16]); - state.enc(&dst, &src); + @memcpy(src[0 .. ad.len % block_length], ad[i..][0 .. ad.len % block_length]); + state.absorb(&src); } i = 0; - while (i + 16 <= m.len) : (i += 16) { - state.dec(m[i..][0..16], c[i..][0..16]); + while (i + block_length <= m.len) : (i += block_length) { + state.dec(m[i..][0..block_length], c[i..][0..block_length]); } - if (m.len % 16 != 0) { - @memset(src[0..], 0); - @memcpy(src[0 .. m.len % 16], c[i..][0 .. m.len % 16]); - state.dec(&dst, &src); - @memcpy(m[i..][0 .. m.len % 16], dst[0 .. m.len % 16]); - @memset(dst[0 .. m.len % 16], 0); - const blocks = &state.blocks; - blocks[0] = blocks[0].xorBlocks(AesBlock.fromBytes(&dst)); + if (m.len % block_length != 0) { + state.decLast(m[i..], c[i..]); } var computed_tag = state.mac(tag_bits, ad.len, m.len); const verify = crypto.timing_safe.eql([tag_length]u8, computed_tag, tag); @@ -400,6 +295,264 @@ fn Aegis256Generic(comptime tag_bits: u9) type { }; } +fn State256X(comptime degree: u7) type { + return struct { + const AesBlockVec = crypto.core.aes.BlockVec(degree); + const State = @This(); + + blocks: [6]AesBlockVec, + + const aes_block_length = AesBlockVec.block_length; + const rate = aes_block_length; + const alignment = AesBlockVec.native_word_size; + + fn init(key: [32]u8, nonce: [32]u8) State { + const c1 = AesBlockVec.fromBytes(&[16]u8{ 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1, 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd } ** degree); + const c2 = AesBlockVec.fromBytes(&[16]u8{ 0x0, 0x1, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d, 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62 } ** degree); + const key_block1 = AesBlockVec.fromBytes(key[0..16] ** degree); + const key_block2 = AesBlockVec.fromBytes(key[16..32] ** degree); + const nonce_block1 = AesBlockVec.fromBytes(nonce[0..16] ** degree); + const nonce_block2 = AesBlockVec.fromBytes(nonce[16..32] ** degree); + const kxn1 = key_block1.xorBlocks(nonce_block1); + const kxn2 = key_block2.xorBlocks(nonce_block2); + const blocks = [6]AesBlockVec{ + kxn1, + kxn2, + c1, + c2, + key_block1.xorBlocks(c2), + key_block2.xorBlocks(c1), + }; + var state = State{ .blocks = blocks }; + if (degree > 1) { + const context_block = ctx: { + var contexts_bytes = [_]u8{0} ** aes_block_length; + for (0..degree) |i| { + contexts_bytes[i * 16] = @intCast(i); + contexts_bytes[i * 16 + 1] = @intCast(degree - 1); + } + 
break :ctx AesBlockVec.fromBytes(&contexts_bytes); + }; + for (0..4) |_| { + state.blocks[3] = state.blocks[3].xorBlocks(context_block); + state.blocks[5] = state.blocks[5].xorBlocks(context_block); + state.update(key_block1); + state.blocks[3] = state.blocks[3].xorBlocks(context_block); + state.blocks[5] = state.blocks[5].xorBlocks(context_block); + state.update(key_block2); + state.blocks[3] = state.blocks[3].xorBlocks(context_block); + state.blocks[5] = state.blocks[5].xorBlocks(context_block); + state.update(kxn1); + state.blocks[3] = state.blocks[3].xorBlocks(context_block); + state.blocks[5] = state.blocks[5].xorBlocks(context_block); + state.update(kxn2); + } + } else { + for (0..4) |_| { + state.update(key_block1); + state.update(key_block2); + state.update(kxn1); + state.update(kxn2); + } + } + return state; + } + + inline fn update(state: *State, d: AesBlockVec) void { + const blocks = &state.blocks; + const tmp = blocks[5].encrypt(blocks[0]); + comptime var i: usize = 5; + inline while (i > 0) : (i -= 1) { + blocks[i] = blocks[i - 1].encrypt(blocks[i]); + } + blocks[0] = tmp.xorBlocks(d); + } + + fn absorb(state: *State, src: *const [rate]u8) void { + const msg = AesBlockVec.fromBytes(src); + state.update(msg); + } + + fn enc(state: *State, dst: *[rate]u8, src: *const [rate]u8) void { + const blocks = &state.blocks; + const msg = AesBlockVec.fromBytes(src); + var tmp = msg.xorBlocks(blocks[5]).xorBlocks(blocks[4]).xorBlocks(blocks[1]); + tmp = tmp.xorBlocks(blocks[2].andBlocks(blocks[3])); + dst.* = tmp.toBytes(); + state.update(msg); + } + + fn dec(state: *State, dst: *[rate]u8, src: *const [rate]u8) void { + const blocks = &state.blocks; + var msg = AesBlockVec.fromBytes(src).xorBlocks(blocks[5]).xorBlocks(blocks[4]).xorBlocks(blocks[1]); + msg = msg.xorBlocks(blocks[2].andBlocks(blocks[3])); + dst.* = msg.toBytes(); + state.update(msg); + } + + fn decLast(state: *State, dst: []u8, src: []const u8) void { + const blocks = &state.blocks; + const z = blocks[5].xorBlocks(blocks[4]).xorBlocks(blocks[1]).xorBlocks(blocks[2].andBlocks(blocks[3])); + var pad = z.toBytes(); + for (pad[0..src.len], src) |*p, x| p.* ^= x; + @memcpy(dst, pad[0..src.len]); + @memset(pad[src.len..], 0); + const msg = AesBlockVec.fromBytes(pad[0..]); + state.update(msg); + } + + fn mac(state: *State, comptime tag_bits: u9, adlen: usize, mlen: usize) [tag_bits / 8]u8 { + const blocks = &state.blocks; + var sizes: [aes_block_length]u8 = undefined; + mem.writeInt(u64, sizes[0..8], @as(u64, adlen) * 8, .little); + mem.writeInt(u64, sizes[8..16], @as(u64, mlen) * 8, .little); + for (1..degree) |i| { + @memcpy(sizes[i * 16 ..][0..16], sizes[0..16]); + } + const tmp = AesBlockVec.fromBytes(&sizes).xorBlocks(blocks[3]); + for (0..7) |_| { + state.update(tmp); + } + switch (tag_bits) { + 128 => { + const tag_multi = blocks[0].xorBlocks(blocks[1]).xorBlocks(blocks[2]).xorBlocks(blocks[3]).xorBlocks(blocks[4]).xorBlocks(blocks[5]).toBytes(); + var tag = tag_multi[0..16].*; + for (1..degree) |d| { + for (0..16) |i| { + tag[i] ^= tag_multi[d * 16 + i]; + } + } + return tag; + }, + 256 => { + const tag_multi_1 = blocks[0].xorBlocks(blocks[1]).xorBlocks(blocks[2]).toBytes(); + const tag_multi_2 = blocks[3].xorBlocks(blocks[4]).xorBlocks(blocks[5]).toBytes(); + var tag = tag_multi_1[0..16].* ++ tag_multi_2[0..16].*; + for (1..degree) |d| { + for (0..16) |i| { + tag[i] ^= tag_multi_1[d * 16 + i]; + tag[i + 16] ^= tag_multi_2[d * 16 + i]; + } + } + return tag; + }, + else => unreachable,
+ } + } + }; +} + +/// AEGIS is a very fast authenticated encryption system built on top of the core AES function. +/// +/// The 256-bit variants of AEGIS have a 256 bit key and a 256 bit nonce. +/// +/// https://datatracker.ietf.org/doc/draft-irtf-cfrg-aegis-aead/ +fn Aegis256XGeneric(comptime degree: u7, comptime tag_bits: u9) type { + comptime assert(degree > 0); // degree must be greater than 0 + comptime assert(tag_bits == 128 or tag_bits == 256); // tag must be 128 or 256 bits + + return struct { + const State = State256X(degree); + + pub const tag_length = tag_bits / 8; + pub const nonce_length = 32; + pub const key_length = 32; + pub const block_length = State.rate; + + const alignment = State.alignment; + + /// c: ciphertext: output buffer should be of size m.len + /// tag: authentication tag: output MAC + /// m: message + /// ad: Associated Data + /// npub: public nonce + /// k: private key + pub fn encrypt(c: []u8, tag: *[tag_length]u8, m: []const u8, ad: []const u8, npub: [nonce_length]u8, key: [key_length]u8) void { + assert(c.len == m.len); + var state = State.init(key, npub); + var src: [block_length]u8 align(alignment) = undefined; + var dst: [block_length]u8 align(alignment) = undefined; + var i: usize = 0; + while (i + block_length <= ad.len) : (i += block_length) { + state.enc(&dst, ad[i..][0..block_length]); + } + if (ad.len % block_length != 0) { + @memset(src[0..], 0); + @memcpy(src[0 .. ad.len % block_length], ad[i..][0 .. ad.len % block_length]); + state.enc(&dst, &src); + } + i = 0; + while (i + block_length <= m.len) : (i += block_length) { + state.enc(c[i..][0..block_length], m[i..][0..block_length]); + } + if (m.len % block_length != 0) { + @memset(src[0..], 0); + @memcpy(src[0 .. m.len % block_length], m[i..][0 .. m.len % block_length]); + state.enc(&dst, &src); + @memcpy(c[i..][0 .. m.len % block_length], dst[0 .. m.len % block_length]); + } + tag.* = state.mac(tag_bits, ad.len, m.len); + } + + /// `m`: Message + /// `c`: Ciphertext + /// `tag`: Authentication tag + /// `ad`: Associated data + /// `npub`: Public nonce + /// `k`: Private key + /// Asserts `c.len == m.len`. + /// + /// Contents of `m` are undefined if an error is returned. + pub fn decrypt(m: []u8, c: []const u8, tag: [tag_length]u8, ad: []const u8, npub: [nonce_length]u8, key: [key_length]u8) AuthenticationError!void { + assert(c.len == m.len); + var state = State.init(key, npub); + var src: [block_length]u8 align(alignment) = undefined; + var i: usize = 0; + while (i + block_length <= ad.len) : (i += block_length) { + state.absorb(ad[i..][0..block_length]); + } + if (ad.len % block_length != 0) { + @memset(src[0..], 0); + @memcpy(src[0 .. ad.len % block_length], ad[i..][0 .. ad.len % block_length]); + state.absorb(&src); + } + i = 0; + while (i + block_length <= m.len) : (i += block_length) { + state.dec(m[i..][0..block_length], c[i..][0..block_length]); + } + if (m.len % block_length != 0) { + state.decLast(m[i..], c[i..]); + } + var computed_tag = state.mac(tag_bits, ad.len, m.len); + const verify = crypto.timing_safe.eql([tag_length]u8, computed_tag, tag); + if (!verify) { + crypto.secureZero(u8, &computed_tag); + @memset(m, undefined); + return error.AuthenticationFailed; + } + } + }; +} + +/// The `Aegis128X4Mac` message authentication function outputs 256 bit tags. +/// In addition to being extremely fast, its large state, non-linearity +/// and non-invertibility provides the following properties: +/// - 128 bit security, stronger than GHash/Polyval/Poly1305.
+/// - Recovering the secret key from the state would require ~2^128 attempts, +/// which is infeasible for any practical adversary. +/// - It has a large security margin against internal collisions. +pub const Aegis128X4Mac = AegisMac(Aegis128X4_256); + +/// The `Aegis128X2Mac` message authentication function outputs 256 bit tags. +/// In addition to being extremely fast, its large state, non-linearity +/// and non-invertibility provides the following properties: +/// - 128 bit security, stronger than GHash/Polyval/Poly1305. +/// - Recovering the secret key from the state would require ~2^128 attempts, +/// which is infeasible for any practical adversary. +/// - It has a large security margin against internal collisions. +pub const Aegis128X2Mac = AegisMac(Aegis128X2_256); + /// The `Aegis128LMac` message authentication function outputs 256 bit tags. /// In addition to being extremely fast, its large state, non-linearity /// and non-invertibility provides the following properties: @@ -409,34 +562,60 @@ fn Aegis256Generic(comptime tag_bits: u9) type { /// - It has a large security margin against internal collisions. pub const Aegis128LMac = AegisMac(Aegis128L_256); +/// The `Aegis256X4Mac` message authentication function has a 256-bit key size, +/// and outputs 256 bit tags. Unless theoretical multi-target attacks are a +/// concern, the AEGIS-128L variant should be preferred. +/// AEGIS' large state, non-linearity and non-invertibility provides the +/// following properties: +/// - 256 bit security against forgery. +/// - Recovering the secret key from the state would require ~2^256 attempts, +/// which is infeasible for any practical adversary. +/// - It has a large security margin against internal collisions. +pub const Aegis256X4Mac = AegisMac(Aegis256X4_256); + +/// The `Aegis256X2Mac` message authentication function has a 256-bit key size, +/// and outputs 256 bit tags. Unless theoretical multi-target attacks are a +/// concern, the AEGIS-128L variant should be preferred. +/// AEGIS' large state, non-linearity and non-invertibility provides the +/// following properties: +/// - 256 bit security against forgery. +/// - Recovering the secret key from the state would require ~2^256 attempts, +/// which is infeasible for any practical adversary. +/// - It has a large security margin against internal collisions. +pub const Aegis256X2Mac = AegisMac(Aegis256X2_256); + /// The `Aegis256Mac` message authentication function has a 256-bit key size, /// and outputs 256 bit tags. Unless theoretical multi-target attacks are a /// concern, the AEGIS-128L variant should be preferred. /// AEGIS' large state, non-linearity and non-invertibility provides the /// following properties: -/// - More than 128 bit security against forgery. +/// - 256 bit security against forgery. /// - Recovering the secret key from the state would require ~2^256 attempts, /// which is infeasible for any practical adversary. /// - It has a large security margin against internal collisions. pub const Aegis256Mac = AegisMac(Aegis256_256); -/// Aegis128L MAC with a 128-bit output. -/// A MAC with a 128-bit output is not safe unless the number of messages -/// authenticated with the same key remains small. -/// After 2^48 messages, the probability of a collision is already ~ 2^-33. -/// If unsure, use the Aegis128LMac type, that has a 256 bit output. 
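For orientation, here is a minimal sketch of how the MAC wrappers declared here are driven once this lands. It relies only on the public API defined by `AegisMac` later in this file; the key, the message, and the choice of `Aegis128LMac` are arbitrary, not part of this change:

```zig
const std = @import("std");

test "AegisMac usage sketch" {
    const Mac = std.crypto.auth.aegis.Aegis128LMac;
    const key = [_]u8{0x42} ** Mac.key_length;

    // One-shot API.
    var tag: [Mac.mac_length]u8 = undefined;
    Mac.create(&tag, "some message", &key);

    // Streaming API; the same input must yield the same tag.
    var st = Mac.init(&key);
    st.update("some ");
    st.update("message");
    var tag2: [Mac.mac_length]u8 = undefined;
    st.final(&tag2);

    try std.testing.expectEqualSlices(u8, &tag, &tag2);
}
```

The same pattern applies to every `Aegis*Mac` and `Aegis*Mac_128` type added above; only `key_length` and `mac_length` differ.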
+/// AEGIS-128X4 MAC with 128-bit tags +pub const Aegis128X4Mac_128 = AegisMac(Aegis128X4); + +/// AEGIS-128X2 MAC with 128-bit tags +pub const Aegis128X2Mac_128 = AegisMac(Aegis128X2); + +/// AEGIS-128L MAC with 128-bit tags pub const Aegis128LMac_128 = AegisMac(Aegis128L); -/// Aegis256 MAC with a 128-bit output. -/// A MAC with a 128-bit output is not safe unless the number of messages -/// authenticated with the same key remains small. -/// After 2^48 messages, the probability of a collision is already ~ 2^-33. -/// If unsure, use the Aegis256Mac type, that has a 256 bit output. +/// AEGIS-256X4 MAC with 128-bit tags +pub const Aegis256X4Mac_128 = AegisMac(Aegis256X4); + +/// AEGIS-256X2 MAC with 128-bit tags +pub const Aegis256X2Mac_128 = AegisMac(Aegis256X2); + +/// AEGIS-256 MAC with 128-bit tags pub const Aegis256Mac_128 = AegisMac(Aegis256); fn AegisMac(comptime T: type) type { return struct { - const Self = @This(); + const Mac = @This(); pub const mac_length = T.tag_length; pub const key_length = T.key_length; @@ -448,15 +627,15 @@ fn AegisMac(comptime T: type) type { msg_len: usize = 0, /// Initialize a state for the MAC function - pub fn init(key: *const [key_length]u8) Self { + pub fn init(key: *const [key_length]u8) Mac { const nonce = [_]u8{0} ** T.nonce_length; - return Self{ + return Mac{ .state = T.State.init(key.*, nonce), }; } /// Add data to the state - pub fn update(self: *Self, b: []const u8) void { + pub fn update(self: *Mac, b: []const u8) void { self.msg_len += b.len; const len_partial = @min(b.len, block_length - self.off); @@ -469,6 +648,10 @@ fn AegisMac(comptime T: type) type { var i = len_partial; self.off = 0; + while (i + block_length * 2 <= b.len) : (i += block_length * 2) { + self.state.absorb(b[i..][0..block_length]); + self.state.absorb(b[i..][block_length .. 
block_length * 2]); + } while (i + block_length <= b.len) : (i += block_length) { self.state.absorb(b[i..][0..block_length]); } @@ -479,7 +662,7 @@ fn AegisMac(comptime T: type) type { } /// Return an authentication tag for the current state - pub fn final(self: *Self, out: *[mac_length]u8) void { + pub fn final(self: *Mac, out: *[mac_length]u8) void { if (self.off > 0) { var pad = [_]u8{0} ** block_length; @memcpy(pad[0..self.off], self.buf[0..self.off]); @@ -490,20 +673,20 @@ fn AegisMac(comptime T: type) type { /// Return an authentication tag for a message and a key pub fn create(out: *[mac_length]u8, msg: []const u8, key: *const [key_length]u8) void { - var ctx = Self.init(key); + var ctx = Mac.init(key); ctx.update(msg); ctx.final(out); } pub const Error = error{}; - pub const Writer = std.io.Writer(*Self, Error, write); + pub const Writer = std.io.Writer(*Mac, Error, write); - fn write(self: *Self, bytes: []const u8) Error!usize { + fn write(self: *Mac, bytes: []const u8) Error!usize { self.update(bytes); return bytes.len; } - pub fn writer(self: *Self) Writer { + pub fn writer(self: *Mac) Writer { return .{ .context = self }; } }; @@ -568,6 +751,23 @@ test "Aegis128L test vector 3" { try htest.assertEqual("83cc600dc4e3e7e62d4055826174f149", &tag); } +test "Aegis128X2 test vector 1" { + const key: [Aegis128X2.key_length]u8 = [_]u8{ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }; + const nonce: [Aegis128X2.nonce_length]u8 = [_]u8{ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f }; + var empty = [_]u8{}; + var tag: [Aegis128X2.tag_length]u8 = undefined; + var tag256: [Aegis128X2_256.tag_length]u8 = undefined; + + Aegis128X2.encrypt(&empty, &tag, &empty, &empty, nonce, key); + Aegis128X2_256.encrypt(&empty, &tag256, &empty, &empty, nonce, key); + try htest.assertEqual("63117dc57756e402819a82e13eca8379", &tag); + try htest.assertEqual("b92c71fdbd358b8a4de70b27631ace90cffd9b9cfba82028412bac41b4f53759", &tag256); + tag[0] +%= 1; + try testing.expectError(error.AuthenticationFailed, Aegis128X2.decrypt(&empty, &empty, tag, &empty, nonce, key)); + tag256[0] +%= 1; + try testing.expectError(error.AuthenticationFailed, Aegis128X2_256.decrypt(&empty, &empty, tag256, &empty, nonce, key)); +} + test "Aegis256 test vector 1" { const key: [Aegis256.key_length]u8 = [_]u8{ 0x10, 0x01 } ++ [_]u8{0x00} ** 30; const nonce: [Aegis256.nonce_length]u8 = [_]u8{ 0x10, 0x00, 0x02 } ++ [_]u8{0x00} ** 29; @@ -624,6 +824,23 @@ test "Aegis256 test vector 3" { try htest.assertEqual("f7a0878f68bd083e8065354071fc27c3", &tag); } +test "Aegis256X4 test vector 1" { + const key: [Aegis256X4.key_length]u8 = [_]u8{ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f }; + const nonce: [Aegis256X4.nonce_length]u8 = [_]u8{ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f }; + var empty = [_]u8{}; + var tag: [Aegis256X4.tag_length]u8 = undefined; + var tag256: [Aegis256X4_256.tag_length]u8 = undefined; + + Aegis256X4.encrypt(&empty, &tag, &empty, &empty, nonce, key); + Aegis256X4_256.encrypt(&empty, &tag256, &empty, &empty, nonce, key); + try htest.assertEqual("3b7fee6cee7bf17888ad11ed2397beb4", &tag); + try 
htest.assertEqual("6093a1a8aab20ec635dc1ca71745b01b5bec4fc444c9ffbebd710d4a34d20eaf", &tag256); + tag[0] +%= 1; + try testing.expectError(error.AuthenticationFailed, Aegis256X4.decrypt(&empty, &empty, tag, &empty, nonce, key)); + tag256[0] +%= 1; + try testing.expectError(error.AuthenticationFailed, Aegis256X4_256.decrypt(&empty, &empty, tag256, &empty, nonce, key)); +} + test "Aegis MAC" { const key = [_]u8{0x00} ** Aegis128LMac.key_length; var msg: [64]u8 = undefined; diff --git a/lib/std/crypto/aes.zig b/lib/std/crypto/aes.zig index 5e5ae04b58..d14b82c937 100644 --- a/lib/std/crypto/aes.zig +++ b/lib/std/crypto/aes.zig @@ -22,6 +22,7 @@ pub const has_hardware_support = (builtin.cpu.arch == .aarch64 and has_armaes); pub const Block = impl.Block; +pub const BlockVec = impl.BlockVec; pub const AesEncryptCtx = impl.AesEncryptCtx; pub const AesDecryptCtx = impl.AesDecryptCtx; pub const Aes128 = impl.Aes128; diff --git a/lib/std/crypto/aes/aesni.zig b/lib/std/crypto/aes/aesni.zig index e0893cfba8..2793ff4184 100644 --- a/lib/std/crypto/aes/aesni.zig +++ b/lib/std/crypto/aes/aesni.zig @@ -2,18 +2,23 @@ const std = @import("../../std.zig"); const builtin = @import("builtin"); const mem = std.mem; const debug = std.debug; -const BlockVec = @Vector(2, u64); + +const has_vaes = builtin.cpu.arch == .x86_64 and std.Target.x86.featureSetHas(builtin.cpu.features, .vaes); +const has_avx512f = builtin.cpu.arch == .x86_64 and std.Target.x86.featureSetHas(builtin.cpu.features, .avx512f); /// A single AES block. pub const Block = struct { + const Repr = @Vector(2, u64); + + /// The length of an AES block in bytes. pub const block_length: usize = 16; /// Internal representation of a block. - repr: BlockVec, + repr: Repr, /// Convert a byte sequence into an internal representation. pub inline fn fromBytes(bytes: *const [16]u8) Block { - const repr = mem.bytesToValue(BlockVec, bytes); + const repr = mem.bytesToValue(Repr, bytes); return Block{ .repr = repr }; } @@ -33,7 +38,7 @@ pub const Block = struct { return Block{ .repr = asm ( \\ vaesenc %[rk], %[in], %[out] - : [out] "=x" (-> BlockVec), + : [out] "=x" (-> Repr), : [in] "x" (block.repr), [rk] "x" (round_key.repr), ), @@ -45,7 +50,7 @@ pub const Block = struct { return Block{ .repr = asm ( \\ vaesenclast %[rk], %[in], %[out] - : [out] "=x" (-> BlockVec), + : [out] "=x" (-> Repr), : [in] "x" (block.repr), [rk] "x" (round_key.repr), ), @@ -57,7 +62,7 @@ pub const Block = struct { return Block{ .repr = asm ( \\ vaesdec %[rk], %[in], %[out] - : [out] "=x" (-> BlockVec), + : [out] "=x" (-> Repr), : [in] "x" (block.repr), [rk] "x" (inv_round_key.repr), ), @@ -69,7 +74,7 @@ pub const Block = struct { return Block{ .repr = asm ( \\ vaesdeclast %[rk], %[in], %[out] - : [out] "=x" (-> BlockVec), + : [out] "=x" (-> Repr), : [in] "x" (block.repr), [rk] "x" (inv_round_key.repr), ), @@ -168,17 +173,158 @@ pub const Block = struct { }; }; +/// A fixed-size vector of AES blocks. +/// All operations are performed in parallel, using SIMD instructions when available. +pub fn BlockVec(comptime blocks_count: comptime_int) type { + return struct { + const Self = @This(); + + /// The number of AES blocks the target architecture can process with a single instruction. + pub const native_vector_size = w: { + if (has_avx512f and blocks_count % 4 == 0) break :w 4; + if (has_vaes and blocks_count % 2 == 0) break :w 2; + break :w 1; + }; + + /// The size of the AES block vector that the target architecture can process with a single instruction, in bytes. 
+ pub const native_word_size = native_vector_size * 16; + + const native_words = blocks_count / native_vector_size; + + const Repr = @Vector(native_vector_size * 2, u64); + + /// Internal representation of a block vector. + repr: [native_words]Repr, + + /// Length of the block vector in bytes. + pub const block_length: usize = blocks_count * 16; + + /// Convert a byte sequence into an internal representation. + pub inline fn fromBytes(bytes: *const [blocks_count * 16]u8) Self { + var out: Self = undefined; + inline for (0..native_words) |i| { + out.repr[i] = mem.bytesToValue(Repr, bytes[i * native_word_size ..][0..native_word_size]); + } + return out; + } + + /// Convert the internal representation of a block vector into a byte sequence. + pub inline fn toBytes(block_vec: Self) [blocks_count * 16]u8 { + var out: [blocks_count * 16]u8 = undefined; + inline for (0..native_words) |i| { + out[i * native_word_size ..][0..native_word_size].* = mem.toBytes(block_vec.repr[i]); + } + return out; + } + + /// XOR the block vector with a byte sequence. + pub inline fn xorBytes(block_vec: Self, bytes: *const [blocks_count * 16]u8) [blocks_count * 16]u8 { + var x: Self = undefined; + inline for (0..native_words) |i| { + x.repr[i] = block_vec.repr[i] ^ mem.bytesToValue(Repr, bytes[i * native_word_size ..][0..native_word_size]); + } + return x.toBytes(); + } + + /// Apply the forward AES operation to the block vector with a vector of round keys. + pub inline fn encrypt(block_vec: Self, round_key_vec: Self) Self { + var out: Self = undefined; + inline for (0..native_words) |i| { + out.repr[i] = asm ( + \\ vaesenc %[rk], %[in], %[out] + : [out] "=x" (-> Repr), + : [in] "x" (block_vec.repr[i]), + [rk] "x" (round_key_vec.repr[i]), + ); + } + return out; + } + + /// Apply the forward AES operation to the block vector with a vector of last round keys. + pub inline fn encryptLast(block_vec: Self, round_key_vec: Self) Self { + var out: Self = undefined; + inline for (0..native_words) |i| { + out.repr[i] = asm ( + \\ vaesenclast %[rk], %[in], %[out] + : [out] "=x" (-> Repr), + : [in] "x" (block_vec.repr[i]), + [rk] "x" (round_key_vec.repr[i]), + ); + } + return out; + } + + /// Apply the inverse AES operation to the block vector with a vector of round keys. + pub inline fn decrypt(block_vec: Self, inv_round_key_vec: Self) Self { + var out: Self = undefined; + inline for (0..native_words) |i| { + out.repr[i] = asm ( + \\ vaesdec %[rk], %[in], %[out] + : [out] "=x" (-> Repr), + : [in] "x" (block_vec.repr[i]), + [rk] "x" (inv_round_key_vec.repr[i]), + ); + } + return out; + } + + /// Apply the inverse AES operation to the block vector with a vector of last round keys. + pub inline fn decryptLast(block_vec: Self, inv_round_key_vec: Self) Self { + var out: Self = undefined; + inline for (0..native_words) |i| { + out.repr[i] = asm ( + \\ vaesdeclast %[rk], %[in], %[out] + : [out] "=x" (-> Repr), + : [in] "x" (block_vec.repr[i]), + [rk] "x" (inv_round_key_vec.repr[i]), + ); + } + return out; + } + + /// Apply the bitwise XOR operation to the content of two block vectors. + pub inline fn xorBlocks(block_vec1: Self, block_vec2: Self) Self { + var out: Self = undefined; + inline for (0..native_words) |i| { + out.repr[i] = block_vec1.repr[i] ^ block_vec2.repr[i]; + } + return out; + } + + /// Apply the bitwise AND operation to the content of two block vectors. 
+ pub inline fn andBlocks(block_vec1: Self, block_vec2: Self) Self { + var out: Self = undefined; + inline for (0..native_words) |i| { + out.repr[i] = block_vec1.repr[i] & block_vec2.repr[i]; + } + return out; + } + + /// Apply the bitwise OR operation to the content of two block vectors. + pub inline fn orBlocks(block_vec1: Self, block_vec2: Self) Self { + var out: Self = undefined; + inline for (0..native_words) |i| { + out.repr[i] = block_vec1.repr[i] | block_vec2.repr[i]; + } + return out; + } + }; +} + fn KeySchedule(comptime Aes: type) type { std.debug.assert(Aes.rounds == 10 or Aes.rounds == 14); const rounds = Aes.rounds; return struct { const Self = @This(); + + const Repr = Aes.block.Repr; + round_keys: [rounds + 1]Block, - fn drc(comptime second: bool, comptime rc: u8, t: BlockVec, tx: BlockVec) BlockVec { - var s: BlockVec = undefined; - var ts: BlockVec = undefined; + fn drc(comptime second: bool, comptime rc: u8, t: Repr, tx: Repr) Repr { + var s: Repr = undefined; + var ts: Repr = undefined; return asm ( \\ vaeskeygenassist %[rc], %[t], %[s] \\ vpslldq $4, %[tx], %[ts] \\ vpxor %[ts], %[tx], %[r] \\ vpslldq $8, %[r], %[ts] \\ vpxor %[ts], %[r], %[r] \\ vpshufd %[mask], %[s], %[ts] \\ vpxor %[ts], %[r], %[r] - : [r] "=&x" (-> BlockVec), + : [r] "=&x" (-> Repr), [s] "=&x" (s), [ts] "=&x" (ts), : [rc] "n" (rc), @@ -234,7 +380,7 @@ fn KeySchedule(comptime Aes: type) type { inv_round_keys[i] = Block{ .repr = asm ( \\ vaesimc %[rk], %[inv_rk] - : [inv_rk] "=x" (-> BlockVec), + : [inv_rk] "=x" (-> Repr), : [rk] "x" (round_keys[rounds - i].repr), ), }; diff --git a/lib/std/crypto/aes/armcrypto.zig b/lib/std/crypto/aes/armcrypto.zig index a6574c372a..2487ab7e72 100644 --- a/lib/std/crypto/aes/armcrypto.zig +++ b/lib/std/crypto/aes/armcrypto.zig @@ -1,18 +1,19 @@ const std = @import("../../std.zig"); const mem = std.mem; const debug = std.debug; -const BlockVec = @Vector(2, u64); /// A single AES block. pub const Block = struct { + const Repr = @Vector(2, u64); + pub const block_length: usize = 16; /// Internal representation of a block. - repr: BlockVec, + repr: Repr, /// Convert a byte sequence into an internal representation. pub inline fn fromBytes(bytes: *const [16]u8) Block { - const repr = mem.bytesToValue(BlockVec, bytes); + const repr = mem.bytesToValue(Repr, bytes); return Block{ .repr = repr }; } @@ -36,7 +37,7 @@ pub const Block = struct { \\ mov %[out].16b, %[in].16b \\ aese %[out].16b, %[zero].16b \\ aesmc %[out].16b, %[out].16b - : [out] "=&x" (-> BlockVec), + : [out] "=&x" (-> Repr), : [in] "x" (block.repr), [zero] "x" (zero), )) ^ round_key.repr, @@ -49,7 +50,7 @@ pub const Block = struct { .repr = (asm ( \\ mov %[out].16b, %[in].16b \\ aese %[out].16b, %[zero].16b - : [out] "=&x" (-> BlockVec), + : [out] "=&x" (-> Repr), : [in] "x" (block.repr), [zero] "x" (zero), )) ^ round_key.repr, @@ -63,7 +64,7 @@ pub const Block = struct { \\ mov %[out].16b, %[in].16b \\ aesd %[out].16b, %[zero].16b \\ aesimc %[out].16b, %[out].16b - : [out] "=&x" (-> BlockVec), + : [out] "=&x" (-> Repr), : [in] "x" (block.repr), [zero] "x" (zero), )) ^ inv_round_key.repr, @@ -76,7 +77,7 @@ pub const Block = struct { .repr = (asm ( \\ mov %[out].16b, %[in].16b \\ aesd %[out].16b, %[zero].16b - : [out] "=&x" (-> BlockVec), + : [out] "=&x" (-> Repr), : [in] "x" (block.repr), [zero] "x" (zero), )) ^ inv_round_key.repr, @@ -165,6 +166,118 @@ pub const Block = struct { }; };
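The armcrypto and soft backends below gain the same `BlockVec` surface as the aesni implementation above, so generic callers such as the AEGIS state never need to know which backend they run on. A rough sketch of the call pattern, through the `std.crypto.core.aes.BlockVec` re-export added earlier in this diff (the degree and byte values are arbitrary):

```zig
const std = @import("std");

test "BlockVec call pattern sketch" {
    const V = std.crypto.core.aes.BlockVec(2); // two AES blocks wide

    var bytes: [V.block_length]u8 = undefined;
    std.crypto.random.bytes(&bytes);
    const round_key = V.fromBytes(&([_]u8{0x13} ** V.block_length));

    // Load both blocks, run one AES round on them in parallel, store.
    const out = V.fromBytes(&bytes).encrypt(round_key).toBytes();
    try std.testing.expect(out.len == V.block_length);
}
```

On x86_64 with VAES and AVX-512 this maps onto wide vector instructions; the backends below process the blocks one at a time instead, which is what `native_vector_size = 1` expresses.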
+/// A fixed-size vector of AES blocks. +/// All operations are performed in parallel, using SIMD instructions when available. +pub fn BlockVec(comptime blocks_count: comptime_int) type { + return struct { + const Self = @This(); + + /// The number of AES blocks the target architecture can process with a single instruction. + pub const native_vector_size = 1; + + /// The size of the AES block vector that the target architecture can process with a single instruction, in bytes. + pub const native_word_size = native_vector_size * 16; + + const native_words = blocks_count; + + /// Internal representation of a block vector. + repr: [native_words]Block, + + /// Length of the block vector in bytes. + pub const block_length: usize = blocks_count * 16; + + /// Convert a byte sequence into an internal representation. + pub inline fn fromBytes(bytes: *const [blocks_count * 16]u8) Self { + var out: Self = undefined; + inline for (0..native_words) |i| { + out.repr[i] = Block.fromBytes(bytes[i * native_word_size ..][0..native_word_size]); + } + return out; + } + + /// Convert the internal representation of a block vector into a byte sequence. + pub inline fn toBytes(block_vec: Self) [blocks_count * 16]u8 { + var out: [blocks_count * 16]u8 = undefined; + inline for (0..native_words) |i| { + out[i * native_word_size ..][0..native_word_size].* = block_vec.repr[i].toBytes(); + } + return out; + } + + /// XOR the block vector with a byte sequence. + pub inline fn xorBytes(block_vec: Self, bytes: *const [blocks_count * 16]u8) [blocks_count * 16]u8 { + var out: [blocks_count * 16]u8 = undefined; + inline for (0..native_words) |i| { + out[i * native_word_size ..][0..native_word_size].* = block_vec.repr[i].xorBytes(bytes[i * native_word_size ..][0..native_word_size]); + } + return out; + } + + /// Apply the forward AES operation to the block vector with a vector of round keys. + pub inline fn encrypt(block_vec: Self, round_key_vec: Self) Self { + var out: Self = undefined; + inline for (0..native_words) |i| { + out.repr[i] = block_vec.repr[i].encrypt(round_key_vec.repr[i]); + } + return out; + } + + /// Apply the forward AES operation to the block vector with a vector of last round keys. + pub inline fn encryptLast(block_vec: Self, round_key_vec: Self) Self { + var out: Self = undefined; + inline for (0..native_words) |i| { + out.repr[i] = block_vec.repr[i].encryptLast(round_key_vec.repr[i]); + } + return out; + } + + /// Apply the inverse AES operation to the block vector with a vector of round keys. + pub inline fn decrypt(block_vec: Self, inv_round_key_vec: Self) Self { + var out: Self = undefined; + inline for (0..native_words) |i| { + out.repr[i] = block_vec.repr[i].decrypt(inv_round_key_vec.repr[i]); + } + return out; + } + + /// Apply the inverse AES operation to the block vector with a vector of last round keys. + pub inline fn decryptLast(block_vec: Self, inv_round_key_vec: Self) Self { + var out: Self = undefined; + inline for (0..native_words) |i| { + out.repr[i] = block_vec.repr[i].decryptLast(inv_round_key_vec.repr[i]); + } + return out; + } + + /// Apply the bitwise XOR operation to the content of two block vectors. + pub inline fn xorBlocks(block_vec1: Self, block_vec2: Self) Self { + var out: Self = undefined; + inline for (0..native_words) |i| { + out.repr[i] = block_vec1.repr[i].xorBlocks(block_vec2.repr[i]); + } + return out; + } + + /// Apply the bitwise AND operation to the content of two block vectors.
+ pub inline fn andBlocks(block_vec1: Self, block_vec2: Self) Self { + var out: Self = undefined; + inline for (0..native_words) |i| { + out.repr[i] = block_vec1.repr[i].andBlocks(block_vec2.repr[i]); + } + return out; + } + + /// Apply the bitwise OR operation to the content of two block vectors. + pub inline fn orBlocks(block_vec1: Self, block_vec2: Self) Self { + var out: Self = undefined; + inline for (0..native_words) |i| { + out.repr[i] = block_vec1.repr[i].orBlocks(block_vec2.repr[i]); + } + return out; + } + }; +} + fn KeySchedule(comptime Aes: type) type { std.debug.assert(Aes.rounds == 10 or Aes.rounds == 14); const rounds = Aes.rounds; @@ -172,17 +285,19 @@ fn KeySchedule(comptime Aes: type) type { return struct { const Self = @This(); + const Repr = Aes.block.Repr; + const zero = @Vector(2, u64){ 0, 0 }; const mask1 = @Vector(16, u8){ 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15, 12 }; const mask2 = @Vector(16, u8){ 12, 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15 }; round_keys: [rounds + 1]Block, - fn drc128(comptime rc: u8, t: BlockVec) BlockVec { - var v1: BlockVec = undefined; - var v2: BlockVec = undefined; - var v3: BlockVec = undefined; - var v4: BlockVec = undefined; + fn drc128(comptime rc: u8, t: Repr) Repr { + var v1: Repr = undefined; + var v2: Repr = undefined; + var v3: Repr = undefined; + var v4: Repr = undefined; return asm ( \\ movi %[v2].4s, %[rc] \\ tbl %[v4].16b, {%[r].16b}, %[mask1].16b \\ ext %[v1].16b, %[zero].16b, %[r].16b, #12 \\ aese %[v4].16b, %[v2].16b \\ eor %[v1].16b, %[v1].16b, %[r].16b \\ eor %[r].16b, %[v1].16b, %[v3].16b \\ eor %[r].16b, %[r].16b, %[v4].16b - : [r] "=&x" (-> BlockVec), + : [r] "=&x" (-> Repr), [v1] "=&x" (v1), [v2] "=&x" (v2), [v3] "=&x" (v3), @@ -208,11 +323,11 @@ fn KeySchedule(comptime Aes: type) type { ); } - fn drc256(comptime second: bool, comptime rc: u8, t: BlockVec, tx: BlockVec) BlockVec { - var v1: BlockVec = undefined; - var v2: BlockVec = undefined; - var v3: BlockVec = undefined; - var v4: BlockVec = undefined; + fn drc256(comptime second: bool, comptime rc: u8, t: Repr, tx: Repr) Repr { + var v1: Repr = undefined; + var v2: Repr = undefined; + var v3: Repr = undefined; + var v4: Repr = undefined; return asm ( \\ movi %[v2].4s, %[rc] \\ ext %[v1].16b, %[zero].16b, %[t].16b, #12 \\ eor %[v1].16b, %[v1].16b, %[v2].16b \\ eor %[v1].16b, %[v1].16b, %[v3].16b \\ eor %[r].16b, %[v1].16b, %[v4].16b - : [r] "=&x" (-> BlockVec), + : [r] "=&x" (-> Repr), [v1] "=&x" (v1), [v2] "=&x" (v2), [v3] "=&x" (v3), @@ -276,7 +391,7 @@ fn KeySchedule(comptime Aes: type) type { inv_round_keys[i] = Block{ .repr = asm ( \\ aesimc %[inv_rk].16b, %[rk].16b - : [inv_rk] "=x" (-> BlockVec), + : [inv_rk] "=x" (-> Repr), : [rk] "x" (round_keys[rounds - i].repr), ), }; diff --git a/lib/std/crypto/aes/soft.zig b/lib/std/crypto/aes/soft.zig index 8430a3af7e..7f3d298a3a 100644 --- a/lib/std/crypto/aes/soft.zig +++ b/lib/std/crypto/aes/soft.zig @@ -2,16 +2,16 @@ const std = @import("../../std.zig"); const math = std.math; const mem = std.mem; -const BlockVec = [4]u32; - const side_channels_mitigations = std.options.side_channels_mitigations; /// A single AES block. pub const Block = struct { + const Repr = [4]u32; + pub const block_length: usize = 16; /// Internal representation of a block. - repr: BlockVec align(16), + repr: Repr align(16), /// Convert a byte sequence into an internal representation.
pub inline fn fromBytes(bytes: *const [16]u8) Block { @@ -19,7 +19,7 @@ pub const Block = struct { const s1 = mem.readInt(u32, bytes[4..8], .little); const s2 = mem.readInt(u32, bytes[8..12], .little); const s3 = mem.readInt(u32, bytes[12..16], .little); - return Block{ .repr = BlockVec{ s0, s1, s2, s3 } }; + return Block{ .repr = Repr{ s0, s1, s2, s3 } }; } /// Convert the internal representation of a block into a byte sequence. @@ -65,7 +65,7 @@ pub const Block = struct { t2 ^= round_key.repr[2]; t3 ^= round_key.repr[3]; - return Block{ .repr = BlockVec{ t0, t1, t2, t3 } }; + return Block{ .repr = Repr{ t0, t1, t2, t3 } }; } /// Encrypt a block with a round key *WITHOUT ANY PROTECTION AGAINST SIDE CHANNELS* @@ -110,7 +110,7 @@ pub const Block = struct { t2 ^= round_key.repr[2]; t3 ^= round_key.repr[3]; - return Block{ .repr = BlockVec{ t0, t1, t2, t3 } }; + return Block{ .repr = Repr{ t0, t1, t2, t3 } }; } /// Encrypt a block with the last round key. @@ -136,7 +136,7 @@ pub const Block = struct { t2 ^= round_key.repr[2]; t3 ^= round_key.repr[3]; - return Block{ .repr = BlockVec{ t0, t1, t2, t3 } }; + return Block{ .repr = Repr{ t0, t1, t2, t3 } }; } /// Decrypt a block with a round key. @@ -161,7 +161,7 @@ pub const Block = struct { t2 ^= round_key.repr[2]; t3 ^= round_key.repr[3]; - return Block{ .repr = BlockVec{ t0, t1, t2, t3 } }; + return Block{ .repr = Repr{ t0, t1, t2, t3 } }; } /// Decrypt a block with a round key *WITHOUT ANY PROTECTION AGAINST SIDE CHANNELS* @@ -206,7 +206,7 @@ pub const Block = struct { t2 ^= round_key.repr[2]; t3 ^= round_key.repr[3]; - return Block{ .repr = BlockVec{ t0, t1, t2, t3 } }; + return Block{ .repr = Repr{ t0, t1, t2, t3 } }; } /// Decrypt a block with the last round key. @@ -232,12 +232,12 @@ pub const Block = struct { t2 ^= round_key.repr[2]; t3 ^= round_key.repr[3]; - return Block{ .repr = BlockVec{ t0, t1, t2, t3 } }; + return Block{ .repr = Repr{ t0, t1, t2, t3 } }; } /// Apply the bitwise XOR operation to the content of two blocks. pub inline fn xorBlocks(block1: Block, block2: Block) Block { - var x: BlockVec = undefined; + var x: Repr = undefined; comptime var i = 0; inline while (i < 4) : (i += 1) { x[i] = block1.repr[i] ^ block2.repr[i]; @@ -247,7 +247,7 @@ pub const Block = struct { /// Apply the bitwise AND operation to the content of two blocks. pub inline fn andBlocks(block1: Block, block2: Block) Block { - var x: BlockVec = undefined; + var x: Repr = undefined; comptime var i = 0; inline while (i < 4) : (i += 1) { x[i] = block1.repr[i] & block2.repr[i]; @@ -257,7 +257,7 @@ pub const Block = struct { /// Apply the bitwise OR operation to the content of two blocks. pub inline fn orBlocks(block1: Block, block2: Block) Block { - var x: BlockVec = undefined; + var x: Repr = undefined; comptime var i = 0; inline while (i < 4) : (i += 1) { x[i] = block1.repr[i] | block2.repr[i]; @@ -332,6 +332,118 @@ pub const Block = struct { }; }; +/// A fixed-size vector of AES blocks. +/// All operations are performed in parallel, using SIMD instructions when available. +pub fn BlockVec(comptime blocks_count: comptime_int) type { + return struct { + const Self = @This(); + + /// The number of AES blocks the target architecture can process with a single instruction. + pub const native_vector_size = 1; + + /// The size of the AES block vector that the target architecture can process with a single instruction, in bytes. 
+ pub const native_word_size = native_vector_size * 16; + + const native_words = blocks_count; + + /// Internal representation of a block vector. + repr: [native_words]Block, + + /// Length of the block vector in bytes. + pub const block_length: usize = blocks_count * 16; + + /// Convert a byte sequence into an internal representation. + pub inline fn fromBytes(bytes: *const [blocks_count * 16]u8) Self { + var out: Self = undefined; + for (0..native_words) |i| { + out.repr[i] = Block.fromBytes(bytes[i * native_word_size ..][0..native_word_size]); + } + return out; + } + + /// Convert the internal representation of a block vector into a byte sequence. + pub inline fn toBytes(block_vec: Self) [blocks_count * 16]u8 { + var out: [blocks_count * 16]u8 = undefined; + for (0..native_words) |i| { + out[i * native_word_size ..][0..native_word_size].* = block_vec.repr[i].toBytes(); + } + return out; + } + + /// XOR the block vector with a byte sequence. + pub inline fn xorBytes(block_vec: Self, bytes: *const [blocks_count * 16]u8) [blocks_count * 16]u8 { + var out: [blocks_count * 16]u8 = undefined; + for (0..native_words) |i| { + out[i * native_word_size ..][0..native_word_size].* = block_vec.repr[i].xorBytes(bytes[i * native_word_size ..][0..native_word_size]); + } + return out; + } + + /// Apply the forward AES operation to the block vector with a vector of round keys. + pub inline fn encrypt(block_vec: Self, round_key_vec: Self) Self { + var out: Self = undefined; + for (0..native_words) |i| { + out.repr[i] = block_vec.repr[i].encrypt(round_key_vec.repr[i]); + } + return out; + } + + /// Apply the forward AES operation to the block vector with a vector of last round keys. + pub inline fn encryptLast(block_vec: Self, round_key_vec: Self) Self { + var out: Self = undefined; + for (0..native_words) |i| { + out.repr[i] = block_vec.repr[i].encryptLast(round_key_vec.repr[i]); + } + return out; + } + + /// Apply the inverse AES operation to the block vector with a vector of round keys. + pub inline fn decrypt(block_vec: Self, inv_round_key_vec: Self) Self { + var out: Self = undefined; + for (0..native_words) |i| { + out.repr[i] = block_vec.repr[i].decrypt(inv_round_key_vec.repr[i]); + } + return out; + } + + /// Apply the inverse AES operation to the block vector with a vector of last round keys. + pub inline fn decryptLast(block_vec: Self, inv_round_key_vec: Self) Self { + var out: Self = undefined; + for (0..native_words) |i| { + out.repr[i] = block_vec.repr[i].decryptLast(inv_round_key_vec.repr[i]); + } + return out; + } + + /// Apply the bitwise XOR operation to the content of two block vectors. + pub inline fn xorBlocks(block_vec1: Self, block_vec2: Self) Self { + var out: Self = undefined; + for (0..native_words) |i| { + out.repr[i] = block_vec1.repr[i].xorBlocks(block_vec2.repr[i]); + } + return out; + } + + /// Apply the bitwise AND operation to the content of two block vectors. + pub inline fn andBlocks(block_vec1: Self, block_vec2: Self) Self { + var out: Self = undefined; + for (0..native_words) |i| { + out.repr[i] = block_vec1.repr[i].andBlocks(block_vec2.repr[i]); + } + return out; + } + + /// Apply the bitwise OR operation to the content of two block vectors.
+ pub inline fn orBlocks(block_vec1: Self, block_vec2: Block) Self { + var out: Self = undefined; + for (0..native_words) |i| { + out.repr[i] = block_vec1.repr[i].orBlocks(block_vec2.repr[i]); + } + return out; + } + }; +} + fn KeySchedule(comptime Aes: type) type { std.debug.assert(Aes.rounds == 10 or Aes.rounds == 14); const key_length = Aes.key_bits / 8; @@ -671,7 +783,7 @@ fn mul(a: u8, b: u8) u8 { const cache_line_bytes = std.atomic.cache_line; -inline fn sbox_lookup(sbox: *align(64) const [256]u8, idx0: u8, idx1: u8, idx2: u8, idx3: u8) [4]u8 { +fn sbox_lookup(sbox: *align(64) const [256]u8, idx0: u8, idx1: u8, idx2: u8, idx3: u8) [4]u8 { if (side_channels_mitigations == .none) { return [4]u8{ sbox[idx0], @@ -709,7 +821,7 @@ inline fn sbox_lookup(sbox: *align(64) const [256]u8, idx0: u8, idx1: u8, idx2: } } -inline fn table_lookup(table: *align(64) const [4][256]u32, idx0: u8, idx1: u8, idx2: u8, idx3: u8) [4]u32 { +fn table_lookup(table: *align(64) const [4][256]u32, idx0: u8, idx1: u8, idx2: u8, idx3: u8) [4]u32 { if (side_channels_mitigations == .none) { return [4]u32{ table[0][idx0], @@ -718,17 +830,18 @@ inline fn table_lookup(table: *align(64) const [4][256]u32, idx0: u8, idx1: u8, table[3][idx3], }; } else { + const table_len: usize = 256; const stride = switch (side_channels_mitigations) { .none => unreachable, - .basic => table[0].len / 4, - .medium => @max(1, @min(table[0].len, 2 * cache_line_bytes / 4)), - .full => @max(1, @min(table[0].len, cache_line_bytes / 4)), + .basic => table_len / 4, + .medium => @max(1, @min(table_len, 2 * cache_line_bytes / 4)), + .full => @max(1, @min(table_len, cache_line_bytes / 4)), }; const of0 = idx0 % stride; const of1 = idx1 % stride; const of2 = idx2 % stride; const of3 = idx3 % stride; - var t: [4][table[0].len / stride]u32 align(64) = undefined; + var t: [4][table_len / stride]u32 align(64) = undefined; var i: usize = 0; while (i < t[0].len) : (i += 1) { const tx = table[0][i * stride ..]; diff --git a/lib/std/crypto/bcrypt.zig b/lib/std/crypto/bcrypt.zig index f3c30ab5ce..308cd1a42e 100644 --- a/lib/std/crypto/bcrypt.zig +++ b/lib/std/crypto/bcrypt.zig @@ -563,15 +563,57 @@ const pbkdf_prf = struct { }; /// bcrypt-pbkdf is a key derivation function based on bcrypt. -/// This is the function used in OpenSSH to derive encryption keys from passphrases. -/// -/// This implementation is compatible with the OpenBSD implementation (https://github.com/openbsd/src/blob/master/lib/libutil/bcrypt_pbkdf.c). /// /// Unlike the password hashing function `bcrypt`, this function doesn't silently truncate passwords longer than 72 bytes. pub fn pbkdf(pass: []const u8, salt: []const u8, key: []u8, rounds: u32) !void { try crypto.pwhash.pbkdf2(key, pass, salt, rounds, pbkdf_prf); } +/// The function used in OpenSSH to derive encryption keys from passphrases. +/// +/// This implementation is compatible with the OpenBSD implementation (https://github.com/openbsd/src/blob/master/lib/libutil/bcrypt_pbkdf.c). 
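+///
+/// A minimal usage sketch (the passphrase, salt, and round count are
+/// illustrative; any non-empty values with `rounds >= 1` and
+/// `key.len <= 1024` are accepted):
+///
+///     var key: [48]u8 = undefined;
+///     try opensshKdf("correct horse", "battery staple", &key, 16);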
+pub fn opensshKdf(pass: []const u8, salt: []const u8, key: []u8, rounds: u32) !void { + var tmp: [32]u8 = undefined; + var tmp2: [32]u8 = undefined; + if (rounds < 1 or pass.len == 0 or salt.len == 0 or key.len == 0 or key.len > tmp.len * tmp.len) { + return error.InvalidInput; + } + var sha2pass: [Sha512.digest_length]u8 = undefined; + Sha512.hash(pass, &sha2pass, .{}); + const stride = (key.len + tmp.len - 1) / tmp.len; + var amt = (key.len + stride - 1) / stride; + if (math.shr(usize, key.len, 32) >= amt) { + return error.InvalidInput; + } + var key_remainder = key.len; + var count: u32 = 1; + while (key_remainder > 0) : (count += 1) { + var count_salt: [4]u8 = undefined; + std.mem.writeInt(u32, count_salt[0..], count, .big); + var sha2salt: [Sha512.digest_length]u8 = undefined; + var h = Sha512.init(.{}); + h.update(salt); + h.update(&count_salt); + h.final(&sha2salt); + tmp2 = pbkdf_prf.hash(sha2pass, sha2salt); + tmp = tmp2; + for (1..rounds) |_| { + Sha512.hash(&tmp2, &sha2salt, .{}); + tmp2 = pbkdf_prf.hash(sha2pass, sha2salt); + for (&tmp, tmp2) |*o, t| o.* ^= t; + } + amt = @min(amt, key_remainder); + key_remainder -= for (0..amt) |i| { + const dest = i * stride + (count - 1); + if (dest >= key.len) break i; + key[dest] = tmp[i]; + } else amt; + } + crypto.secureZero(u8, &tmp); + crypto.secureZero(u8, &tmp2); + crypto.secureZero(u8, &sha2pass); +} + const crypt_format = struct { /// String prefix for bcrypt pub const prefix = "$2"; @@ -847,3 +889,13 @@ test "bcrypt phc format" { verify_options, ); } + +test "openssh kdf" { + var key: [100]u8 = undefined; + const pass = "password"; + const salt = "salt"; + const rounds = 5; + try opensshKdf(pass, salt, &key, rounds); + const expected = [_]u8{ 65, 207, 68, 58, 55, 252, 114, 141, 255, 65, 216, 175, 5, 92, 235, 68, 220, 92, 118, 161, 40, 13, 241, 190, 56, 152, 69, 136, 41, 214, 51, 205, 37, 221, 101, 59, 105, 73, 133, 36, 14, 59, 94, 212, 111, 107, 109, 237, 213, 235, 246, 119, 59, 76, 45, 130, 142, 81, 178, 231, 161, 158, 138, 108, 18, 162, 26, 50, 218, 251, 23, 66, 2, 232, 20, 202, 216, 46, 12, 250, 247, 246, 252, 23, 155, 74, 77, 195, 120, 113, 57, 88, 126, 81, 9, 249, 72, 18, 208, 160 }; + try testing.expectEqualSlices(u8, &key, &expected); +} diff --git a/lib/std/crypto/benchmark.zig b/lib/std/crypto/benchmark.zig index 8bb651f73b..c3dcd9b8cb 100644 --- a/lib/std/crypto/benchmark.zig +++ b/lib/std/crypto/benchmark.zig @@ -72,6 +72,10 @@ const macs = [_]Crypto{ Crypto{ .ty = crypto.auth.siphash.SipHash64(1, 3), .name = "siphash-1-3" }, Crypto{ .ty = crypto.auth.siphash.SipHash128(2, 4), .name = "siphash128-2-4" }, Crypto{ .ty = crypto.auth.siphash.SipHash128(1, 3), .name = "siphash128-1-3" }, + Crypto{ .ty = crypto.auth.aegis.Aegis128X4Mac, .name = "aegis-128x4 mac" }, + Crypto{ .ty = crypto.auth.aegis.Aegis256X4Mac, .name = "aegis-256x4 mac" }, + Crypto{ .ty = crypto.auth.aegis.Aegis128X2Mac, .name = "aegis-128x2 mac" }, + Crypto{ .ty = crypto.auth.aegis.Aegis256X2Mac, .name = "aegis-256x2 mac" }, Crypto{ .ty = crypto.auth.aegis.Aegis128LMac, .name = "aegis-128l mac" }, Crypto{ .ty = crypto.auth.aegis.Aegis256Mac, .name = "aegis-256 mac" }, Crypto{ .ty = crypto.auth.cmac.CmacAes128, .name = "aes-cmac" }, @@ -283,7 +287,11 @@ const aeads = [_]Crypto{ Crypto{ .ty = crypto.aead.chacha_poly.XChaCha20Poly1305, .name = "xchacha20Poly1305" }, Crypto{ .ty = crypto.aead.chacha_poly.XChaCha8Poly1305, .name = "xchacha8Poly1305" }, Crypto{ .ty = crypto.aead.salsa_poly.XSalsa20Poly1305, .name = "xsalsa20Poly1305" }, + Crypto{ .ty = 
crypto.aead.aegis.Aegis128X4, .name = "aegis-128x4" }, + Crypto{ .ty = crypto.aead.aegis.Aegis128X2, .name = "aegis-128x2" }, Crypto{ .ty = crypto.aead.aegis.Aegis128L, .name = "aegis-128l" }, + Crypto{ .ty = crypto.aead.aegis.Aegis256X4, .name = "aegis-256x4" }, + Crypto{ .ty = crypto.aead.aegis.Aegis256X2, .name = "aegis-256x2" }, Crypto{ .ty = crypto.aead.aegis.Aegis256, .name = "aegis-256" }, Crypto{ .ty = crypto.aead.aes_gcm.Aes128Gcm, .name = "aes128-gcm" }, Crypto{ .ty = crypto.aead.aes_gcm.Aes256Gcm, .name = "aes256-gcm" }, diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 982e71bc35..0756e456ff 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -1531,9 +1531,9 @@ pub fn ConfigurableTrace(comptime size: usize, comptime stack_frame_count: usize } pub const SafetyLock = struct { - state: State = .unlocked, + state: State = if (runtime_safety) .unlocked else .unknown, - pub const State = if (runtime_safety) enum { unlocked, locked } else enum { unlocked }; + pub const State = if (runtime_safety) enum { unlocked, locked } else enum { unknown }; pub fn lock(l: *SafetyLock) void { if (!runtime_safety) return; @@ -1551,8 +1551,22 @@ pub const SafetyLock = struct { if (!runtime_safety) return; assert(l.state == .unlocked); } + + pub fn assertLocked(l: SafetyLock) void { + if (!runtime_safety) return; + assert(l.state == .locked); + } }; +test SafetyLock { + var safety_lock: SafetyLock = .{}; + safety_lock.assertUnlocked(); + safety_lock.lock(); + safety_lock.assertLocked(); + safety_lock.unlock(); + safety_lock.assertUnlocked(); +} + /// Detect whether the program is being executed in the Valgrind virtual machine. /// /// When Valgrind integrations are disabled, this returns comptime-known false. diff --git a/lib/std/debug/MemoryAccessor.zig b/lib/std/debug/MemoryAccessor.zig index bfdda609f6..a420d9cdcf 100644 --- a/lib/std/debug/MemoryAccessor.zig +++ b/lib/std/debug/MemoryAccessor.zig @@ -48,7 +48,8 @@ fn read(ma: *MemoryAccessor, address: usize, buf: []u8) bool { switch (linux.E.init(bytes_read)) { .SUCCESS => return bytes_read == buf.len, .FAULT => return false, - .INVAL, .PERM, .SRCH => unreachable, // own pid is always valid + .INVAL, .SRCH => unreachable, // own pid is always valid + .PERM => {}, // Known to happen in containers. .NOMEM => {}, .NOSYS => {}, // QEMU is known not to implement this syscall. else => unreachable, // unexpected diff --git a/lib/std/fmt.zig b/lib/std/fmt.zig index eba72721f9..2f16d849b0 100644 --- a/lib/std/fmt.zig +++ b/lib/std/fmt.zig @@ -224,7 +224,6 @@ pub const Placeholder = struct { pub fn parse(comptime str: anytype) Placeholder { const view = std.unicode.Utf8View.initComptime(&str); comptime var parser = Parser{ - .buf = &str, .iter = view.iterator(), }; @@ -311,10 +310,13 @@ pub const Specifier = union(enum) { named: []const u8, }; +/// A stream based parser for format strings. +/// +/// Allows to implement formatters compatible with std.fmt without replicating +/// the standard library behavior. 
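+///
+/// A sketch of pulling a decimal width off the stream with the `number`
+/// helper below (assumes `view` is a `std.unicode.Utf8View` over the format
+/// string, as in `Placeholder.parse`):
+///
+///     comptime var parser: Parser = .{ .iter = view.iterator() };
+///     const width = parser.number() orelse 0;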
pub const Parser = struct { - buf: []const u8, pos: usize = 0, - iter: std.unicode.Utf8Iterator = undefined, + iter: std.unicode.Utf8Iterator, // Returns a decimal number or null if the current character is not a // digit diff --git a/lib/std/hash_map.zig b/lib/std/hash_map.zig index 9c436320b7..e6f51dc648 100644 --- a/lib/std/hash_map.zig +++ b/lib/std/hash_map.zig @@ -1692,7 +1692,7 @@ pub fn HashMapUnmanaged( } self.size = 0; - self.pointer_stability = .{ .state = .unlocked }; + self.pointer_stability = .{}; std.mem.swap(Self, self, &map); map.deinit(allocator); } diff --git a/lib/std/mem/Allocator.zig b/lib/std/mem/Allocator.zig index 0d4ab9141f..8aea197d6a 100644 --- a/lib/std/mem/Allocator.zig +++ b/lib/std/mem/Allocator.zig @@ -301,8 +301,9 @@ pub fn reallocAdvanced( return mem.bytesAsSlice(T, new_bytes); } -/// Free an array allocated with `alloc`. To free a single item, -/// see `destroy`. +/// Free an array allocated with `alloc`. +/// If memory has length 0, free is a no-op. +/// To free a single item, see `destroy`. pub fn free(self: Allocator, memory: anytype) void { const Slice = @typeInfo(@TypeOf(memory)).pointer; const bytes = mem.sliceAsBytes(memory); diff --git a/lib/std/meta.zig b/lib/std/meta.zig index 0ea83bb11e..44bfb65f8a 100644 --- a/lib/std/meta.zig +++ b/lib/std/meta.zig @@ -737,13 +737,15 @@ test TagPayload { try testing.expect(MovedEvent == @TypeOf(e.Moved)); } -/// Compares two of any type for equality. Containers are compared on a field-by-field basis, -/// where possible. Pointers are not followed. +/// Compares two of any type for equality. Containers that do not support comparison +/// on their own are compared on a field-by-field basis. Pointers are not followed. pub fn eql(a: anytype, b: @TypeOf(a)) bool { const T = @TypeOf(a); switch (@typeInfo(T)) { .@"struct" => |info| { + if (info.layout == .@"packed") return a == b; + inline for (info.fields) |field_info| { if (!eql(@field(a, field_info.name), @field(b, field_info.name))) return false; } diff --git a/lib/std/posix.zig b/lib/std/posix.zig index d46307dbdf..3ca5e1ae59 100644 --- a/lib/std/posix.zig +++ b/lib/std/posix.zig @@ -1817,6 +1817,7 @@ pub fn openatZ(dir_fd: fd_t, file_path: [*:0]const u8, flags: O, mode: mode_t) O .OPNOTSUPP => return error.FileLocksNotSupported, .AGAIN => return error.WouldBlock, .TXTBSY => return error.FileBusy, + .NXIO => return error.NoDevice, .ILSEQ => |err| if (native_os == .wasi) return error.InvalidUtf8 else diff --git a/lib/std/zig/AstGen.zig b/lib/std/zig/AstGen.zig index abec1f354a..a9b518357b 100644 --- a/lib/std/zig/AstGen.zig +++ b/lib/std/zig/AstGen.zig @@ -6024,7 +6024,7 @@ fn tryExpr( if (!parent_gz.is_comptime) { try emitDbgNode(parent_gz, node); } - const try_lc = LineColumn{ astgen.source_line - parent_gz.decl_line, astgen.source_column }; + const try_lc: LineColumn = .{ astgen.source_line - parent_gz.decl_line, astgen.source_column }; const operand_rl: ResultInfo.Loc, const block_tag: Zir.Inst.Tag = switch (ri.rl) { .ref => .{ .ref, .try_ptr }, @@ -6577,6 +6577,7 @@ fn whileExpr( const astgen = parent_gz.astgen; const tree = astgen.tree; const token_tags = tree.tokens.items(.tag); + const token_starts = tree.tokens.items(.start); const need_rl = astgen.nodes_need_rl.contains(node); const block_ri: ResultInfo = if (need_rl) ri else .{ @@ -6774,6 +6775,16 @@ fn whileExpr( try checkUsed(parent_gz, &then_scope.base, then_sub_scope); const break_tag: Zir.Inst.Tag = if (is_inline) .break_inline else .@"break"; if (!continue_scope.endsWithNoReturn()) { + 
astgen.advanceSourceCursor(token_starts[tree.lastToken(then_node)]); + try emitDbgStmt(parent_gz, .{ astgen.source_line - parent_gz.decl_line, astgen.source_column }); + _ = try parent_gz.add(.{ + .tag = .extended, + .data = .{ .extended = .{ + .opcode = .dbg_empty_stmt, + .small = undefined, + .operand = undefined, + } }, + }); _ = try continue_scope.addBreak(break_tag, continue_block, .void_value); } try continue_scope.setBlockBody(continue_block); @@ -6882,6 +6893,7 @@ fn forExpr( } const tree = astgen.tree; const token_tags = tree.tokens.items(.tag); + const token_starts = tree.tokens.items(.start); const node_tags = tree.nodes.items(.tag); const node_data = tree.nodes.items(.data); const gpa = astgen.gpa; @@ -7087,8 +7099,18 @@ fn forExpr( try checkUsed(parent_gz, &then_scope.base, then_sub_scope); - const break_tag: Zir.Inst.Tag = if (is_inline) .break_inline else .@"break"; + astgen.advanceSourceCursor(token_starts[tree.lastToken(then_node)]); + try emitDbgStmt(parent_gz, .{ astgen.source_line - parent_gz.decl_line, astgen.source_column }); + _ = try parent_gz.add(.{ + .tag = .extended, + .data = .{ .extended = .{ + .opcode = .dbg_empty_stmt, + .small = undefined, + .operand = undefined, + } }, + }); + const break_tag: Zir.Inst.Tag = if (is_inline) .break_inline else .@"break"; _ = try then_scope.addBreak(break_tag, cond_block, .void_value); var else_scope = parent_gz.makeSubBlock(&cond_scope.base); @@ -7135,6 +7157,7 @@ fn forExpr( .lhs = index_ptr, .rhs = index_plus_one, }); + const repeat_tag: Zir.Inst.Tag = if (is_inline) .repeat_inline else .repeat; _ = try loop_scope.addNode(repeat_tag, node); @@ -7279,7 +7302,7 @@ fn switchExprErrUnion( }; astgen.advanceSourceCursorToNode(operand_node); - const operand_lc = LineColumn{ astgen.source_line - parent_gz.decl_line, astgen.source_column }; + const operand_lc: LineColumn = .{ astgen.source_line - parent_gz.decl_line, astgen.source_column }; const raw_operand = try reachableExpr(parent_gz, scope, operand_ri, operand_node, switch_node); const item_ri: ResultInfo = .{ .rl = .none }; @@ -7868,7 +7891,7 @@ fn switchExpr( const operand_ri: ResultInfo = .{ .rl = if (any_payload_is_ref) .ref else .none }; astgen.advanceSourceCursorToNode(operand_node); - const operand_lc = LineColumn{ astgen.source_line - parent_gz.decl_line, astgen.source_column }; + const operand_lc: LineColumn = .{ astgen.source_line - parent_gz.decl_line, astgen.source_column }; const raw_operand = try expr(parent_gz, scope, operand_ri, operand_node); const item_ri: ResultInfo = .{ .rl = .none }; @@ -8214,7 +8237,7 @@ fn ret(gz: *GenZir, scope: *Scope, node: Ast.Node.Index) InnerError!Zir.Inst.Ref if (!gz.is_comptime) { try emitDbgNode(gz, node); } - const ret_lc = LineColumn{ astgen.source_line - gz.decl_line, astgen.source_column }; + const ret_lc: LineColumn = .{ astgen.source_line - gz.decl_line, astgen.source_column }; const defer_outer = &astgen.fn_block.?.base; diff --git a/lib/std/zig/Zir.zig b/lib/std/zig/Zir.zig index 00a48e21f7..f2c103f835 100644 --- a/lib/std/zig/Zir.zig +++ b/lib/std/zig/Zir.zig @@ -2088,6 +2088,8 @@ pub const Inst = struct { /// `operand` is `Zir.Inst.Ref` of the loaded LHS (*not* its type). /// `small` is an `Inst.InplaceOp`. inplace_arith_result_ty, + /// Marks a statement that can be stepped to but produces no code. + dbg_empty_stmt, pub const InstData = struct { opcode: Extended, @@ -4062,6 +4064,7 @@ fn findDeclsInner( .branch_hint, .inplace_arith_result_ty, .tuple_decl, + .dbg_empty_stmt, => return, // `@TypeOf` has a body. 
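The AstGen changes above emit `dbg_empty_stmt` at the closing brace of `while` and `for` bodies so that each iteration of a loop whose body is a single statement gets its own steppable location, even though the instruction lowers to no real code (or a lone `nop` on x86_64, see `airDbgEmptyStmt` below). A minimal sketch of the kind of loop this serves (illustrative only):

    pub fn main() void {
        var x: u32 = 0;
        while (x < 3) {
            x +%= 1; // the only statement in the body: without an empty
        } // statement at the closing brace, every iteration maps to one line
    }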
diff --git a/lib/zig.h b/lib/zig.h index 248bb8641c..14b50aea47 100644 --- a/lib/zig.h +++ b/lib/zig.h @@ -256,6 +256,8 @@ typedef char bool; #define zig_trap() __asm__ volatile("udf #0xfe") #elif defined(__arm__) || defined(__aarch64__) #define zig_trap() __asm__ volatile("udf #0xfdee") +#elif defined(__hexagon__) +#define zig_trap() __asm__ volatile("r27:26 = memd(#0xbadc0fee)") #elif defined(__loongarch__) || defined(__powerpc__) #define zig_trap() __asm__ volatile(".word 0x0") #elif defined(__mips__) @@ -280,6 +282,8 @@ typedef char bool; #define zig_breakpoint() __asm__ volatile("bkpt #0x0") #elif defined(__aarch64__) #define zig_breakpoint() __asm__ volatile("brk #0xf000") +#elif defined(__hexagon__) +#define zig_breakpoint() __asm__ volatile("brkpt") #elif defined(__loongarch__) #define zig_breakpoint() __asm__ volatile("break 0x0") #elif defined(__mips__) diff --git a/src/Air.zig b/src/Air.zig index 3aa5f317c0..4589bb1557 100644 --- a/src/Air.zig +++ b/src/Air.zig @@ -460,6 +460,8 @@ pub const Inst = struct { /// Result type is always void. /// Uses the `dbg_stmt` field. dbg_stmt, + /// Marks a statement that can be stepped to but produces no code. + dbg_empty_stmt, /// A block that represents an inlined function call. /// Uses the `ty_pl` field. Payload is `DbgInlineBlock`. dbg_inline_block, @@ -1468,6 +1470,7 @@ pub fn typeOfIndex(air: *const Air, inst: Air.Inst.Index, ip: *const InternPool) .breakpoint, .dbg_stmt, + .dbg_empty_stmt, .dbg_var_ptr, .dbg_var_val, .dbg_arg_inline, @@ -1629,6 +1632,7 @@ pub fn mustLower(air: Air, inst: Air.Inst.Index, ip: *const InternPool) bool { .try_ptr, .try_ptr_cold, .dbg_stmt, + .dbg_empty_stmt, .dbg_inline_block, .dbg_var_ptr, .dbg_var_val, diff --git a/src/Air/types_resolved.zig b/src/Air/types_resolved.zig index 098cb29b22..cc866184e4 100644 --- a/src/Air/types_resolved.zig +++ b/src/Air/types_resolved.zig @@ -417,6 +417,7 @@ fn checkBody(air: Air, body: []const Air.Inst.Index, zcu: *Zcu) bool { .work_group_size, .work_group_id, .dbg_stmt, + .dbg_empty_stmt, .err_return_trace, .save_err_return_trace_index, .repeat, diff --git a/src/Compilation.zig b/src/Compilation.zig index a228d61257..3ad5c7932f 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -5884,7 +5884,9 @@ pub const FileExt = enum { }; pub fn hasObjectExt(filename: []const u8) bool { - return mem.endsWith(u8, filename, ".o") or mem.endsWith(u8, filename, ".obj"); + return mem.endsWith(u8, filename, ".o") or + mem.endsWith(u8, filename, ".lo") or + mem.endsWith(u8, filename, ".obj"); } pub fn hasStaticLibraryExt(filename: []const u8) bool { diff --git a/src/Liveness.zig b/src/Liveness.zig index b5bffc6a48..709844c0ac 100644 --- a/src/Liveness.zig +++ b/src/Liveness.zig @@ -334,6 +334,7 @@ pub fn categorizeOperand( .repeat, .switch_dispatch, .dbg_stmt, + .dbg_empty_stmt, .unreach, .ret_addr, .frame_addr, @@ -973,6 +974,7 @@ fn analyzeInst( .ret_ptr, .breakpoint, .dbg_stmt, + .dbg_empty_stmt, .ret_addr, .frame_addr, .wasm_memory_size, diff --git a/src/Liveness/Verify.zig b/src/Liveness/Verify.zig index aa2239793a..01e0842ded 100644 --- a/src/Liveness/Verify.zig +++ b/src/Liveness/Verify.zig @@ -56,6 +56,7 @@ fn verifyBody(self: *Verify, body: []const Air.Inst.Index) Error!void { .ret_ptr, .breakpoint, .dbg_stmt, + .dbg_empty_stmt, .ret_addr, .frame_addr, .wasm_memory_size, diff --git a/src/Sema.zig b/src/Sema.zig index ceaff910ba..635c3437c0 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -1355,6 +1355,11 @@ fn analyzeBodyInner( .field_parent_ptr => try 
sema.zirFieldParentPtr(block, extended), .builtin_value => try sema.zirBuiltinValue(block, extended), .inplace_arith_result_ty => try sema.zirInplaceArithResultTy(extended), + .dbg_empty_stmt => { + try sema.zirDbgEmptyStmt(block, inst); + i += 1; + continue; + }, }; }, @@ -2584,18 +2589,7 @@ fn validateAlign( src: LazySrcLoc, alignment: u64, ) !Alignment { - const result = try validateAlignAllowZero(sema, block, src, alignment); - if (result == .none) return sema.fail(block, src, "alignment must be >= 1", .{}); - return result; -} - -fn validateAlignAllowZero( - sema: *Sema, - block: *Block, - src: LazySrcLoc, - alignment: u64, -) !Alignment { - if (alignment == 0) return .none; + if (alignment == 0) return sema.fail(block, src, "alignment must be >= 1", .{}); if (!std.math.isPowerOfTwo(alignment)) { return sema.fail(block, src, "alignment value '{d}' is not a power of two", .{ alignment, @@ -6682,6 +6676,11 @@ fn zirDbgStmt(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!voi }); } +fn zirDbgEmptyStmt(_: *Sema, block: *Block, _: Zir.Inst.Index) CompileError!void { + if (block.is_comptime or block.ownerModule().strip) return; + _ = try block.addNoOp(.dbg_empty_stmt); +} + fn zirDbgVar( sema: *Sema, block: *Block, @@ -20542,7 +20541,7 @@ fn zirPtrType(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air else => {}, } const align_bytes = (try val.getUnsignedIntSema(pt)).?; - break :blk try sema.validateAlignAllowZero(block, align_src, align_bytes); + break :blk try sema.validateAlign(block, align_src, align_bytes); } else .none; const address_space: std.builtin.AddressSpace = if (inst_data.flags.has_addrspace) blk: { @@ -26904,7 +26903,7 @@ fn zirFuncFancy(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!A if (val.isGenericPoison()) { break :blk null; } - break :blk try sema.validateAlignAllowZero(block, align_src, try val.toUnsignedIntSema(pt)); + break :blk try sema.validateAlign(block, align_src, try val.toUnsignedIntSema(pt)); } else if (extra.data.bits.has_align_ref) blk: { const align_ref: Zir.Inst.Ref = @enumFromInt(sema.code.extra[extra_index]); extra_index += 1; @@ -26922,7 +26921,7 @@ fn zirFuncFancy(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!A error.GenericPoison => break :blk null, else => |e| return e, }; - break :blk try sema.validateAlignAllowZero(block, align_src, try align_val.toUnsignedIntSema(pt)); + break :blk try sema.validateAlign(block, align_src, try align_val.toUnsignedIntSema(pt)); } else .none; const @"addrspace": ?std.builtin.AddressSpace = if (extra.data.bits.has_addrspace_body) blk: { diff --git a/src/arch/aarch64/CodeGen.zig b/src/arch/aarch64/CodeGen.zig index 6371cf92f3..8fd27d4bb7 100644 --- a/src/arch/aarch64/CodeGen.zig +++ b/src/arch/aarch64/CodeGen.zig @@ -800,6 +800,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .try_ptr_cold => try self.airTryPtr(inst), .dbg_stmt => try self.airDbgStmt(inst), + .dbg_empty_stmt => self.finishAirBookkeeping(), .dbg_inline_block => try self.airDbgInlineBlock(inst), .dbg_var_ptr, .dbg_var_val, diff --git a/src/arch/arm/CodeGen.zig b/src/arch/arm/CodeGen.zig index 9966648759..065f4a047d 100644 --- a/src/arch/arm/CodeGen.zig +++ b/src/arch/arm/CodeGen.zig @@ -787,6 +787,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .try_ptr_cold => try self.airTryPtr(inst), .dbg_stmt => try self.airDbgStmt(inst), + .dbg_empty_stmt => self.finishAirBookkeeping(), .dbg_inline_block => try self.airDbgInlineBlock(inst), 
.dbg_var_ptr, .dbg_var_val, diff --git a/src/arch/riscv64/CodeGen.zig b/src/arch/riscv64/CodeGen.zig index 24497defa2..29a0a8b8b5 100644 --- a/src/arch/riscv64/CodeGen.zig +++ b/src/arch/riscv64/CodeGen.zig @@ -1593,6 +1593,7 @@ fn genBody(func: *Func, body: []const Air.Inst.Index) InnerError!void { .frame_addr => try func.airFrameAddress(inst), .cond_br => try func.airCondBr(inst), .dbg_stmt => try func.airDbgStmt(inst), + .dbg_empty_stmt => func.finishAirBookkeeping(), .fptrunc => try func.airFptrunc(inst), .fpext => try func.airFpext(inst), .intcast => try func.airIntCast(inst), diff --git a/src/arch/sparc64/CodeGen.zig b/src/arch/sparc64/CodeGen.zig index a1bef1f4cd..7bbed29d8f 100644 --- a/src/arch/sparc64/CodeGen.zig +++ b/src/arch/sparc64/CodeGen.zig @@ -642,6 +642,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .try_ptr_cold => @panic("TODO try self.airTryPtrCold(inst)"), .dbg_stmt => try self.airDbgStmt(inst), + .dbg_empty_stmt => self.finishAirBookkeeping(), .dbg_inline_block => try self.airDbgInlineBlock(inst), .dbg_var_ptr, .dbg_var_val, diff --git a/src/arch/wasm/CodeGen.zig b/src/arch/wasm/CodeGen.zig index 50a8869282..ccdf38a474 100644 --- a/src/arch/wasm/CodeGen.zig +++ b/src/arch/wasm/CodeGen.zig @@ -1924,6 +1924,7 @@ fn genInst(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { .try_ptr_cold => func.airTryPtr(inst), .dbg_stmt => func.airDbgStmt(inst), + .dbg_empty_stmt => try func.finishAir(inst, .none, &.{}), .dbg_inline_block => func.airDbgInlineBlock(inst), .dbg_var_ptr => func.airDbgVar(inst, .local_var, true), .dbg_var_val => func.airDbgVar(inst, .local_var, false), diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 06ae399f25..298b2e11e0 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -961,9 +961,16 @@ pub fn generate( }, .debug_output = debug_output, .code = code, + .prev_di_loc = .{ + .line = func.lbrace_line, + .column = func.lbrace_column, + .is_stmt = switch (debug_output) { + .dwarf => |dwarf| dwarf.dwarf.debug_line.header.default_is_stmt, + .plan9 => undefined, + .none => undefined, + }, + }, .prev_di_pc = 0, - .prev_di_line = func.lbrace_line, - .prev_di_column = func.lbrace_column, }; defer emit.deinit(); emit.emitMir() catch |err| switch (err) { @@ -1066,9 +1073,8 @@ pub fn generateLazy( }, .debug_output = debug_output, .code = code, + .prev_di_loc = undefined, // no debug info yet .prev_di_pc = undefined, // no debug info yet - .prev_di_line = undefined, // no debug info yet - .prev_di_column = undefined, // no debug info yet }; defer emit.deinit(); emit.emitMir() catch |err| switch (err) { @@ -1194,13 +1200,16 @@ fn formatWipMir( switch (mir_inst.ops) { else => unreachable, .pseudo_dbg_prologue_end_none, - .pseudo_dbg_line_line_column, .pseudo_dbg_epilogue_begin_none, .pseudo_dbg_enter_block_none, .pseudo_dbg_leave_block_none, .pseudo_dbg_var_args_none, .pseudo_dead_none, => {}, + .pseudo_dbg_line_stmt_line_column, .pseudo_dbg_line_line_column => try writer.print( + " {[line]d}, {[column]d}", + mir_inst.data.line_column, + ), .pseudo_dbg_enter_inline_func, .pseudo_dbg_leave_inline_func => try writer.print(" {}", .{ ip.getNav(ip.indexToKey(mir_inst.data.func).func.owner_nav).name.fmt(ip), }), @@ -1281,14 +1290,7 @@ fn addInst(self: *Self, inst: Mir.Inst) error{OutOfMemory}!Mir.Inst.Index { try self.mir_instructions.ensureUnusedCapacity(gpa, 1); const result_index: Mir.Inst.Index = @intCast(self.mir_instructions.len); self.mir_instructions.appendAssumeCapacity(inst); 
- if (inst.tag != .pseudo or switch (inst.ops) { - else => true, - .pseudo_dbg_prologue_end_none, - .pseudo_dbg_line_line_column, - .pseudo_dbg_epilogue_begin_none, - .pseudo_dead_none, - => false, - }) wip_mir_log.debug("{}", .{self.fmtWipMir(result_index)}); + wip_mir_log.debug("{}", .{self.fmtWipMir(result_index)}); return result_index; } @@ -2218,7 +2220,7 @@ fn gen(self: *Self) InnerError!void { // Drop them off at the rbrace. _ = try self.addInst(.{ .tag = .pseudo, - .ops = .pseudo_dbg_line_line_column, + .ops = .pseudo_dbg_line_stmt_line_column, .data = .{ .line_column = .{ .line = self.end_di_line, .column = self.end_di_column, @@ -2426,6 +2428,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .try_ptr_cold => try self.airTryPtr(inst), // TODO .dbg_stmt => try self.airDbgStmt(inst), + .dbg_empty_stmt => try self.airDbgEmptyStmt(), .dbg_inline_block => try self.airDbgInlineBlock(inst), .dbg_var_ptr, .dbg_var_val, @@ -13281,7 +13284,7 @@ fn airDbgStmt(self: *Self, inst: Air.Inst.Index) !void { const dbg_stmt = self.air.instructions.items(.data)[@intFromEnum(inst)].dbg_stmt; _ = try self.addInst(.{ .tag = .pseudo, - .ops = .pseudo_dbg_line_line_column, + .ops = .pseudo_dbg_line_stmt_line_column, .data = .{ .line_column = .{ .line = dbg_stmt.line, .column = dbg_stmt.column, @@ -13290,6 +13293,14 @@ fn airDbgStmt(self: *Self, inst: Air.Inst.Index) !void { self.finishAirBookkeeping(); } +fn airDbgEmptyStmt(self: *Self) !void { + if (self.mir_instructions.len > 0 and + self.mir_instructions.items(.ops)[self.mir_instructions.len - 1] == .pseudo_dbg_line_stmt_line_column) + self.mir_instructions.items(.ops)[self.mir_instructions.len - 1] = .pseudo_dbg_line_line_column; + try self.asmOpOnly(.{ ._, .nop }); + self.finishAirBookkeeping(); +} + fn airDbgInlineBlock(self: *Self, inst: Air.Inst.Index) !void { const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; const extra = self.air.extraData(Air.DbgInlineBlock, ty_pl.payload); diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig index 0395b7a43b..f744eb3fc4 100644 --- a/src/arch/x86_64/Emit.zig +++ b/src/arch/x86_64/Emit.zig @@ -6,8 +6,7 @@ atom_index: u32, debug_output: link.File.DebugInfoOutput, code: *std.ArrayList(u8), -prev_di_line: u32, -prev_di_column: u32, +prev_di_loc: Loc, /// Relative to the beginning of `code`. 
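+/// Byte offset of the most recent line-table entry: `dbgAdvancePCAndLine`
+/// computes its PC delta against this value and then resets it to
+/// `code.items.len`.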
prev_di_pc: usize, @@ -263,77 +262,71 @@ pub fn emitMir(emit: *Emit) Error!void { else => unreachable, .pseudo => switch (mir_inst.ops) { else => unreachable, - .pseudo_dbg_prologue_end_none => { - switch (emit.debug_output) { - .dwarf => |dw| try dw.setPrologueEnd(), - .plan9 => {}, - .none => {}, - } + .pseudo_dbg_prologue_end_none => switch (emit.debug_output) { + .dwarf => |dwarf| try dwarf.setPrologueEnd(), + .plan9 => {}, + .none => {}, }, - .pseudo_dbg_line_line_column => try emit.dbgAdvancePCAndLine( - mir_inst.data.line_column.line, - mir_inst.data.line_column.column, - ), - .pseudo_dbg_epilogue_begin_none => { - switch (emit.debug_output) { - .dwarf => |dw| { - try dw.setEpilogueBegin(); - log.debug("mirDbgEpilogueBegin (line={d}, col={d})", .{ - emit.prev_di_line, emit.prev_di_column, - }); - try emit.dbgAdvancePCAndLine(emit.prev_di_line, emit.prev_di_column); - }, - .plan9 => {}, - .none => {}, - } + .pseudo_dbg_line_stmt_line_column => try emit.dbgAdvancePCAndLine(.{ + .line = mir_inst.data.line_column.line, + .column = mir_inst.data.line_column.column, + .is_stmt = true, + }), + .pseudo_dbg_line_line_column => try emit.dbgAdvancePCAndLine(.{ + .line = mir_inst.data.line_column.line, + .column = mir_inst.data.line_column.column, + .is_stmt = false, + }), + .pseudo_dbg_epilogue_begin_none => switch (emit.debug_output) { + .dwarf => |dwarf| { + try dwarf.setEpilogueBegin(); + log.debug("mirDbgEpilogueBegin (line={d}, col={d})", .{ + emit.prev_di_loc.line, emit.prev_di_loc.column, + }); + try emit.dbgAdvancePCAndLine(emit.prev_di_loc); + }, + .plan9 => {}, + .none => {}, }, - .pseudo_dbg_enter_block_none => { - switch (emit.debug_output) { - .dwarf => |dw| { - log.debug("mirDbgEnterBlock (line={d}, col={d})", .{ - emit.prev_di_line, emit.prev_di_column, - }); - try dw.enterBlock(emit.code.items.len); - }, - .plan9 => {}, - .none => {}, - } + .pseudo_dbg_enter_block_none => switch (emit.debug_output) { + .dwarf => |dwarf| { + log.debug("mirDbgEnterBlock (line={d}, col={d})", .{ + emit.prev_di_loc.line, emit.prev_di_loc.column, + }); + try dwarf.enterBlock(emit.code.items.len); + }, + .plan9 => {}, + .none => {}, }, - .pseudo_dbg_leave_block_none => { - switch (emit.debug_output) { - .dwarf => |dw| { - log.debug("mirDbgLeaveBlock (line={d}, col={d})", .{ - emit.prev_di_line, emit.prev_di_column, - }); - try dw.leaveBlock(emit.code.items.len); - }, - .plan9 => {}, - .none => {}, - } + .pseudo_dbg_leave_block_none => switch (emit.debug_output) { + .dwarf => |dwarf| { + log.debug("mirDbgLeaveBlock (line={d}, col={d})", .{ + emit.prev_di_loc.line, emit.prev_di_loc.column, + }); + try dwarf.leaveBlock(emit.code.items.len); + }, + .plan9 => {}, + .none => {}, }, - .pseudo_dbg_enter_inline_func => { - switch (emit.debug_output) { - .dwarf => |dw| { - log.debug("mirDbgEnterInline (line={d}, col={d})", .{ - emit.prev_di_line, emit.prev_di_column, - }); - try dw.enterInlineFunc(mir_inst.data.func, emit.code.items.len, emit.prev_di_line, emit.prev_di_column); - }, - .plan9 => {}, - .none => {}, - } + .pseudo_dbg_enter_inline_func => switch (emit.debug_output) { + .dwarf => |dwarf| { + log.debug("mirDbgEnterInline (line={d}, col={d})", .{ + emit.prev_di_loc.line, emit.prev_di_loc.column, + }); + try dwarf.enterInlineFunc(mir_inst.data.func, emit.code.items.len, emit.prev_di_loc.line, emit.prev_di_loc.column); + }, + .plan9 => {}, + .none => {}, }, - .pseudo_dbg_leave_inline_func => { - switch (emit.debug_output) { - .dwarf => |dw| { - log.debug("mirDbgLeaveInline (line={d}, col={d})", .{ - 
emit.prev_di_line, emit.prev_di_column, - }); - try dw.leaveInlineFunc(mir_inst.data.func, emit.code.items.len); - }, - .plan9 => {}, - .none => {}, - } + .pseudo_dbg_leave_inline_func => switch (emit.debug_output) { + .dwarf => |dwarf| { + log.debug("mirDbgLeaveInline (line={d}, col={d})", .{ + emit.prev_di_loc.line, emit.prev_di_loc.column, + }); + try dwarf.leaveInlineFunc(mir_inst.data.func, emit.code.items.len); + }, + .plan9 => {}, + .none => {}, }, .pseudo_dbg_local_a, .pseudo_dbg_local_ai_s, @@ -344,129 +337,125 @@ pub fn emitMir(emit: *Emit) Error!void { .pseudo_dbg_local_aro, .pseudo_dbg_local_af, .pseudo_dbg_local_am, - => { - switch (emit.debug_output) { - .dwarf => |dw| { - var loc_buf: [2]link.File.Dwarf.Loc = undefined; - const air_inst_index, const loc: link.File.Dwarf.Loc = switch (mir_inst.ops) { + => switch (emit.debug_output) { + .dwarf => |dwarf| { + var loc_buf: [2]link.File.Dwarf.Loc = undefined; + const air_inst_index, const loc: link.File.Dwarf.Loc = switch (mir_inst.ops) { + else => unreachable, + .pseudo_dbg_local_a => .{ mir_inst.data.a.air_inst, .empty }, + .pseudo_dbg_local_ai_s, + .pseudo_dbg_local_ai_u, + .pseudo_dbg_local_ai_64, + => .{ mir_inst.data.ai.air_inst, .{ .stack_value = stack_value: { + loc_buf[0] = switch (emit.lower.imm(mir_inst.ops, mir_inst.data.ai.i)) { + .signed => |s| .{ .consts = s }, + .unsigned => |u| .{ .constu = u }, + }; + break :stack_value &loc_buf[0]; + } } }, + .pseudo_dbg_local_as => .{ mir_inst.data.as.air_inst, .{ .addr = .{ + .sym = mir_inst.data.as.sym_index, + } } }, + .pseudo_dbg_local_aso => loc: { + const sym_off = emit.lower.mir.extraData( + bits.SymbolOffset, + mir_inst.data.ax.payload, + ).data; + break :loc .{ mir_inst.data.ax.air_inst, .{ .plus = .{ + sym: { + loc_buf[0] = .{ .addr = .{ .sym = sym_off.sym_index } }; + break :sym &loc_buf[0]; + }, + off: { + loc_buf[1] = .{ .consts = sym_off.off }; + break :off &loc_buf[1]; + }, + } } }; + }, + .pseudo_dbg_local_aro => loc: { + const air_off = emit.lower.mir.extraData( + Mir.AirOffset, + mir_inst.data.rx.payload, + ).data; + break :loc .{ air_off.air_inst, .{ .plus = .{ + reg: { + loc_buf[0] = .{ .breg = mir_inst.data.rx.r1.dwarfNum() }; + break :reg &loc_buf[0]; + }, + off: { + loc_buf[1] = .{ .consts = air_off.off }; + break :off &loc_buf[1]; + }, + } } }; + }, + .pseudo_dbg_local_af => loc: { + const reg_off = emit.lower.mir.resolveFrameAddr(emit.lower.mir.extraData( + bits.FrameAddr, + mir_inst.data.ax.payload, + ).data); + break :loc .{ mir_inst.data.ax.air_inst, .{ .plus = .{ + reg: { + loc_buf[0] = .{ .breg = reg_off.reg.dwarfNum() }; + break :reg &loc_buf[0]; + }, + off: { + loc_buf[1] = .{ .consts = reg_off.off }; + break :off &loc_buf[1]; + }, + } } }; + }, + .pseudo_dbg_local_am => loc: { + const mem = emit.lower.mem(mir_inst.data.ax.payload); + break :loc .{ mir_inst.data.ax.air_inst, .{ .plus = .{ + base: { + loc_buf[0] = switch (mem.base()) { + .none => .{ .constu = 0 }, + .reg => |reg| .{ .breg = reg.dwarfNum() }, + .frame => unreachable, + .reloc => |sym_index| .{ .addr = .{ .sym = sym_index } }, + }; + break :base &loc_buf[0]; + }, + disp: { + loc_buf[1] = switch (mem.disp()) { + .signed => |s| .{ .consts = s }, + .unsigned => |u| .{ .constu = u }, + }; + break :disp &loc_buf[1]; + }, + } } }; + }, + }; + const ip = &emit.lower.bin_file.comp.zcu.?.intern_pool; + const air_inst = emit.air.instructions.get(@intFromEnum(air_inst_index)); + const name: Air.NullTerminatedString = switch (air_inst.tag) { + else => unreachable, + .arg => 
air_inst.data.arg.name, + .dbg_var_ptr, .dbg_var_val, .dbg_arg_inline => @enumFromInt(air_inst.data.pl_op.payload), + }; + try dwarf.genLocalDebugInfo( + switch (air_inst.tag) { else => unreachable, - .pseudo_dbg_local_a => .{ mir_inst.data.a.air_inst, .empty }, - .pseudo_dbg_local_ai_s, - .pseudo_dbg_local_ai_u, - .pseudo_dbg_local_ai_64, - => .{ mir_inst.data.ai.air_inst, .{ .stack_value = stack_value: { - loc_buf[0] = switch (emit.lower.imm(mir_inst.ops, mir_inst.data.ai.i)) { - .signed => |s| .{ .consts = s }, - .unsigned => |u| .{ .constu = u }, - }; - break :stack_value &loc_buf[0]; - } } }, - .pseudo_dbg_local_as => .{ mir_inst.data.as.air_inst, .{ .addr = .{ - .sym = mir_inst.data.as.sym_index, - } } }, - .pseudo_dbg_local_aso => loc: { - const sym_off = emit.lower.mir.extraData( - bits.SymbolOffset, - mir_inst.data.ax.payload, - ).data; - break :loc .{ mir_inst.data.ax.air_inst, .{ .plus = .{ - sym: { - loc_buf[0] = .{ .addr = .{ .sym = sym_off.sym_index } }; - break :sym &loc_buf[0]; - }, - off: { - loc_buf[1] = .{ .consts = sym_off.off }; - break :off &loc_buf[1]; - }, - } } }; - }, - .pseudo_dbg_local_aro => loc: { - const air_off = emit.lower.mir.extraData( - Mir.AirOffset, - mir_inst.data.rx.payload, - ).data; - break :loc .{ air_off.air_inst, .{ .plus = .{ - reg: { - loc_buf[0] = .{ .breg = mir_inst.data.rx.r1.dwarfNum() }; - break :reg &loc_buf[0]; - }, - off: { - loc_buf[1] = .{ .consts = air_off.off }; - break :off &loc_buf[1]; - }, - } } }; - }, - .pseudo_dbg_local_af => loc: { - const reg_off = emit.lower.mir.resolveFrameAddr(emit.lower.mir.extraData( - bits.FrameAddr, - mir_inst.data.ax.payload, - ).data); - break :loc .{ mir_inst.data.ax.air_inst, .{ .plus = .{ - reg: { - loc_buf[0] = .{ .breg = reg_off.reg.dwarfNum() }; - break :reg &loc_buf[0]; - }, - off: { - loc_buf[1] = .{ .consts = reg_off.off }; - break :off &loc_buf[1]; - }, - } } }; - }, - .pseudo_dbg_local_am => loc: { - const mem = emit.lower.mem(mir_inst.data.ax.payload); - break :loc .{ mir_inst.data.ax.air_inst, .{ .plus = .{ - base: { - loc_buf[0] = switch (mem.base()) { - .none => .{ .constu = 0 }, - .reg => |reg| .{ .breg = reg.dwarfNum() }, - .frame => unreachable, - .reloc => |sym_index| .{ .addr = .{ .sym = sym_index } }, - }; - break :base &loc_buf[0]; - }, - disp: { - loc_buf[1] = switch (mem.disp()) { - .signed => |s| .{ .consts = s }, - .unsigned => |u| .{ .constu = u }, - }; - break :disp &loc_buf[1]; - }, - } } }; - }, - }; - const ip = &emit.lower.bin_file.comp.zcu.?.intern_pool; - const air_inst = emit.air.instructions.get(@intFromEnum(air_inst_index)); - const name: Air.NullTerminatedString = switch (air_inst.tag) { + .arg, .dbg_arg_inline => .local_arg, + .dbg_var_ptr, .dbg_var_val => .local_var, + }, + name.toSlice(emit.air), + switch (air_inst.tag) { else => unreachable, - .arg => air_inst.data.arg.name, - .dbg_var_ptr, .dbg_var_val, .dbg_arg_inline => @enumFromInt(air_inst.data.pl_op.payload), - }; - try dw.genLocalDebugInfo( - switch (air_inst.tag) { - else => unreachable, - .arg, .dbg_arg_inline => .local_arg, - .dbg_var_ptr, .dbg_var_val => .local_var, - }, - name.toSlice(emit.air), - switch (air_inst.tag) { - else => unreachable, - .arg => emit.air.typeOfIndex(air_inst_index, ip), - .dbg_var_ptr => emit.air.typeOf(air_inst.data.pl_op.operand, ip).childTypeIp(ip), - .dbg_var_val, .dbg_arg_inline => emit.air.typeOf(air_inst.data.pl_op.operand, ip), - }, - loc, - ); - }, - .plan9 => {}, - .none => {}, - } + .arg => emit.air.typeOfIndex(air_inst_index, ip), + .dbg_var_ptr => 
emit.air.typeOf(air_inst.data.pl_op.operand, ip).childTypeIp(ip), + .dbg_var_val, .dbg_arg_inline => emit.air.typeOf(air_inst.data.pl_op.operand, ip), + }, + loc, + ); + }, + .plan9 => {}, + .none => {}, }, - .pseudo_dbg_var_args_none => { - switch (emit.debug_output) { - .dwarf => |dw| try dw.genVarArgsDebugInfo(), - .plan9 => {}, - .none => {}, - } + .pseudo_dbg_var_args_none => switch (emit.debug_output) { + .dwarf => |dwarf| try dwarf.genVarArgsDebugInfo(), + .plan9 => {}, + .none => {}, }, .pseudo_dead_none => {}, }, @@ -515,16 +504,22 @@ fn fixupRelocs(emit: *Emit) Error!void { } } -fn dbgAdvancePCAndLine(emit: *Emit, line: u32, column: u32) Error!void { - const delta_line = @as(i33, line) - @as(i33, emit.prev_di_line); +const Loc = struct { + line: u32, + column: u32, + is_stmt: bool, +}; + +fn dbgAdvancePCAndLine(emit: *Emit, loc: Loc) Error!void { + const delta_line = @as(i33, loc.line) - @as(i33, emit.prev_di_loc.line); const delta_pc: usize = emit.code.items.len - emit.prev_di_pc; log.debug(" (advance pc={d} and line={d})", .{ delta_pc, delta_line }); switch (emit.debug_output) { - .dwarf => |dw| { - if (column != emit.prev_di_column) try dw.setColumn(column); - try dw.advancePCAndLine(delta_line, delta_pc); - emit.prev_di_line = line; - emit.prev_di_column = column; + .dwarf => |dwarf| { + if (loc.is_stmt != emit.prev_di_loc.is_stmt) try dwarf.negateStmt(); + if (loc.column != emit.prev_di_loc.column) try dwarf.setColumn(loc.column); + try dwarf.advancePCAndLine(delta_line, delta_pc); + emit.prev_di_loc = loc; emit.prev_di_pc = emit.code.items.len; }, .plan9 => |dbg_out| { @@ -553,11 +548,10 @@ fn dbgAdvancePCAndLine(emit: *Emit, line: u32, column: u32) Error!void { // we don't need to do anything, because adding the pc quanta does it for us } else unreachable; if (dbg_out.start_line == null) - dbg_out.start_line = emit.prev_di_line; - dbg_out.end_line = line; + dbg_out.start_line = emit.prev_di_loc.line; + dbg_out.end_line = loc.line; // only do this if the pc changed - emit.prev_di_line = line; - emit.prev_di_column = column; + emit.prev_di_loc = loc; emit.prev_di_pc = emit.code.items.len; }, .none => {}, diff --git a/src/arch/x86_64/Lower.zig b/src/arch/x86_64/Lower.zig index 6ac79378c1..015b3ba12e 100644 --- a/src/arch/x86_64/Lower.zig +++ b/src/arch/x86_64/Lower.zig @@ -310,6 +310,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { }), .pseudo_dbg_prologue_end_none, + .pseudo_dbg_line_stmt_line_column, .pseudo_dbg_line_line_column, .pseudo_dbg_epilogue_begin_none, .pseudo_dbg_enter_block_none, diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index 0e9d010758..a7f308b7b4 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -930,7 +930,10 @@ pub const Inst = struct { /// End of prologue pseudo_dbg_prologue_end_none, - /// Update debug line + /// Update debug line with is_stmt register set + /// Uses `line_column` payload. + pseudo_dbg_line_stmt_line_column, + /// Update debug line with is_stmt register clear /// Uses `line_column` payload. 
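+    /// (Emit flips the DWARF is_stmt register via `DW.LNS.negate_stmt`, see
+    /// `Dwarf.WipNav.negateStmt`, whenever consecutive entries disagree.)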
pseudo_dbg_line_line_column, /// Start of epilogue diff --git a/src/codegen/c.zig b/src/codegen/c.zig index 0410023588..56466b4395 100644 --- a/src/codegen/c.zig +++ b/src/codegen/c.zig @@ -3289,6 +3289,7 @@ fn genBodyInner(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail, .try_ptr_cold => try airTryPtr(f, inst), .dbg_stmt => try airDbgStmt(f, inst), + .dbg_empty_stmt => try airDbgEmptyStmt(f, inst), .dbg_var_ptr, .dbg_var_val, .dbg_arg_inline => try airDbgVar(f, inst), .float_from_int, @@ -4601,6 +4602,11 @@ fn airDbgStmt(f: *Function, inst: Air.Inst.Index) !CValue { return .none; } +fn airDbgEmptyStmt(f: *Function, _: Air.Inst.Index) !CValue { + try f.object.writer().writeAll("(void)0;\n"); + return .none; +} + fn airDbgInlineBlock(f: *Function, inst: Air.Inst.Index) !CValue { const pt = f.object.dg.pt; const zcu = pt.zcu; diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index fb20d4d622..d0b12350c0 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -5391,6 +5391,7 @@ pub const FuncGen = struct { .inferred_alloc, .inferred_alloc_comptime => unreachable, .dbg_stmt => try self.airDbgStmt(inst), + .dbg_empty_stmt => try self.airDbgEmptyStmt(inst), .dbg_var_ptr => try self.airDbgVarPtr(inst), .dbg_var_val => try self.airDbgVarVal(inst, false), .dbg_arg_inline => try self.airDbgVarVal(inst, true), @@ -7433,6 +7434,12 @@ pub const FuncGen = struct { return .none; } + fn airDbgEmptyStmt(self: *FuncGen, inst: Air.Inst.Index) !Builder.Value { + _ = self; + _ = inst; + return .none; + } + fn airDbgInlineBlock(self: *FuncGen, inst: Air.Inst.Index) !Builder.Value { const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; const extra = self.air.extraData(Air.DbgInlineBlock, ty_pl.payload); diff --git a/src/dev.zig b/src/dev.zig index 0d365c3e19..d623a708e7 100644 --- a/src/dev.zig +++ b/src/dev.zig @@ -81,6 +81,7 @@ pub const Env = enum { => true, .cc_command, .translate_c_command, + .fmt_command, .jit_command, .fetch_command, .init_command, @@ -168,6 +169,7 @@ pub const Feature = enum { clang_command, cc_command, translate_c_command, + fmt_command, jit_command, fetch_command, init_command, diff --git a/src/link/Dwarf.zig b/src/link/Dwarf.zig index 6653a8309a..afdc5d1d48 100644 --- a/src/link/Dwarf.zig +++ b/src/link/Dwarf.zig @@ -1474,6 +1474,11 @@ pub const WipNav = struct { try uleb128(dlw, column + 1); } + pub fn negateStmt(wip_nav: *WipNav) error{OutOfMemory}!void { + const dlw = wip_nav.debug_line.writer(wip_nav.dwarf.gpa); + try dlw.writeByte(DW.LNS.negate_stmt); + } + pub fn setPrologueEnd(wip_nav: *WipNav) error{OutOfMemory}!void { const dlw = wip_nav.debug_line.writer(wip_nav.dwarf.gpa); try dlw.writeByte(DW.LNS.set_prologue_end); diff --git a/src/link/Elf.zig b/src/link/Elf.zig index 8f39d3412f..a068ac6cdc 100644 --- a/src/link/Elf.zig +++ b/src/link/Elf.zig @@ -113,8 +113,6 @@ thunks: std.ArrayListUnmanaged(Thunk) = .empty, merge_sections: std.ArrayListUnmanaged(Merge.Section) = .empty, comment_merge_section_index: ?Merge.Section.Index = null, -first_eflags: ?elf.Word = null, - /// `--verbose-link` output. /// Initialized on creation, appended to as inputs are added, printed during `flush`. 
dump_argv_list: std.ArrayListUnmanaged([]const u8), @@ -791,7 +789,7 @@ pub fn loadInput(self: *Elf, input: link.Input) !void { .res => unreachable, .dso_exact => @panic("TODO"), .object => |obj| try parseObject(self, obj), - .archive => |obj| try parseArchive(gpa, diags, &self.file_handles, &self.files, &self.first_eflags, target, debug_fmt_strip, default_sym_version, &self.objects, obj, is_static_lib), + .archive => |obj| try parseArchive(gpa, diags, &self.file_handles, &self.files, target, debug_fmt_strip, default_sym_version, &self.objects, obj, is_static_lib), .dso => |dso| try parseDso(gpa, diags, dso, &self.shared_objects, &self.files, target), } } @@ -1124,7 +1122,6 @@ fn parseObject(self: *Elf, obj: link.Input.Object) !void { const gpa = self.base.comp.gpa; const diags = &self.base.comp.link_diags; - const first_eflags = &self.first_eflags; const target = self.base.comp.root_mod.resolved_target.result; const debug_fmt_strip = self.base.comp.config.debug_format == .strip; const default_sym_version = self.default_sym_version; @@ -1145,7 +1142,7 @@ fn parseObject(self: *Elf, obj: link.Input.Object) !void { try self.objects.append(gpa, index); const object = self.file(index).?.object; - try object.parseCommon(gpa, diags, obj.path, handle, target, first_eflags); + try object.parseCommon(gpa, diags, obj.path, handle, target); if (!self.base.isStaticLib()) { try object.parse(gpa, diags, obj.path, handle, target, debug_fmt_strip, default_sym_version); } @@ -1156,7 +1153,6 @@ fn parseArchive( diags: *Diags, file_handles: *std.ArrayListUnmanaged(File.Handle), files: *std.MultiArrayList(File.Entry), - first_eflags: *?elf.Word, target: std.Target, debug_fmt_strip: bool, default_sym_version: elf.Versym, @@ -1179,7 +1175,7 @@ fn parseArchive( const object = &files.items(.data)[index].object; object.index = index; object.alive = init_alive; - try object.parseCommon(gpa, diags, obj.path, obj.file, target, first_eflags); + try object.parseCommon(gpa, diags, obj.path, obj.file, target); if (!is_static_lib) try object.parse(gpa, diags, obj.path, obj.file, target, debug_fmt_strip, default_sym_version); try objects.append(gpa, index); diff --git a/src/link/Elf/Object.zig b/src/link/Elf/Object.zig index 688e51f3f1..65a62ff1a6 100644 --- a/src/link/Elf/Object.zig +++ b/src/link/Elf/Object.zig @@ -99,7 +99,6 @@ pub fn parseCommon( path: Path, handle: fs.File, target: std.Target, - first_eflags: *?elf.Word, ) !void { const offset = if (self.archive) |ar| ar.offset else 0; const file_size = (try handle.stat()).size; @@ -114,7 +113,7 @@ pub fn parseCommon( @tagName(self.header.?.e_machine), }); } - try validateEFlags(diags, path, target, self.header.?.e_flags, first_eflags); + try validateEFlags(diags, path, target, self.header.?.e_flags); if (self.header.?.e_shnum == 0) return; @@ -180,39 +179,81 @@ pub fn parseCommon( } } -fn validateEFlags( +pub fn validateEFlags( diags: *Diags, path: Path, target: std.Target, e_flags: elf.Word, - first_eflags: *?elf.Word, -) error{LinkFailure}!void { - if (first_eflags.*) |*self_eflags| { - switch (target.cpu.arch) { - .riscv64 => { - if (e_flags != self_eflags.*) { - const riscv_eflags: riscv.RiscvEflags = @bitCast(e_flags); - const self_riscv_eflags: *riscv.RiscvEflags = @ptrCast(self_eflags); +) !void { + switch (target.cpu.arch) { + .riscv64 => { + const features = target.cpu.features; + const flags: riscv.Eflags = @bitCast(e_flags); + var any_errors: bool = false; - self_riscv_eflags.rvc = self_riscv_eflags.rvc or riscv_eflags.rvc; - self_riscv_eflags.tso = 
self_riscv_eflags.tso or riscv_eflags.tso; + // For an input object to target an ABI that the target CPU doesn't have enabled + // is invalid, and will throw an error. - var any_errors: bool = false; - if (self_riscv_eflags.fabi != riscv_eflags.fabi) { - any_errors = true; - diags.addParseError(path, "cannot link object files with different float-point ABIs", .{}); - } - if (self_riscv_eflags.rve != riscv_eflags.rve) { - any_errors = true; - diags.addParseError(path, "cannot link object files with different RVEs", .{}); - } - if (any_errors) return error.LinkFailure; - } - }, - else => {}, - } - } else { - first_eflags.* = e_flags; + // Invalid when + // 1. The input uses C and we do not. + if (flags.rvc and !std.Target.riscv.featureSetHas(features, .c)) { + any_errors = true; + diags.addParseError( + path, + "cannot link object file targeting the C feature without having the C feature enabled", + .{}, + ); + } + + // Invalid when + // 1. We use E and the input does not. + // 2. The input uses E and we do not. + if (std.Target.riscv.featureSetHas(features, .e) != flags.rve) { + any_errors = true; + diags.addParseError( + path, + "{s}", + .{ + if (flags.rve) + "cannot link object file targeting the E feature without having the E feature enabled" + else + "cannot link object file not targeting the E feature while having the E feature enabled", + }, + ); + } + + // Invalid when + // 1. We use total store order and the input does not. + // 2. The input uses total store order and we do not. + if (flags.tso != std.Target.riscv.featureSetHas(features, .ztso)) { + any_errors = true; + diags.addParseError( + path, + "cannot link object file targeting the TSO memory model without having the ztso feature enabled", + .{}, + ); + } + + const fabi: riscv.Eflags.FloatAbi = + if (std.Target.riscv.featureSetHas(features, .d)) + .double + else if (std.Target.riscv.featureSetHas(features, .f)) + .single + else + .soft; + + if (flags.fabi != fabi) { + any_errors = true; + diags.addParseError( + path, + "cannot link object file targeting a different floating-point ABI. targeting {s}, found {s}", + .{ @tagName(fabi), @tagName(flags.fabi) }, + ); + } + + if (any_errors) return error.LinkFailure; + }, + else => {}, } } diff --git a/src/link/Elf/ZigObject.zig b/src/link/Elf/ZigObject.zig index 1e29ab8bf6..effe12539c 100644 --- a/src/link/Elf/ZigObject.zig +++ b/src/link/Elf/ZigObject.zig @@ -1496,7 +1496,7 @@ pub fn updateFunc( }); defer gpa.free(name); const osec = if (self.text_index) |sect_sym_index| - self.symbol(sect_sym_index).output_section_index + self.symbol(sect_sym_index).outputShndx(elf_file).? 
else osec: { const osec = try elf_file.addSection(.{ .name = try elf_file.insertShString(".text"), diff --git a/src/link/riscv.zig b/src/link/riscv.zig index 7c0282ef3b..106fd1a817 100644 --- a/src/link/riscv.zig +++ b/src/link/riscv.zig @@ -70,18 +70,20 @@ fn bitSlice( return @truncate((value >> low) & (1 << (high - low + 1)) - 1); } -pub const RiscvEflags = packed struct(u32) { +pub const Eflags = packed struct(u32) { rvc: bool, - fabi: enum(u2) { + fabi: FloatAbi, + rve: bool, + tso: bool, + _reserved: u19 = 0, + _unused: u8 = 0, + + pub const FloatAbi = enum(u2) { soft = 0b00, single = 0b01, double = 0b10, quad = 0b11, - }, - rve: bool, - tso: bool, - _reserved: u19, - _unused: u8, + }; }; const mem = std.mem; diff --git a/src/main.zig b/src/main.zig index 291820cb75..13e24d8c25 100644 --- a/src/main.zig +++ b/src/main.zig @@ -309,6 +309,7 @@ fn mainArgs(gpa: Allocator, arena: Allocator, args: []const []const u8) !void { .server = use_server, }); } else if (mem.eql(u8, cmd, "fmt")) { + dev.check(.fmt_command); return @import("fmt.zig").run(gpa, arena, cmd_args); } else if (mem.eql(u8, cmd, "objcopy")) { return jitCmd(gpa, arena, cmd_args, .{ diff --git a/src/musl.zig b/src/musl.zig index d1b2fd2e2d..ace72c0b07 100644 --- a/src/musl.zig +++ b/src/musl.zig @@ -138,17 +138,6 @@ pub fn buildCrtFile(comp: *Compilation, in_crt_file: CrtFile, prog_node: std.Pro try addSrcFile(arena, &source_table, src_file); } - const time32_compat_arch_list = [_][]const u8{ - "arm", - "i386", - "m68k", - "microblaze", - "mips", - "mipsn32", - "or1k", - "powerpc", - "sh", - }; for (time32_compat_arch_list) |time32_compat_arch| { if (mem.eql(u8, arch_name, time32_compat_arch)) { for (compat_time32_files) |compat_time32_file| { @@ -239,13 +228,29 @@ pub fn buildCrtFile(comp: *Compilation, in_crt_file: CrtFile, prog_node: std.Pro }); const target = comp.root_mod.resolved_target.result; - const arch_define = try std.fmt.allocPrint(arena, "-DARCH_{s}", .{ - @tagName(target.cpu.arch), - }); + const arch_name = std.zig.target.muslArchName(target.cpu.arch, target.abi); + const time32 = for (time32_compat_arch_list) |time32_compat_arch| { + if (mem.eql(u8, arch_name, time32_compat_arch)) break true; + } else false; + const arch_define = try std.fmt.allocPrint(arena, "-DARCH_{s}", .{arch_name}); + const family_define = switch (target.cpu.arch) { + .arm, .armeb, .thumb, .thumbeb => "-DFAMILY_arm", + .aarch64, .aarch64_be => "-DFAMILY_aarch64", + .loongarch64 => "-DFAMILY_loongarch", + .m68k => "-DFAMILY_m68k", + .mips, .mipsel, .mips64, .mips64el => "-DFAMILY_mips", + .powerpc, .powerpc64, .powerpc64le => "-DFAMILY_powerpc", + .riscv32, .riscv64 => "-DFAMILY_riscv", + .s390x => "-DFAMILY_s390x", + .x86, .x86_64 => "-DFAMILY_x86", + else => unreachable, + }; const cc_argv: []const []const u8 = if (target.ptrBitWidth() == 64) - &.{ "-DPTR64", arch_define } + &.{ "-DPTR64", arch_define, family_define } + else if (time32) + &.{ "-DTIME32", arch_define, family_define } else - &.{arch_define}; + &.{ arch_define, family_define }; const root_mod = try Module.create(arena, .{ .global_cache_directory = comp.global_cache_directory, @@ -347,6 +352,18 @@ pub fn needsCrt0(output_mode: std.builtin.OutputMode, link_mode: std.builtin.Lin }; } +const time32_compat_arch_list = [_][]const u8{ + "arm", + "i386", + "m68k", + "microblaze", + "mips", + "mipsn32", + "or1k", + "powerpc", + "sh", +}; + fn isArchName(name: []const u8) bool { const musl_arch_names = [_][]const u8{ "aarch64", diff --git a/src/print_air.zig b/src/print_air.zig index 
6a3f31a27c..280d05edfa 100644 --- a/src/print_air.zig +++ b/src/print_air.zig @@ -202,6 +202,7 @@ const Writer = struct { .trap, .breakpoint, + .dbg_empty_stmt, .unreach, .ret_addr, .frame_addr, diff --git a/src/print_zir.zig b/src/print_zir.zig index b8f4432e72..808ead0e79 100644 --- a/src/print_zir.zig +++ b/src/print_zir.zig @@ -621,6 +621,8 @@ const Writer = struct { .field_parent_ptr => try self.writeFieldParentPtr(stream, extended), .builtin_value => try self.writeBuiltinValue(stream, extended), .inplace_arith_result_ty => try self.writeInplaceArithResultTy(stream, extended), + + .dbg_empty_stmt => try stream.writeAll("))"), } } diff --git a/test/cases/compile_errors/align_zero.zig b/test/cases/compile_errors/align_zero.zig new file mode 100644 index 0000000000..a63523b853 --- /dev/null +++ b/test/cases/compile_errors/align_zero.zig @@ -0,0 +1,52 @@ +pub var global_var: i32 align(0) = undefined; + +pub export fn a() void { + _ = &global_var; +} + +pub extern var extern_var: i32 align(0); + +pub export fn b() void { + _ = &extern_var; +} + +pub export fn c() align(0) void {} + +pub export fn d() void { + _ = *align(0) fn () i32; +} + +pub export fn e() void { + var local_var: i32 align(0) = undefined; + _ = &local_var; +} + +pub export fn f() void { + _ = *align(0) i32; +} + +pub export fn g() void { + _ = []align(0) i32; +} + +pub export fn h() void { + _ = struct { field: i32 align(0) }; +} + +pub export fn i() void { + _ = union { field: i32 align(0) }; +} + +// error +// backend=stage2 +// target=native +// +// :1:31: error: alignment must be >= 1 +// :7:38: error: alignment must be >= 1 +// :13:25: error: alignment must be >= 1 +// :16:16: error: alignment must be >= 1 +// :20:30: error: alignment must be >= 1 +// :25:16: error: alignment must be >= 1 +// :29:17: error: alignment must be >= 1 +// :33:35: error: alignment must be >= 1 +// :37:34: error: alignment must be >= 1 diff --git a/test/cases/compile_errors/function_alignment_on_unsupported_target.zig b/test/cases/compile_errors/function_alignment_on_unsupported_target.zig index 7b033e0f60..e3ea1dd068 100644 --- a/test/cases/compile_errors/function_alignment_on_unsupported_target.zig +++ b/test/cases/compile_errors/function_alignment_on_unsupported_target.zig @@ -1,4 +1,4 @@ -export fn entry() align(0) void {} +export fn entry() align(64) void {} // error // backend=stage2 diff --git a/test/src/Debugger.zig b/test/src/Debugger.zig index 91eaa385ba..1e2289eb11 100644 --- a/test/src/Debugger.zig +++ b/test/src/Debugger.zig @@ -808,6 +808,424 @@ pub fn addTestsForTarget(db: *Debugger, target: Target) void { \\1 breakpoints deleted; 0 breakpoint locations disabled. 
}, ); + db.addLldbTest( + "step_single_stmt_loops", + target, + &.{ + .{ + .path = "step_single_stmt_loops.zig", + .source = + \\pub fn main() void { + \\ var x: u32 = 0; + \\ for (0..3) |_| { + \\ x +%= 1; + \\ } + \\ { + \\ var i: u32 = 0; + \\ while (i < 3) : (i +%= 1) { + \\ x +%= 1; + \\ } + \\ } + \\ { + \\ var i: u32 = 0; + \\ while (i < 3) { + \\ i +%= 1; + \\ } + \\ } + \\ inline for (0..3) |_| { + \\ x +%= 1; + \\ } + \\ { + \\ comptime var i: u32 = 0; + \\ inline while (i < 3) : (i +%= 1) { + \\ x +%= 1; + \\ } + \\ } + \\ { + \\ comptime var i: u32 = 0; + \\ inline while (i < 3) { + \\ i +%= 1; + \\ } + \\ } + \\ x +%= 1; + \\} + \\ + , + }, + }, + \\breakpoint set --name step_single_stmt_loops.main + \\process launch + \\thread step-in + \\#00 + \\frame variable x + \\thread step-in + \\#01 + \\frame variable x + \\thread step-in + \\#02 + \\frame variable x + \\thread step-in + \\#03 + \\frame variable x + \\thread step-in + \\#04 + \\frame variable x + \\thread step-in + \\#05 + \\frame variable x + \\thread step-in + \\#06 + \\frame variable x + \\thread step-in + \\#07 + \\frame variable x + \\thread step-in + \\#08 + \\frame variable x + \\thread step-in + \\#09 + \\frame variable x + \\thread step-in + \\#10 + \\frame variable x + \\thread step-in + \\#11 + \\frame variable x + \\thread step-in + \\#12 + \\frame variable x + \\thread step-in + \\#13 + \\frame variable x + \\thread step-in + \\#14 + \\frame variable x + \\thread step-in + \\#15 + \\frame variable x + \\thread step-in + \\#16 + \\frame variable x + \\thread step-in + \\#17 + \\frame variable x + \\thread step-in + \\#18 + \\frame variable x + \\thread step-in + \\#19 + \\frame variable x + \\thread step-in + \\#20 + \\frame variable x + \\thread step-in + \\#21 + \\frame variable x + \\thread step-in + \\#22 + \\frame variable x + \\thread step-in + \\#23 + \\frame variable x + \\thread step-in + \\#24 + \\frame variable x + \\thread step-in + \\#25 + \\frame variable x + \\thread step-in + \\#26 + \\frame variable x + \\thread step-in + \\#27 + \\frame variable x + \\thread step-in + \\#28 + \\frame variable x + \\thread step-in + \\#29 + \\frame variable x + \\thread step-in + \\#30 + \\frame variable x + \\thread step-in + \\#31 + \\frame variable x + \\thread step-in + \\#32 + \\frame variable x + \\thread step-in + \\#33 + \\frame variable x + \\thread step-in + \\#34 + \\frame variable x + \\thread step-in + \\#35 + \\frame variable x + \\thread step-in + \\#36 + \\frame variable x + \\thread step-in + \\#37 + \\frame variable x + \\thread step-in + \\#38 + \\frame variable x + \\thread step-in + \\#39 + \\frame variable x + \\thread step-in + \\#40 + \\frame variable x + \\thread step-in + \\#41 + \\frame variable x + \\thread step-in + \\#42 + \\frame variable x + \\thread step-in + \\#43 + \\frame variable x + \\thread step-in + \\#44 + \\frame variable x + \\thread step-in + \\#45 + \\frame variable x + \\ + , + &.{ + \\(lldb) #00 + \\(lldb) frame variable x + \\(u32) x = 0 + \\(lldb) thread step-in + , + \\(lldb) #01 + \\(lldb) frame variable x + \\(u32) x = 0 + \\(lldb) thread step-in + , + \\(lldb) #02 + \\(lldb) frame variable x + \\(u32) x = 1 + \\(lldb) thread step-in + , + \\(lldb) #03 + \\(lldb) frame variable x + \\(u32) x = 1 + \\(lldb) thread step-in + , + \\(lldb) #04 + \\(lldb) frame variable x + \\(u32) x = 1 + \\(lldb) thread step-in + , + \\(lldb) #05 + \\(lldb) frame variable x + \\(u32) x = 2 + \\(lldb) thread step-in + , + \\(lldb) #06 + \\(lldb) frame variable x + \\(u32) x = 
2 + \\(lldb) thread step-in + , + \\(lldb) #07 + \\(lldb) frame variable x + \\(u32) x = 2 + \\(lldb) thread step-in + , + \\(lldb) #08 + \\(lldb) frame variable x + \\(u32) x = 3 + \\(lldb) thread step-in + , + \\(lldb) #09 + \\(lldb) frame variable x + \\(u32) x = 3 + \\(lldb) thread step-in + , + \\(lldb) #10 + \\(lldb) frame variable x + \\(u32) x = 3 + \\(lldb) thread step-in + , + \\(lldb) #11 + \\(lldb) frame variable x + \\(u32) x = 3 + \\(lldb) thread step-in + , + \\(lldb) #12 + \\(lldb) frame variable x + \\(u32) x = 3 + \\(lldb) thread step-in + , + \\(lldb) #13 + \\(lldb) frame variable x + \\(u32) x = 4 + \\(lldb) thread step-in + , + \\(lldb) #14 + \\(lldb) frame variable x + \\(u32) x = 4 + \\(lldb) thread step-in + , + \\(lldb) #15 + \\(lldb) frame variable x + \\(u32) x = 4 + \\(lldb) thread step-in + , + \\(lldb) #16 + \\(lldb) frame variable x + \\(u32) x = 5 + \\(lldb) thread step-in + , + \\(lldb) #17 + \\(lldb) frame variable x + \\(u32) x = 5 + \\(lldb) thread step-in + , + \\(lldb) #18 + \\(lldb) frame variable x + \\(u32) x = 5 + \\(lldb) thread step-in + , + \\(lldb) #19 + \\(lldb) frame variable x + \\(u32) x = 6 + \\(lldb) thread step-in + , + \\(lldb) #20 + \\(lldb) frame variable x + \\(u32) x = 6 + \\(lldb) thread step-in + , + \\(lldb) #21 + \\(lldb) frame variable x + \\(u32) x = 6 + \\(lldb) thread step-in + , + \\(lldb) #22 + \\(lldb) frame variable x + \\(u32) x = 6 + \\(lldb) thread step-in + , + \\(lldb) #23 + \\(lldb) frame variable x + \\(u32) x = 6 + \\(lldb) thread step-in + , + \\(lldb) #24 + \\(lldb) frame variable x + \\(u32) x = 6 + \\(lldb) thread step-in + , + \\(lldb) #25 + \\(lldb) frame variable x + \\(u32) x = 6 + \\(lldb) thread step-in + , + \\(lldb) #26 + \\(lldb) frame variable x + \\(u32) x = 6 + \\(lldb) thread step-in + , + \\(lldb) #27 + \\(lldb) frame variable x + \\(u32) x = 6 + \\(lldb) thread step-in + , + \\(lldb) #28 + \\(lldb) frame variable x + \\(u32) x = 6 + \\(lldb) thread step-in + , + \\(lldb) #29 + \\(lldb) frame variable x + \\(u32) x = 6 + \\(lldb) thread step-in + , + \\(lldb) #30 + \\(lldb) frame variable x + \\(u32) x = 6 + \\(lldb) thread step-in + , + \\(lldb) #31 + \\(lldb) frame variable x + \\(u32) x = 6 + \\(lldb) thread step-in + , + \\(lldb) #32 + \\(lldb) frame variable x + \\(u32) x = 6 + \\(lldb) thread step-in + , + \\(lldb) #33 + \\(lldb) frame variable x + \\(u32) x = 7 + \\(lldb) thread step-in + , + \\(lldb) #34 + \\(lldb) frame variable x + \\(u32) x = 7 + \\(lldb) thread step-in + , + \\(lldb) #35 + \\(lldb) frame variable x + \\(u32) x = 8 + \\(lldb) thread step-in + , + \\(lldb) #36 + \\(lldb) frame variable x + \\(u32) x = 8 + \\(lldb) thread step-in + , + \\(lldb) #37 + \\(lldb) frame variable x + \\(u32) x = 9 + \\(lldb) thread step-in + , + \\(lldb) #38 + \\(lldb) frame variable x + \\(u32) x = 9 + \\(lldb) thread step-in + , + \\(lldb) #39 + \\(lldb) frame variable x + \\(u32) x = 10 + \\(lldb) thread step-in + , + \\(lldb) #40 + \\(lldb) frame variable x + \\(u32) x = 10 + \\(lldb) thread step-in + , + \\(lldb) #41 + \\(lldb) frame variable x + \\(u32) x = 11 + \\(lldb) thread step-in + , + \\(lldb) #42 + \\(lldb) frame variable x + \\(u32) x = 11 + \\(lldb) thread step-in + , + \\(lldb) #43 + \\(lldb) frame variable x + \\(u32) x = 12 + \\(lldb) thread step-in + , + \\(lldb) #44 + \\(lldb) frame variable x + \\(u32) x = 12 + \\(lldb) thread step-in + , + \\(lldb) #45 + \\(lldb) frame variable x + \\(u32) x = 12 + }, + ); db.addLldbTest( "inline_call", target, @@ -1533,17 +1951,17 @@ 
pub fn addTestsForTarget(db: *Debugger, target: Target) void { &.{ \\(lldb) frame variable --show-types -- list0 list0.len list0.capacity list0[0] list0[1] list0[2] list0.0 list0.1 list0.2 \\(std.multi_array_list.MultiArrayList(struct { u32, u8, u16 })) list0 = len=3 capacity=8 { - \\ (std.struct { u32, u8, u16 }) [0] = { + \\ (struct { u32, u8, u16 }) [0] = { \\ (u32) .@"0" = 1 \\ (u8) .@"1" = 2 \\ (u16) .@"2" = 3 \\ } - \\ (std.struct { u32, u8, u16 }) [1] = { + \\ (struct { u32, u8, u16 }) [1] = { \\ (u32) .@"0" = 4 \\ (u8) .@"1" = 5 \\ (u16) .@"2" = 6 \\ } - \\ (std.struct { u32, u8, u16 }) [2] = { + \\ (struct { u32, u8, u16 }) [2] = { \\ (u32) .@"0" = 7 \\ (u8) .@"1" = 8 \\ (u16) .@"2" = 9 @@ -1551,17 +1969,17 @@ pub fn addTestsForTarget(db: *Debugger, target: Target) void { \\} \\(usize) list0.len = 3 \\(usize) list0.capacity = 8 - \\(std.struct { u32, u8, u16 }) list0[0] = { + \\(struct { u32, u8, u16 }) list0[0] = { \\ (u32) .@"0" = 1 \\ (u8) .@"1" = 2 \\ (u16) .@"2" = 3 \\} - \\(std.struct { u32, u8, u16 }) list0[1] = { + \\(struct { u32, u8, u16 }) list0[1] = { \\ (u32) .@"0" = 4 \\ (u8) .@"1" = 5 \\ (u16) .@"2" = 6 \\} - \\(std.struct { u32, u8, u16 }) list0[2] = { + \\(struct { u32, u8, u16 }) list0[2] = { \\ (u32) .@"0" = 7 \\ (u8) .@"1" = 8 \\ (u16) .@"2" = 9 @@ -1583,17 +2001,17 @@ pub fn addTestsForTarget(db: *Debugger, target: Target) void { \\} \\(lldb) frame variable --show-types -- slice0 slice0.len slice0.capacity slice0[0] slice0[1] slice0[2] slice0.0 slice0.1 slice0.2 \\(std.multi_array_list.MultiArrayList(struct { u32, u8, u16 }).Slice) slice0 = len=3 capacity=8 { - \\ (std.struct { u32, u8, u16 }) [0] = { + \\ (struct { u32, u8, u16 }) [0] = { \\ (u32) .@"0" = 1 \\ (u8) .@"1" = 2 \\ (u16) .@"2" = 3 \\ } - \\ (std.struct { u32, u8, u16 }) [1] = { + \\ (struct { u32, u8, u16 }) [1] = { \\ (u32) .@"0" = 4 \\ (u8) .@"1" = 5 \\ (u16) .@"2" = 6 \\ } - \\ (std.struct { u32, u8, u16 }) [2] = { + \\ (struct { u32, u8, u16 }) [2] = { \\ (u32) .@"0" = 7 \\ (u8) .@"1" = 8 \\ (u16) .@"2" = 9 @@ -1601,17 +2019,17 @@ pub fn addTestsForTarget(db: *Debugger, target: Target) void { \\} \\(usize) slice0.len = 3 \\(usize) slice0.capacity = 8 - \\(std.struct { u32, u8, u16 }) slice0[0] = { + \\(struct { u32, u8, u16 }) slice0[0] = { \\ (u32) .@"0" = 1 \\ (u8) .@"1" = 2 \\ (u16) .@"2" = 3 \\} - \\(std.struct { u32, u8, u16 }) slice0[1] = { + \\(struct { u32, u8, u16 }) slice0[1] = { \\ (u32) .@"0" = 4 \\ (u8) .@"1" = 5 \\ (u16) .@"2" = 6 \\} - \\(std.struct { u32, u8, u16 }) slice0[2] = { + \\(struct { u32, u8, u16 }) slice0[2] = { \\ (u32) .@"0" = 7 \\ (u8) .@"1" = 8 \\ (u16) .@"2" = 9 diff --git a/tools/gen_stubs.zig b/tools/gen_stubs.zig index 23cfd57d55..c5663fa825 100644 --- a/tools/gen_stubs.zig +++ b/tools/gen_stubs.zig @@ -2,23 +2,51 @@ //! ./gen_stubs /path/to/musl/build-all >libc.S //! //! The directory 'build-all' is expected to contain these subdirectories: -//! arm x86 mips mips64 powerpc powerpc64 riscv32 riscv64 x86_64 loongarch64 +//! +//! * aarch64 +//! * arm +//! * i386 +//! * loongarch64 +//! * mips +//! * mips64 +//! * mipsn32 +//! * powerpc +//! * powerpc64 +//! * riscv32 +//! * riscv64 +//! * s390x +//! * x32 (currently broken) +//! * x86_64 //! //! ...each with 'lib/libc.so' inside of them. //! //! When building the resulting libc.S file, these defines are required: -//! * `-DPTR64`: when the architecture is 64-bit +//! * `-DTIME32`: When the target's primary time ABI is 32-bit +//! * `-DPTR64`: When the target has 64-bit pointers //! 
* One of the following, corresponding to the CPU architecture: -//! - `-DARCH_riscv32` -//! - `-DARCH_riscv64` +//! - `-DARCH_aarch64` +//! - `-DARCH_arm` +//! - `-DARCH_i386` +//! - `-DARCH_loongarch64` //! - `-DARCH_mips` //! - `-DARCH_mips64` -//! - `-DARCH_i386` -//! - `-DARCH_x86_64` +//! - `-DARCH_mipsn32` //! - `-DARCH_powerpc` //! - `-DARCH_powerpc64` -//! - `-DARCH_aarch64` -//! - `-DARCH_loongarch64` +//! - `-DARCH_riscv32` +//! - `-DARCH_riscv64` +//! - `-DARCH_s390x` +//! - `-DARCH_x32` +//! - `-DARCH_x86_64` +//! * One of the following, corresponding to the CPU architecture family: +//! - `-DFAMILY_aarch64` +//! - `-DFAMILY_arm` +//! - `-DFAMILY_loongarch` +//! - `-DFAMILY_mips` +//! - `-DFAMILY_powerpc` +//! - `-DFAMILY_riscv` +//! - `-DFAMILY_s390x` +//! - `-DFAMILY_x86` // TODO: pick the best index to put them into instead of at the end // - e.g. find a common previous symbol and put it after that one @@ -29,24 +57,85 @@ const builtin = std.builtin; const mem = std.mem; const log = std.log; const elf = std.elf; -const native_endian = @import("builtin").target.cpu.arch.endian(); +const native_endian = @import("builtin").cpu.arch.endian(); -const inputs = .{ - .riscv32, - .riscv64, - .loongarch64, - .mips, - .mips64, - .x86, - .x86_64, - .powerpc, - .powerpc64, - .aarch64, +const Arch = enum { + aarch64, + arm, + i386, + loongarch64, + mips, + mips64, + mipsn32, + powerpc, + powerpc64, + riscv32, + riscv64, + s390x, + x86_64, + + pub fn ptrSize(arch: Arch) u16 { + return switch (arch) { + .arm, + .i386, + .mips, + .mipsn32, + .powerpc, + .riscv32, + => 4, + .aarch64, + .loongarch64, + .mips64, + .powerpc64, + .riscv64, + .s390x, + .x86_64, + => 8, + }; + } + + pub fn isTime32(arch: Arch) bool { + return switch (arch) { + // This list will never grow; newer 32-bit ports will be time64 (e.g. riscv32). + .arm, + .i386, + .mips, + .mipsn32, + .powerpc, + => true, + else => false, + }; + } + + pub fn family(arch: Arch) Family { + return switch (arch) { + .aarch64 => .aarch64, + .arm => .arm, + .i386, .x86_64 => .x86, + .loongarch64 => .loongarch, + .mips, .mips64, .mipsn32 => .mips, + .powerpc, .powerpc64 => .powerpc, + .riscv32, .riscv64 => .riscv, + .s390x => .s390x, + }; + } }; -const arches: [inputs.len]std.Target.Cpu.Arch = blk: { - var result: [inputs.len]std.Target.Cpu.Arch = undefined; - for (inputs) |arch| { +const Family = enum { + aarch64, + arm, + loongarch, + mips, + powerpc, + riscv, + s390x, + x86, +}; + +const arches: [@typeInfo(Arch).@"enum".fields.len]Arch = blk: { + var result: [@typeInfo(Arch).@"enum".fields.len]Arch = undefined; + for (@typeInfo(Arch).@"enum".fields) |field| { + const arch: Arch = @enumFromInt(field.value); result[archIndex(arch)] = arch; } break :blk result; @@ -60,6 +149,31 @@ const MultiSym = struct { ty: u4, visib: elf.STV, + fn isSingleArch(ms: MultiSym) ?Arch { + var result: ?Arch = null; + inline for (@typeInfo(Arch).@"enum".fields) |field| { + const arch: Arch = @enumFromInt(field.value); + if (ms.present[archIndex(arch)]) { + if (result != null) return null; + result = arch; + } + } + return result; + } + + fn isFamily(ms: MultiSym) ?Family { + var result: ?Family = null; + inline for (@typeInfo(Arch).@"enum".fields) |field| { + const arch: Arch = @enumFromInt(field.value); + if (ms.present[archIndex(arch)]) { + const family = arch.family(); + if (result) |r| if (family != r) return null; + result = family; + } + } + return result; + } + fn allPresent(ms: MultiSym) bool { for (arches, 0..) 
|_, i| { if (!ms.present[i]) { @@ -69,17 +183,14 @@ const MultiSym = struct { return true; } - fn is32Only(ms: MultiSym) bool { - return ms.present[archIndex(.riscv32)] == true and - ms.present[archIndex(.riscv64)] == false and - ms.present[archIndex(.mips)] == true and - ms.present[archIndex(.mips64)] == false and - ms.present[archIndex(.x86)] == true and - ms.present[archIndex(.x86_64)] == false and - ms.present[archIndex(.powerpc)] == true and - ms.present[archIndex(.powerpc64)] == false and - ms.present[archIndex(.aarch64)] == false and - ms.present[archIndex(.loongarch64)] == false; + fn isTime32Only(ms: MultiSym) bool { + inline for (@typeInfo(Arch).@"enum".fields) |field| { + const arch: Arch = @enumFromInt(field.value); + if (ms.present[archIndex(arch)] != arch.isTime32()) { + return false; + } + } + return true; } fn commonSize(ms: MultiSym) ?u64 { @@ -112,48 +223,11 @@ const MultiSym = struct { return binding.?; } - fn isPtrSize(ms: MultiSym) bool { - const map = .{ - .{ .riscv32, 4 }, - .{ .riscv64, 8 }, - .{ .mips, 4 }, - .{ .mips64, 8 }, - .{ .x86, 4 }, - .{ .x86_64, 8 }, - .{ .powerpc, 4 }, - .{ .powerpc64, 8 }, - .{ .aarch64, 8 }, - .{ .loongarch64, 8 }, - }; - inline for (map) |item| { - const arch = item[0]; - const size = item[1]; + fn isPtrSize(ms: MultiSym, mult: u16) bool { + inline for (@typeInfo(Arch).@"enum".fields) |field| { + const arch: Arch = @enumFromInt(field.value); const arch_index = archIndex(arch); - if (ms.present[arch_index] and ms.size[arch_index] != size) { - return false; - } - } - return true; - } - - fn isPtr2Size(ms: MultiSym) bool { - const map = .{ - .{ .riscv32, 8 }, - .{ .riscv64, 16 }, - .{ .mips, 8 }, - .{ .mips64, 16 }, - .{ .x86, 8 }, - .{ .x86_64, 16 }, - .{ .powerpc, 8 }, - .{ .powerpc64, 16 }, - .{ .aarch64, 16 }, - .{ .loongarch64, 16 }, - }; - inline for (map) |item| { - const arch = item[0]; - const size = item[1]; - const arch_index = archIndex(arch); - if (ms.present[arch_index] and ms.size[arch_index] != size) { + if (ms.present[arch_index] and ms.size[arch_index] != arch.ptrSize() * mult) { return false; } } @@ -161,22 +235,26 @@ const MultiSym = struct { } fn isWeak64(ms: MultiSym) bool { - const map = .{ - .{ .riscv32, 1 }, - .{ .riscv64, 2 }, - .{ .mips, 1 }, - .{ .mips64, 2 }, - .{ .x86, 1 }, - .{ .x86_64, 2 }, - .{ .powerpc, 1 }, - .{ .powerpc64, 2 }, - .{ .aarch64, 2 }, - .{ .loongarch64, 2 }, - }; - inline for (map) |item| { - const arch = item[0]; - const binding = item[1]; + inline for (@typeInfo(Arch).@"enum".fields) |field| { + const arch: Arch = @enumFromInt(field.value); const arch_index = archIndex(arch); + const binding: u4 = switch (arch.ptrSize()) { + 4 => std.elf.STB_GLOBAL, + 8 => std.elf.STB_WEAK, + else => unreachable, + }; + if (ms.present[arch_index] and ms.binding[arch_index] != binding) { + return false; + } + } + return true; + } + + fn isWeakTime64(ms: MultiSym) bool { + inline for (@typeInfo(Arch).@"enum".fields) |field| { + const arch: Arch = @enumFromInt(field.value); + const arch_index = archIndex(arch); + const binding: u4 = if (arch.isTime32()) std.elf.STB_GLOBAL else std.elf.STB_WEAK; if (ms.present[arch_index] and ms.binding[arch_index] != binding) { return false; } @@ -189,10 +267,9 @@ const Parse = struct { arena: mem.Allocator, sym_table: *std.StringArrayHashMap(MultiSym), sections: *std.StringArrayHashMap(void), - blacklist: std.StringArrayHashMap(void), elf_bytes: []align(@alignOf(elf.Elf64_Ehdr)) u8, header: elf.Header, - arch: std.Target.Cpu.Arch, + arch: Arch, }; pub fn main() !void { @@ 
-207,16 +284,10 @@ pub fn main() !void { var sym_table = std.StringArrayHashMap(MultiSym).init(arena); var sections = std.StringArrayHashMap(void).init(arena); - var blacklist = std.StringArrayHashMap(void).init(arena); - - try blacklist.ensureUnusedCapacity(blacklisted_symbols.len); - for (blacklisted_symbols) |name| { - blacklist.putAssumeCapacityNoClobber(name, {}); - } for (arches) |arch| { const libc_so_path = try std.fmt.allocPrint(arena, "{s}/lib/libc.so", .{ - archMuslName(arch), + @tagName(arch), }); // Read the ELF header. @@ -238,7 +309,6 @@ pub fn main() !void { .arena = arena, .sym_table = &sym_table, .sections = §ions, - .blacklist = blacklist, .elf_bytes = elf_bytes, .header = header, .arch = arch, @@ -268,6 +338,13 @@ pub fn main() !void { \\#define PTR2_SIZE_BYTES 8 \\#endif \\ + \\#ifdef TIME32 + \\#define WEAKTIME64 .globl + \\#else + \\#define WEAKTIME64 .weak + \\#endif + \\ + \\ ); // Sort the symbols for deterministic output and cleaner vcs diffs. @@ -301,7 +378,7 @@ pub fn main() !void { sym_table.sort(SymTableSort{ .sym_table = &sym_table, .sections = §ions }); var prev_section: u16 = std.math.maxInt(u16); - var prev_pp_state: enum { none, ptr32, special } = .none; + var prev_pp_state: union(enum) { all, single: Arch, multi, family: Family, time32 } = .all; for (sym_table.values(), 0..) |multi_sym, sym_index| { const name = sym_table.keys()[sym_index]; @@ -313,32 +390,66 @@ pub fn main() !void { if (multi_sym.allPresent()) { switch (prev_pp_state) { - .none => {}, - .ptr32, .special => { + .all => {}, + .single, .multi, .family, .time32 => { try stdout.writeAll("#endif\n"); - prev_pp_state = .none; + prev_pp_state = .all; }, } - } else if (multi_sym.is32Only()) { + } else if (multi_sym.isSingleArch()) |arch| { switch (prev_pp_state) { - .none => { - try stdout.writeAll("#ifdef PTR32\n"); - prev_pp_state = .ptr32; + .all => { + try stdout.print("#ifdef ARCH_{s}\n", .{@tagName(arch)}); + prev_pp_state = .{ .single = arch }; }, - .special => { - try stdout.writeAll("#endif\n#ifdef PTR32\n"); - prev_pp_state = .ptr32; + .multi, .family, .time32 => { + try stdout.print("#endif\n#ifdef ARCH_{s}\n", .{@tagName(arch)}); + prev_pp_state = .{ .single = arch }; }, - .ptr32 => {}, + .single => |prev_arch| { + if (arch != prev_arch) { + try stdout.print("#endif\n#ifdef ARCH_{s}\n", .{@tagName(arch)}); + prev_pp_state = .{ .single = arch }; + } + }, + } + } else if (multi_sym.isFamily()) |family| { + switch (prev_pp_state) { + .all => { + try stdout.print("#ifdef FAMILY_{s}\n", .{@tagName(family)}); + prev_pp_state = .{ .family = family }; + }, + .single, .multi, .time32 => { + try stdout.print("#endif\n#ifdef FAMILY_{s}\n", .{@tagName(family)}); + prev_pp_state = .{ .family = family }; + }, + .family => |prev_family| { + if (family != prev_family) { + try stdout.print("#endif\n#ifdef FAMILY_{s}\n", .{@tagName(family)}); + prev_pp_state = .{ .family = family }; + } + }, + } + } else if (multi_sym.isTime32Only()) { + switch (prev_pp_state) { + .all => { + try stdout.writeAll("#ifdef TIME32\n"); + prev_pp_state = .time32; + }, + .single, .multi, .family => { + try stdout.writeAll("#endif\n#ifdef TIME32\n"); + prev_pp_state = .time32; + }, + .time32 => {}, } } else { switch (prev_pp_state) { - .none => {}, - .special, .ptr32 => { + .all => {}, + .single, .multi, .family, .time32 => { try stdout.writeAll("#endif\n"); }, } - prev_pp_state = .special; + prev_pp_state = .multi; var first = true; try stdout.writeAll("#if "); @@ -366,6 +477,8 @@ pub fn main() !void { } } else if 
(multi_sym.isWeak64()) { try stdout.print("WEAK64 {s}\n", .{name}); + } else if (multi_sym.isWeakTime64()) { + try stdout.print("WEAKTIME64 {s}\n", .{name}); } else { for (arches, 0..) |arch, i| { log.info("symbol '{s}' binding on {s}: {d}", .{ @@ -384,9 +497,9 @@ pub fn main() !void { try stdout.print(".type {s}, %object;\n", .{name}); if (multi_sym.commonSize()) |size| { try stdout.print(".size {s}, {d}\n", .{ name, size }); - } else if (multi_sym.isPtrSize()) { + } else if (multi_sym.isPtrSize(1)) { try stdout.print(".size {s}, PTR_SIZE_BYTES\n", .{name}); - } else if (multi_sym.isPtr2Size()) { + } else if (multi_sym.isPtrSize(2)) { try stdout.print(".size {s}, PTR2_SIZE_BYTES\n", .{name}); } else { for (arches, 0..) |arch, i| { @@ -410,8 +523,8 @@ pub fn main() !void { } switch (prev_pp_state) { - .none => {}, - .ptr32, .special => try stdout.writeAll("#endif\n"), + .all => {}, + .single, .multi, .family, .time32 => try stdout.writeAll("#endif\n"), } } @@ -487,12 +600,17 @@ fn parseElf(parse: Parse, comptime is_64: bool, comptime endian: builtin.Endian) const visib = @as(elf.STV, @enumFromInt(@as(u2, @truncate(sym.st_other)))); const size = s(sym.st_size); - if (parse.blacklist.contains(name)) continue; - if (size == 0) { log.warn("{s}: symbol '{s}' has size 0", .{ @tagName(parse.arch), name }); } + if (sym.st_shndx == elf.SHN_UNDEF) { + log.debug("{s}: skipping '{s}' due to it being undefined", .{ + @tagName(parse.arch), name, + }); + continue; + } + switch (binding) { elf.STB_GLOBAL, elf.STB_WEAK => {}, else => { @@ -590,40 +708,8 @@ fn parseElf(parse: Parse, comptime is_64: bool, comptime endian: builtin.Endian) } } -fn archIndex(arch: std.Target.Cpu.Arch) u8 { - return switch (arch) { - // zig fmt: off - .riscv64 => 0, - .mips => 1, - .mips64 => 2, - .x86 => 3, - .x86_64 => 4, - .powerpc => 5, - .powerpc64 => 6, - .aarch64 => 7, - .riscv32 => 8, - .loongarch64 => 9, - else => unreachable, - // zig fmt: on - }; -} - -fn archMuslName(arch: std.Target.Cpu.Arch) []const u8 { - return switch (arch) { - // zig fmt: off - .riscv64 => "riscv64", - .mips => "mips", - .mips64 => "mips64", - .x86 => "i386", - .x86_64 => "x86_64", - .powerpc => "powerpc", - .powerpc64 => "powerpc64", - .aarch64 => "aarch64", - .riscv32 => "riscv32", - .loongarch64 => "loongarch64", - else => unreachable, - // zig fmt: on - }; +fn archIndex(arch: Arch) u8 { + return @intFromEnum(arch); } fn archSetName(arch_set: [arches.len]bool) []const u8 { @@ -639,529 +725,3 @@ fn fatal(comptime format: []const u8, args: anytype) noreturn { log.err(format, args); std.process.exit(1); } - -const blacklisted_symbols = [_][]const u8{ - "__absvdi2", - "__absvsi2", - "__absvti2", - "__adddf3", - "__addkf3", - "__addodi4", - "__addosi4", - "__addoti4", - "__addsf3", - "__addtf3", - "__addxf3", - "__ashldi3", - "__ashlsi3", - "__ashlti3", - "__ashrdi3", - "__ashrsi3", - "__ashrti3", - "__atomic_compare_exchange", - "__atomic_compare_exchange_1", - "__atomic_compare_exchange_2", - "__atomic_compare_exchange_4", - "__atomic_compare_exchange_8", - "__atomic_exchange", - "__atomic_exchange_1", - "__atomic_exchange_2", - "__atomic_exchange_4", - "__atomic_exchange_8", - "__atomic_fetch_add_1", - "__atomic_fetch_add_2", - "__atomic_fetch_add_4", - "__atomic_fetch_add_8", - "__atomic_fetch_and_1", - "__atomic_fetch_and_2", - "__atomic_fetch_and_4", - "__atomic_fetch_and_8", - "__atomic_fetch_nand_1", - "__atomic_fetch_nand_2", - "__atomic_fetch_nand_4", - "__atomic_fetch_nand_8", - "__atomic_fetch_or_1", - "__atomic_fetch_or_2", - 
"__atomic_fetch_or_4", - "__atomic_fetch_or_8", - "__atomic_fetch_sub_1", - "__atomic_fetch_sub_2", - "__atomic_fetch_sub_4", - "__atomic_fetch_sub_8", - "__atomic_fetch_xor_1", - "__atomic_fetch_xor_2", - "__atomic_fetch_xor_4", - "__atomic_fetch_xor_8", - "__atomic_load", - "__atomic_load_1", - "__atomic_load_2", - "__atomic_load_4", - "__atomic_load_8", - "__atomic_store", - "__atomic_store_1", - "__atomic_store_2", - "__atomic_store_4", - "__atomic_store_8", - "__bswapdi2", - "__bswapsi2", - "__bswapti2", - "__ceilh", - "__ceilx", - "__clear_cache", - "__clzdi2", - "__chk_fail", - "__clzsi2", - "__clzti2", - "__cmpdf2", - "__cmpdi2", - "__cmpsf2", - "__cmpsi2", - "__cmptf2", - "__cmpti2", - "__cosh", - "__cosx", - "__ctzdi2", - "__ctzsi2", - "__ctzti2", - "__divdf3", - "__divdi3", - "__divkf3", - "__divmoddi4", - "__divmodsi4", - "__divmodti4", - "__divsf3", - "__divsi3", - "__divtf3", - "__divti3", - "__divxf3", - "__dlstart", - "__eqdf2", - "__eqkf2", - "__eqsf2", - "__eqtf2", - "__eqxf2", - "__exp2h", - "__exp2x", - "__exph", - "__expx", - "__extenddfkf2", - "__extenddftf2", - "__extenddfxf2", - "__extendhfsf2", - "__extendhftf2", - "__extendhfxf2", - "__extendsfdf2", - "__extendsfkf2", - "__extendsftf2", - "__extendsfxf2", - "__extendxftf2", - "__fabsh", - "__fabsx", - "__ffsdi2", - "__ffssi2", - "__ffsti2", - "__fixdfdi", - "__fixdfsi", - "__fixdfti", - "__fixkfdi", - "__fixkfsi", - "__fixkfti", - "__fixsfdi", - "__fixsfsi", - "__fixsfti", - "__fixtfdi", - "__fixtfsi", - "__fixtfti", - "__fixunsdfdi", - "__fixunsdfsi", - "__fixunsdfti", - "__fixunskfdi", - "__fixunskfsi", - "__fixunskfti", - "__fixunssfdi", - "__fixunssfsi", - "__fixunssfti", - "__fixunstfdi", - "__fixunstfsi", - "__fixunstfti", - "__fixunsxfdi", - "__fixunsxfsi", - "__fixunsxfti", - "__fixxfdi", - "__fixxfsi", - "__fixxfti", - "__floatdidf", - "__floatdikf", - "__floatdisf", - "__floatditf", - "__floatdixf", - "__floatsidf", - "__floatsikf", - "__floatsisf", - "__floatsitf", - "__floatsixf", - "__floattidf", - "__floattikf", - "__floattisf", - "__floattitf", - "__floattixf", - "__floatundidf", - "__floatundikf", - "__floatundisf", - "__floatunditf", - "__floatundixf", - "__floatunsidf", - "__floatunsikf", - "__floatunsisf", - "__floatunsitf", - "__floatunsixf", - "__floatuntidf", - "__floatuntikf", - "__floatuntisf", - "__floatuntitf", - "__floatuntixf", - "__floorh", - "__floorx", - "__fmah", - "__fmax", - "__fmaxh", - "__fmaxx", - "__fminh", - "__fminx", - "__fmodh", - "__fmodx", - "__gedf2", - "__gekf2", - "__gesf2", - "__getf2", - "__gexf2", - "__gnu_f2h_ieee", - "__gnu_h2f_ieee", - "__gtdf2", - "__gtkf2", - "__gtsf2", - "__gttf2", - "__gtxf2", - "__ledf2", - "__lekf2", - "__lesf2", - "__letf2", - "__lexf2", - "__log10h", - "__log10x", - "__log2h", - "__log2x", - "__logh", - "__logx", - "__lshrdi3", - "__lshrsi3", - "__lshrti3", - "__ltdf2", - "__ltkf2", - "__ltsf2", - "__lttf2", - "__ltxf2", - "__memcpy_chk", - "__memmove_chk", - "__memset", - "__memset_chk", - "__moddi3", - "__modsi3", - "__modti3", - "__muldc3", - "__muldf3", - "__muldi3", - "__mulkc3", - "__mulkf3", - "__mulodi4", - "__mulosi4", - "__muloti4", - "__mulsc3", - "__mulsf3", - "__mulsi3", - "__multc3", - "__multf3", - "__multi3", - "__mulxc3", - "__mulxf3", - "__nedf2", - "__negdf2", - "__negdi2", - "__negsf2", - "__negsi2", - "__negti2", - "__negvdi2", - "__negvsi2", - "__negvti2", - "__nekf2", - "__nesf2", - "__netf2", - "__nexf2", - "__paritydi2", - "__paritysi2", - "__parityti2", - "__popcountdi2", - "__popcountsi2", - "__popcountti2", - 
"__powidf2", - "__powihf2", - "__powikf2", - "__powisf2", - "__powitf2", - "__powixf2", - "__roundh", - "__roundx", - "__sincosh", - "__sincosx", - "__sinh", - "__sinx", - "__sqrth", - "__sqrtx", - "__strcat_chk", - "__strcpy_chk", - "__strncat_chk", - "__strncpy_chk", - "__subdf3", - "__subkf3", - "__subodi4", - "__subosi4", - "__suboti4", - "__subsf3", - "__subtf3", - "__subxf3", - "__tanh", - "__tanx", - "__truncdfhf2", - "__truncdfsf2", - "__trunch", - "__trunckfdf2", - "__trunckfsf2", - "__truncsfhf2", - "__trunctfdf2", - "__trunctfhf2", - "__trunctfsf2", - "__trunctfxf2", - "__truncx", - "__truncxfdf2", - "__truncxfhf2", - "__truncxfsf2", - "__ucmpdi2", - "__ucmpsi2", - "__ucmpti2", - "__udivdi3", - "__udivei4", - "__udivmoddi4", - "__udivmodsi4", - "__udivmodti4", - "__udivsi3", - "__udivti3", - "__umoddi3", - "__umodei4", - "__umodsi3", - "__umodti3", - "__unorddf2", - "__unordkf2", - "__unordsf2", - "__unordtf2", - "__zig_probe_stack", - "ceilf128", - "ceilq", - "cosf128", - "cosq", - "exp2f128", - "exp2q", - "expf128", - "expq", - "fabsf128", - "fabsq", - "fabsq.2", - "fabsq.3", - "floorf128", - "floorq", - "fmaf128", - "fmaq", - "fmaxf128", - "fmaxq", - "fmaxq.2", - "fmaxq.3", - "fminf128", - "fminq", - "fmodf128", - "fmodq", - "log10f128", - "log10q", - "log2f128", - "log2q", - "logf128", - "logq", - "roundf128", - "roundq", - "sincosf128", - "sincosq", - "sinf128", - "sinq", - "sqrtf128", - "sqrtq", - "tanf128", - "tanq", - "truncf128", - "truncq", - "__aarch64_cas16_acq", - "__aarch64_cas16_acq_rel", - "__aarch64_cas16_rel", - "__aarch64_cas16_relax", - "__aarch64_cas1_acq", - "__aarch64_cas1_acq_rel", - "__aarch64_cas1_rel", - "__aarch64_cas1_relax", - "__aarch64_cas2_acq", - "__aarch64_cas2_acq_rel", - "__aarch64_cas2_rel", - "__aarch64_cas2_relax", - "__aarch64_cas4_acq", - "__aarch64_cas4_acq_rel", - "__aarch64_cas4_rel", - "__aarch64_cas4_relax", - "__aarch64_cas8_acq", - "__aarch64_cas8_acq_rel", - "__aarch64_cas8_rel", - "__aarch64_cas8_relax", - "__aarch64_ldadd1_acq", - "__aarch64_ldadd1_acq_rel", - "__aarch64_ldadd1_rel", - "__aarch64_ldadd1_relax", - "__aarch64_ldadd2_acq", - "__aarch64_ldadd2_acq_rel", - "__aarch64_ldadd2_rel", - "__aarch64_ldadd2_relax", - "__aarch64_ldadd4_acq", - "__aarch64_ldadd4_acq_rel", - "__aarch64_ldadd4_rel", - "__aarch64_ldadd4_relax", - "__aarch64_ldadd8_acq", - "__aarch64_ldadd8_acq_rel", - "__aarch64_ldadd8_rel", - "__aarch64_ldadd8_relax", - "__aarch64_ldclr1_acq", - "__aarch64_ldclr1_acq_rel", - "__aarch64_ldclr1_rel", - "__aarch64_ldclr1_relax", - "__aarch64_ldclr2_acq", - "__aarch64_ldclr2_acq_rel", - "__aarch64_ldclr2_rel", - "__aarch64_ldclr2_relax", - "__aarch64_ldclr4_acq", - "__aarch64_ldclr4_acq_rel", - "__aarch64_ldclr4_rel", - "__aarch64_ldclr4_relax", - "__aarch64_ldclr8_acq", - "__aarch64_ldclr8_acq_rel", - "__aarch64_ldclr8_rel", - "__aarch64_ldclr8_relax", - "__aarch64_ldeor1_acq", - "__aarch64_ldeor1_acq_rel", - "__aarch64_ldeor1_rel", - "__aarch64_ldeor1_relax", - "__aarch64_ldeor2_acq", - "__aarch64_ldeor2_acq_rel", - "__aarch64_ldeor2_rel", - "__aarch64_ldeor2_relax", - "__aarch64_ldeor4_acq", - "__aarch64_ldeor4_acq_rel", - "__aarch64_ldeor4_rel", - "__aarch64_ldeor4_relax", - "__aarch64_ldeor8_acq", - "__aarch64_ldeor8_acq_rel", - "__aarch64_ldeor8_rel", - "__aarch64_ldeor8_relax", - "__aarch64_ldset1_acq", - "__aarch64_ldset1_acq_rel", - "__aarch64_ldset1_rel", - "__aarch64_ldset1_relax", - "__aarch64_ldset2_acq", - "__aarch64_ldset2_acq_rel", - "__aarch64_ldset2_rel", - "__aarch64_ldset2_relax", - 
"__aarch64_ldset4_acq", - "__aarch64_ldset4_acq_rel", - "__aarch64_ldset4_rel", - "__aarch64_ldset4_relax", - "__aarch64_ldset8_acq", - "__aarch64_ldset8_acq_rel", - "__aarch64_ldset8_rel", - "__aarch64_ldset8_relax", - "__aarch64_swp1_acq", - "__aarch64_swp1_acq_rel", - "__aarch64_swp1_rel", - "__aarch64_swp1_relax", - "__aarch64_swp2_acq", - "__aarch64_swp2_acq_rel", - "__aarch64_swp2_rel", - "__aarch64_swp2_relax", - "__aarch64_swp4_acq", - "__aarch64_swp4_acq_rel", - "__aarch64_swp4_rel", - "__aarch64_swp4_relax", - "__aarch64_swp8_acq", - "__aarch64_swp8_acq_rel", - "__aarch64_swp8_rel", - "__aarch64_swp8_relax", - "__addhf3", - "__atomic_compare_exchange_16", - "__atomic_exchange_16", - "__atomic_fetch_add_16", - "__atomic_fetch_and_16", - "__atomic_fetch_nand_16", - "__atomic_fetch_or_16", - "__atomic_fetch_sub_16", - "__atomic_fetch_umax_1", - "__atomic_fetch_umax_16", - "__atomic_fetch_umax_2", - "__atomic_fetch_umax_4", - "__atomic_fetch_umax_8", - "__atomic_fetch_umin_1", - "__atomic_fetch_umin_16", - "__atomic_fetch_umin_2", - "__atomic_fetch_umin_4", - "__atomic_fetch_umin_8", - "__atomic_fetch_xor_16", - "__atomic_load_16", - "__atomic_store_16", - "__cmphf2", - "__cmpxf2", - "__divdc3", - "__divhc3", - "__divhf3", - "__divkc3", - "__divsc3", - "__divtc3", - "__divxc3", - "__eqhf2", - "__extendhfdf2", - "__fixhfdi", - "__fixhfsi", - "__fixhfti", - "__fixunshfdi", - "__fixunshfsi", - "__fixunshfti", - "__floatdihf", - "__floatsihf", - "__floattihf", - "__floatundihf", - "__floatunsihf", - "__floatuntihf", - "__gehf2", - "__gthf2", - "__lehf2", - "__lthf2", - "__mulhc3", - "__mulhf3", - "__neghf2", - "__negkf2", - "__negtf2", - "__negxf2", - "__nehf2", - "__subhf3", - "__unordhf2", - "__unordxf2", -};