diff --git a/ci/x86_64-linux-debug.sh b/ci/x86_64-linux-debug.sh index 7204fa29f1..f849e9fca0 100755 --- a/ci/x86_64-linux-debug.sh +++ b/ci/x86_64-linux-debug.sh @@ -12,7 +12,7 @@ CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.14.0-dev.1622+2ac543388" PREFIX="$HOME/deps/$CACHE_BASENAME" ZIG="$PREFIX/bin/zig" -export PATH="$HOME/deps/wasmtime-v10.0.2-$ARCH-linux:$HOME/deps/qemu-linux-x86_64-9.1.0/bin:$HOME/local/bin:$PATH" +export PATH="$HOME/deps/wasmtime-v10.0.2-$ARCH-linux:$HOME/deps/qemu-linux-x86_64-9.2.0-rc1/bin:$HOME/local/bin:$PATH" # Make the `zig version` number consistent. # This will affect the cmake command below. @@ -64,7 +64,7 @@ stage3-debug/bin/zig build \ stage3-debug/bin/zig build test docs \ --maxrss 21000000000 \ - -Dlldb=$HOME/deps/lldb-zig/Debug-6ece8bda1/bin/lldb \ + -Dlldb=$HOME/deps/lldb-zig/Debug-bfeada333/bin/lldb \ -fqemu \ -fwasmtime \ -Dstatic-llvm \ diff --git a/ci/x86_64-linux-release.sh b/ci/x86_64-linux-release.sh index b51b6f12d6..4ab6b4810c 100755 --- a/ci/x86_64-linux-release.sh +++ b/ci/x86_64-linux-release.sh @@ -12,7 +12,7 @@ CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.14.0-dev.1622+2ac543388" PREFIX="$HOME/deps/$CACHE_BASENAME" ZIG="$PREFIX/bin/zig" -export PATH="$HOME/deps/wasmtime-v10.0.2-$ARCH-linux:$HOME/deps/qemu-linux-x86_64-9.1.0/bin:$HOME/local/bin:$PATH" +export PATH="$HOME/deps/wasmtime-v10.0.2-$ARCH-linux:$HOME/deps/qemu-linux-x86_64-9.2.0-rc1/bin:$HOME/local/bin:$PATH" # Make the `zig version` number consistent. # This will affect the cmake command below. @@ -64,7 +64,7 @@ stage3-release/bin/zig build \ stage3-release/bin/zig build test docs \ --maxrss 21000000000 \ - -Dlldb=$HOME/deps/lldb-zig/Release-6ece8bda1/bin/lldb \ + -Dlldb=$HOME/deps/lldb-zig/Release-bfeada333/bin/lldb \ -fqemu \ -fwasmtime \ -Dstatic-llvm \ diff --git a/lib/compiler_rt.zig b/lib/compiler_rt.zig index 02d3f75c5b..82aeb7f88e 100644 --- a/lib/compiler_rt.zig +++ b/lib/compiler_rt.zig @@ -220,6 +220,7 @@ comptime { _ = @import("compiler_rt/aulldiv.zig"); _ = @import("compiler_rt/aullrem.zig"); _ = @import("compiler_rt/clear_cache.zig"); + _ = @import("compiler_rt/hexagon.zig"); if (@import("builtin").object_format != .c) { _ = @import("compiler_rt/atomics.zig"); diff --git a/lib/compiler_rt/count0bits.zig b/lib/compiler_rt/count0bits.zig index 60da0390da..0045f5741f 100644 --- a/lib/compiler_rt/count0bits.zig +++ b/lib/compiler_rt/count0bits.zig @@ -73,13 +73,13 @@ fn __clzsi2_thumb1() callconv(.Naked) void { \\ subs r1, #4 \\ movs r0, r2 \\ 1: - \\ ldr r3, =LUT + \\ ldr r3, .lut \\ ldrb r0, [r3, r0] \\ subs r0, r1, r0 \\ bx lr \\ .p2align 2 \\ // Number of bits set in the 0-15 range - \\ LUT: + \\ .lut: \\ .byte 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4 ); diff --git a/lib/compiler_rt/hexagon.zig b/lib/compiler_rt/hexagon.zig new file mode 100644 index 0000000000..de7fd96491 --- /dev/null +++ b/lib/compiler_rt/hexagon.zig @@ -0,0 +1,1787 @@ +const builtin = @import("builtin"); +const common = @import("./common.zig"); + +fn __hexagon_divsi3() callconv(.naked) noreturn { + asm volatile ( + \\ { + \\ p0 = cmp.ge(r0,#0) + \\ p1 = cmp.ge(r1,#0) + \\ r1 = abs(r0) + \\ r2 = abs(r1) + \\ } + \\ { + \\ r3 = cl0(r1) + \\ r4 = cl0(r2) + \\ r5 = sub(r1,r2) + \\ p2 = cmp.gtu(r2,r1) + \\ } + \\ { + \\ r0 = #0 + \\ p1 = xor(p0,p1) + \\ p0 = cmp.gtu(r2,r5) + \\ if (p2) jumpr r31 + \\ } + \\ + \\ { + \\ r0 = mux(p1,#-1,#1) + \\ if (p0) jumpr r31 + \\ r4 = sub(r4,r3) + \\ r3 = #1 + \\ } + \\ { + \\ r0 = #0 + \\ r3:2 = vlslw(r3:2,r4) + \\ loop0(1f,r4) + \\ } + \\ .falign + \\ 1: + \\ { + \\ 
p0 = cmp.gtu(r2,r1) + \\ if (!p0.new) r1 = sub(r1,r2) + \\ if (!p0.new) r0 = add(r0,r3) + \\ r3:2 = vlsrw(r3:2,#1) + \\ }:endloop0 + \\ { + \\ p0 = cmp.gtu(r2,r1) + \\ if (!p0.new) r0 = add(r0,r3) + \\ if (!p1) jumpr r31 + \\ } + \\ { + \\ r0 = neg(r0) + \\ jumpr r31 + \\ } + ); +} + +fn __hexagon_umodsi3() callconv(.naked) noreturn { + asm volatile ( + \\ { + \\ r2 = cl0(r0) + \\ r3 = cl0(r1) + \\ p0 = cmp.gtu(r1,r0) + \\ } + \\ { + \\ r2 = sub(r3,r2) + \\ if (p0) jumpr r31 + \\ } + \\ { + \\ loop0(1f,r2) + \\ p1 = cmp.eq(r2,#0) + \\ r2 = lsl(r1,r2) + \\ } + \\ .falign + \\ 1: + \\ { + \\ p0 = cmp.gtu(r2,r0) + \\ if (!p0.new) r0 = sub(r0,r2) + \\ r2 = lsr(r2,#1) + \\ if (p1) r1 = #0 + \\ }:endloop0 + \\ { + \\ p0 = cmp.gtu(r2,r0) + \\ if (!p0.new) r0 = sub(r0,r1) + \\ jumpr r31 + \\ } + ); +} + +fn __hexagon_sqrtf() callconv(.naked) noreturn { + asm volatile ( + \\ { + \\ r3,p0 = sfinvsqrta(r0) + \\ r5 = sffixupr(r0) + \\ r4 = ##0x3f000000 + \\ r1:0 = combine(#0,#0) + \\ } + \\ { + \\ r0 += sfmpy(r3,r5):lib + \\ r1 += sfmpy(r3,r4):lib + \\ r2 = r4 + \\ r3 = r5 + \\ } + \\ { + \\ r2 -= sfmpy(r0,r1):lib + \\ p1 = sfclass(r5,#1) + \\ + \\ } + \\ { + \\ r0 += sfmpy(r0,r2):lib + \\ r1 += sfmpy(r1,r2):lib + \\ r2 = r4 + \\ r3 = r5 + \\ } + \\ { + \\ r2 -= sfmpy(r0,r1):lib + \\ r3 -= sfmpy(r0,r0):lib + \\ } + \\ { + \\ r0 += sfmpy(r1,r3):lib + \\ r1 += sfmpy(r1,r2):lib + \\ r2 = r4 + \\ r3 = r5 + \\ } + \\ { + \\ + \\ r3 -= sfmpy(r0,r0):lib + \\ if (p1) r0 = or(r0,r5) + \\ } + \\ { + \\ r0 += sfmpy(r1,r3,p0):scale + \\ jumpr r31 + \\ } + ); +} + +fn __hexagon_moddi3() callconv(.naked) noreturn { + asm volatile ( + \\ { + \\ p3 = tstbit(r1,#31) + \\ } + \\ { + \\ r1:0 = abs(r1:0) + \\ r3:2 = abs(r3:2) + \\ } + \\ { + \\ r6 = cl0(r1:0) + \\ r7 = cl0(r3:2) + \\ r5:4 = r3:2 + \\ r3:2 = r1:0 + \\ } + \\ { + \\ r10 = sub(r7,r6) + \\ r1:0 = #0 + \\ r15:14 = #1 + \\ } + \\ { + \\ r11 = add(r10,#1) + \\ r13:12 = lsl(r5:4,r10) + \\ r15:14 = lsl(r15:14,r10) + \\ } + \\ { + \\ p0 = cmp.gtu(r5:4,r3:2) + \\ loop0(1f,r11) + \\ } + \\ { + \\ if (p0) jump .hexagon_moddi3_return + \\ } + \\ .falign + \\ 1: + \\ { + \\ p0 = cmp.gtu(r13:12,r3:2) + \\ } + \\ { + \\ r7:6 = sub(r3:2, r13:12) + \\ r9:8 = add(r1:0, r15:14) + \\ } + \\ { + \\ r1:0 = vmux(p0, r1:0, r9:8) + \\ r3:2 = vmux(p0, r3:2, r7:6) + \\ } + \\ { + \\ r15:14 = lsr(r15:14, #1) + \\ r13:12 = lsr(r13:12, #1) + \\ }:endloop0 + \\ + \\ .hexagon_moddi3_return: + \\ { + \\ r1:0 = neg(r3:2) + \\ } + \\ { + \\ r1:0 = vmux(p3,r1:0,r3:2) + \\ jumpr r31 + \\ } + ); +} + +fn __hexagon_divdi3() callconv(.naked) noreturn { + asm volatile ( + \\ { + \\ p2 = tstbit(r1,#31) + \\ p3 = tstbit(r3,#31) + \\ } + \\ { + \\ r1:0 = abs(r1:0) + \\ r3:2 = abs(r3:2) + \\ } + \\ { + \\ r6 = cl0(r1:0) + \\ r7 = cl0(r3:2) + \\ r5:4 = r3:2 + \\ r3:2 = r1:0 + \\ } + \\ { + \\ p3 = xor(p2,p3) + \\ r10 = sub(r7,r6) + \\ r1:0 = #0 + \\ r15:14 = #1 + \\ } + \\ { + \\ r11 = add(r10,#1) + \\ r13:12 = lsl(r5:4,r10) + \\ r15:14 = lsl(r15:14,r10) + \\ } + \\ { + \\ p0 = cmp.gtu(r5:4,r3:2) + \\ loop0(1f,r11) + \\ } + \\ { + \\ if (p0) jump .hexagon_divdi3_return + \\ } + \\ .falign + \\ 1: + \\ { + \\ p0 = cmp.gtu(r13:12,r3:2) + \\ } + \\ { + \\ r7:6 = sub(r3:2, r13:12) + \\ r9:8 = add(r1:0, r15:14) + \\ } + \\ { + \\ r1:0 = vmux(p0, r1:0, r9:8) + \\ r3:2 = vmux(p0, r3:2, r7:6) + \\ } + \\ { + \\ r15:14 = lsr(r15:14, #1) + \\ r13:12 = lsr(r13:12, #1) + \\ }:endloop0 + \\ + \\ .hexagon_divdi3_return: + \\ { + \\ r3:2 = neg(r1:0) + \\ } + \\ { + \\ r1:0 = vmux(p3,r3:2,r1:0) + \\ jumpr r31 + \\ } 
+ ); +} + +fn __hexagon_divsf3() callconv(.naked) noreturn { + asm volatile ( + \\ { + \\ r2,p0 = sfrecipa(r0,r1) + \\ r4 = sffixupd(r0,r1) + \\ r3 = ##0x3f800000 + \\ } + \\ { + \\ r5 = sffixupn(r0,r1) + \\ r3 -= sfmpy(r4,r2):lib + \\ r6 = ##0x80000000 + \\ r7 = r3 + \\ } + \\ { + \\ r2 += sfmpy(r3,r2):lib + \\ r3 = r7 + \\ r6 = r5 + \\ r0 = and(r6,r5) + \\ } + \\ { + \\ r3 -= sfmpy(r4,r2):lib + \\ r0 += sfmpy(r5,r2):lib + \\ } + \\ { + \\ r2 += sfmpy(r3,r2):lib + \\ r6 -= sfmpy(r0,r4):lib + \\ } + \\ { + \\ r0 += sfmpy(r6,r2):lib + \\ } + \\ { + \\ r5 -= sfmpy(r0,r4):lib + \\ } + \\ { + \\ r0 += sfmpy(r5,r2,p0):scale + \\ jumpr r31 + \\ } + ); +} + +fn __hexagon_udivdi3() callconv(.naked) noreturn { + asm volatile ( + \\ { + \\ r6 = cl0(r1:0) + \\ r7 = cl0(r3:2) + \\ r5:4 = r3:2 + \\ r3:2 = r1:0 + \\ } + \\ { + \\ r10 = sub(r7,r6) + \\ r1:0 = #0 + \\ r15:14 = #1 + \\ } + \\ { + \\ r11 = add(r10,#1) + \\ r13:12 = lsl(r5:4,r10) + \\ r15:14 = lsl(r15:14,r10) + \\ } + \\ { + \\ p0 = cmp.gtu(r5:4,r3:2) + \\ loop0(1f,r11) + \\ } + \\ { + \\ if (p0) jumpr r31 + \\ } + \\ .falign + \\ 1: + \\ { + \\ p0 = cmp.gtu(r13:12,r3:2) + \\ } + \\ { + \\ r7:6 = sub(r3:2, r13:12) + \\ r9:8 = add(r1:0, r15:14) + \\ } + \\ { + \\ r1:0 = vmux(p0, r1:0, r9:8) + \\ r3:2 = vmux(p0, r3:2, r7:6) + \\ } + \\ { + \\ r15:14 = lsr(r15:14, #1) + \\ r13:12 = lsr(r13:12, #1) + \\ }:endloop0 + \\ { + \\ jumpr r31 + \\ } + ); +} + +fn __hexagon_umoddi3() callconv(.naked) noreturn { + asm volatile ( + \\ { + \\ r6 = cl0(r1:0) + \\ r7 = cl0(r3:2) + \\ r5:4 = r3:2 + \\ r3:2 = r1:0 + \\ } + \\ { + \\ r10 = sub(r7,r6) + \\ r1:0 = #0 + \\ r15:14 = #1 + \\ } + \\ { + \\ r11 = add(r10,#1) + \\ r13:12 = lsl(r5:4,r10) + \\ r15:14 = lsl(r15:14,r10) + \\ } + \\ { + \\ p0 = cmp.gtu(r5:4,r3:2) + \\ loop0(1f,r11) + \\ } + \\ { + \\ if (p0) jump .hexagon_umoddi3_return + \\ } + \\ .falign + \\ 1: + \\ { + \\ p0 = cmp.gtu(r13:12,r3:2) + \\ } + \\ { + \\ r7:6 = sub(r3:2, r13:12) + \\ r9:8 = add(r1:0, r15:14) + \\ } + \\ { + \\ r1:0 = vmux(p0, r1:0, r9:8) + \\ r3:2 = vmux(p0, r3:2, r7:6) + \\ } + \\ { + \\ r15:14 = lsr(r15:14, #1) + \\ r13:12 = lsr(r13:12, #1) + \\ }:endloop0 + \\ + \\ .hexagon_umoddi3_return: + \\ { + \\ r1:0 = r3:2 + \\ jumpr r31 + \\ } + ); +} + +fn __hexagon_modsi3() callconv(.naked) noreturn { + asm volatile ( + \\ { + \\ p2 = cmp.ge(r0,#0) + \\ r2 = abs(r0) + \\ r1 = abs(r1) + \\ } + \\ { + \\ r3 = cl0(r2) + \\ r4 = cl0(r1) + \\ p0 = cmp.gtu(r1,r2) + \\ } + \\ { + \\ r3 = sub(r4,r3) + \\ if (p0) jumpr r31 + \\ } + \\ { + \\ p1 = cmp.eq(r3,#0) + \\ loop0(1f,r3) + \\ r0 = r2 + \\ r2 = lsl(r1,r3) + \\ } + \\ .falign + \\ 1: + \\ { + \\ p0 = cmp.gtu(r2,r0) + \\ if (!p0.new) r0 = sub(r0,r2) + \\ r2 = lsr(r2,#1) + \\ if (p1) r1 = #0 + \\ }:endloop0 + \\ { + \\ p0 = cmp.gtu(r2,r0) + \\ if (!p0.new) r0 = sub(r0,r1) + \\ if (p2) jumpr r31 + \\ } + \\ { + \\ r0 = neg(r0) + \\ jumpr r31 + \\ } + ); +} + +fn __hexagon_memcpy_likely_aligned_min32bytes_mult8bytes() callconv(.naked) noreturn { + asm volatile ( + \\ { + \\ p0 = bitsclr(r1,#7) + \\ p0 = bitsclr(r0,#7) + \\ if (p0.new) r5:4 = memd(r1) + \\ r3 = #-3 + \\ } + \\ { + \\ if (!p0) jump .Lmemcpy_call + \\ if (p0) memd(r0++#8) = r5:4 + \\ if (p0) r5:4 = memd(r1+#8) + \\ r3 += lsr(r2,#3) + \\ } + \\ { + \\ memd(r0++#8) = r5:4 + \\ r5:4 = memd(r1+#16) + \\ r1 = add(r1,#24) + \\ loop0(1f,r3) + \\ } + \\ .falign + \\ 1: + \\ { + \\ memd(r0++#8) = r5:4 + \\ r5:4 = memd(r1++#8) + \\ }:endloop0 + \\ { + \\ memd(r0) = r5:4 + \\ r0 -= add(r2,#-8) + \\ jumpr r31 + \\ } + \\ 
.Lmemcpy_call: + \\ jump memcpy@PLT + ); +} + +fn __hexagon_udivsi3() callconv(.naked) noreturn { + asm volatile ( + \\ { + \\ r2 = cl0(r0) + \\ r3 = cl0(r1) + \\ r5:4 = combine(#1,#0) + \\ p0 = cmp.gtu(r1,r0) + \\ } + \\ { + \\ r6 = sub(r3,r2) + \\ r4 = r1 + \\ r1:0 = combine(r0,r4) + \\ if (p0) jumpr r31 + \\ } + \\ { + \\ r3:2 = vlslw(r5:4,r6) + \\ loop0(1f,r6) + \\ } + \\ .falign + \\ 1: + \\ { + \\ p0 = cmp.gtu(r2,r1) + \\ if (!p0.new) r1 = sub(r1,r2) + \\ if (!p0.new) r0 = add(r0,r3) + \\ r3:2 = vlsrw(r3:2,#1) + \\ }:endloop0 + \\ { + \\ p0 = cmp.gtu(r2,r1) + \\ if (!p0.new) r0 = add(r0,r3) + \\ jumpr r31 + \\ } + ); +} + +fn __hexagon_adddf3() align(32) callconv(.naked) noreturn { + asm volatile ( + \\ { + \\ r4 = extractu(r1,#11,#20) + \\ r5 = extractu(r3,#11,#20) + \\ r13:12 = combine(##0x20000000,#0) + \\ } + \\ { + \\ p3 = dfclass(r1:0,#2) + \\ p3 = dfclass(r3:2,#2) + \\ r9:8 = r13:12 + \\ p2 = cmp.gtu(r5,r4) + \\ } + \\ { + \\ if (!p3) jump .Ladd_abnormal + \\ if (p2) r1:0 = r3:2 + \\ if (p2) r3:2 = r1:0 + \\ if (p2) r5:4 = combine(r4,r5) + \\ } + \\ { + \\ r13:12 = insert(r1:0,#52,#11 -2) + \\ r9:8 = insert(r3:2,#52,#11 -2) + \\ r15 = sub(r4,r5) + \\ r7:6 = combine(#62,#1) + \\ } + \\ + \\ + \\ + \\ + \\ + \\ .Ladd_continue: + \\ { + \\ r15 = min(r15,r7) + \\ + \\ r11:10 = neg(r13:12) + \\ p2 = cmp.gt(r1,#-1) + \\ r14 = #0 + \\ } + \\ { + \\ if (!p2) r13:12 = r11:10 + \\ r11:10 = extractu(r9:8,r15:14) + \\ r9:8 = ASR(r9:8,r15) + \\ + \\ + \\ + \\ + \\ r15:14 = #0 + \\ } + \\ { + \\ p1 = cmp.eq(r11:10,r15:14) + \\ if (!p1.new) r8 = or(r8,r6) + \\ r5 = add(r4,#-1024 -60) + \\ p3 = cmp.gt(r3,#-1) + \\ } + \\ { + \\ r13:12 = add(r13:12,r9:8) + \\ r11:10 = sub(r13:12,r9:8) + \\ r7:6 = combine(#54,##2045) + \\ } + \\ { + \\ p0 = cmp.gtu(r4,r7) + \\ p0 = !cmp.gtu(r4,r6) + \\ if (!p0.new) jump:nt .Ladd_ovf_unf + \\ if (!p3) r13:12 = r11:10 + \\ } + \\ { + \\ r1:0 = convert_d2df(r13:12) + \\ p0 = cmp.eq(r13,#0) + \\ p0 = cmp.eq(r12,#0) + \\ if (p0.new) jump:nt .Ladd_zero + \\ } + \\ { + \\ r1 += asl(r5,#20) + \\ jumpr r31 + \\ } + \\ + \\ .falign + \\ .Ladd_zero: + \\ + \\ + \\ { + \\ r28 = USR + \\ r1:0 = #0 + \\ r3 = #1 + \\ } + \\ { + \\ r28 = extractu(r28,#2,#22) + \\ r3 = asl(r3,#31) + \\ } + \\ { + \\ p0 = cmp.eq(r28,#2) + \\ if (p0.new) r1 = xor(r1,r3) + \\ jumpr r31 + \\ } + \\ .falign + \\ .Ladd_ovf_unf: + \\ { + \\ r1:0 = convert_d2df(r13:12) + \\ p0 = cmp.eq(r13,#0) + \\ p0 = cmp.eq(r12,#0) + \\ if (p0.new) jump:nt .Ladd_zero + \\ } + \\ { + \\ r28 = extractu(r1,#11,#20) + \\ r1 += asl(r5,#20) + \\ } + \\ { + \\ r5 = add(r5,r28) + \\ r3:2 = combine(##0x00100000,#0) + \\ } + \\ { + \\ p0 = cmp.gt(r5,##1024 +1024 -2) + \\ if (p0.new) jump:nt .Ladd_ovf + \\ } + \\ { + \\ p0 = cmp.gt(r5,#0) + \\ if (p0.new) jumpr:t r31 + \\ r28 = sub(#1,r5) + \\ } + \\ { + \\ r3:2 = insert(r1:0,#52,#0) + \\ r1:0 = r13:12 + \\ } + \\ { + \\ r3:2 = lsr(r3:2,r28) + \\ } + \\ { + \\ r1:0 = insert(r3:2,#63,#0) + \\ jumpr r31 + \\ } + \\ .falign + \\ .Ladd_ovf: + \\ + \\ { + \\ r1:0 = r13:12 + \\ r28 = USR + \\ r13:12 = combine(##0x7fefffff,#-1) + \\ } + \\ { + \\ r5 = extractu(r28,#2,#22) + \\ r28 = or(r28,#0x28) + \\ r9:8 = combine(##0x7ff00000,#0) + \\ } + \\ { + \\ USR = r28 + \\ r5 ^= lsr(r1,#31) + \\ r28 = r5 + \\ } + \\ { + \\ p0 = !cmp.eq(r28,#1) + \\ p0 = !cmp.eq(r5,#2) + \\ if (p0.new) r13:12 = r9:8 + \\ } + \\ { + \\ r1:0 = insert(r13:12,#63,#0) + \\ } + \\ { + \\ p0 = dfcmp.eq(r1:0,r1:0) + \\ jumpr r31 + \\ } + \\ + \\ .Ladd_abnormal: + \\ { + \\ r13:12 = extractu(r1:0,#63,#0) + \\ r9:8 = 
extractu(r3:2,#63,#0) + \\ } + \\ { + \\ p3 = cmp.gtu(r13:12,r9:8) + \\ if (!p3.new) r1:0 = r3:2 + \\ if (!p3.new) r3:2 = r1:0 + \\ } + \\ { + \\ + \\ p0 = dfclass(r1:0,#0x0f) + \\ if (!p0.new) jump:nt .Linvalid_nan_add + \\ if (!p3) r13:12 = r9:8 + \\ if (!p3) r9:8 = r13:12 + \\ } + \\ { + \\ + \\ + \\ p1 = dfclass(r1:0,#0x08) + \\ if (p1.new) jump:nt .Linf_add + \\ } + \\ { + \\ p2 = dfclass(r3:2,#0x01) + \\ if (p2.new) jump:nt .LB_zero + \\ r13:12 = #0 + \\ } + \\ + \\ { + \\ p0 = dfclass(r1:0,#4) + \\ if (p0.new) jump:nt .Ladd_two_subnormal + \\ r13:12 = combine(##0x20000000,#0) + \\ } + \\ { + \\ r4 = extractu(r1,#11,#20) + \\ r5 = #1 + \\ + \\ r9:8 = asl(r9:8,#11 -2) + \\ } + \\ + \\ + \\ + \\ { + \\ r13:12 = insert(r1:0,#52,#11 -2) + \\ r15 = sub(r4,r5) + \\ r7:6 = combine(#62,#1) + \\ jump .Ladd_continue + \\ } + \\ + \\ .Ladd_two_subnormal: + \\ { + \\ r13:12 = extractu(r1:0,#63,#0) + \\ r9:8 = extractu(r3:2,#63,#0) + \\ } + \\ { + \\ r13:12 = neg(r13:12) + \\ r9:8 = neg(r9:8) + \\ p0 = cmp.gt(r1,#-1) + \\ p1 = cmp.gt(r3,#-1) + \\ } + \\ { + \\ if (p0) r13:12 = r1:0 + \\ if (p1) r9:8 = r3:2 + \\ } + \\ { + \\ r13:12 = add(r13:12,r9:8) + \\ } + \\ { + \\ r9:8 = neg(r13:12) + \\ p0 = cmp.gt(r13,#-1) + \\ r3:2 = #0 + \\ } + \\ { + \\ if (!p0) r1:0 = r9:8 + \\ if (p0) r1:0 = r13:12 + \\ r3 = ##0x80000000 + \\ } + \\ { + \\ if (!p0) r1 = or(r1,r3) + \\ p0 = dfcmp.eq(r1:0,r3:2) + \\ if (p0.new) jump:nt .Lzero_plus_zero + \\ } + \\ { + \\ jumpr r31 + \\ } + \\ + \\ .Linvalid_nan_add: + \\ { + \\ r28 = convert_df2sf(r1:0) + \\ p0 = dfclass(r3:2,#0x0f) + \\ if (p0.new) r3:2 = r1:0 + \\ } + \\ { + \\ r2 = convert_df2sf(r3:2) + \\ r1:0 = #-1 + \\ jumpr r31 + \\ } + \\ .falign + \\ .LB_zero: + \\ { + \\ p0 = dfcmp.eq(r13:12,r1:0) + \\ if (!p0.new) jumpr:t r31 + \\ } + \\ + \\ + \\ + \\ + \\ .Lzero_plus_zero: + \\ { + \\ p0 = cmp.eq(r1:0,r3:2) + \\ if (p0.new) jumpr:t r31 + \\ } + \\ { + \\ r28 = USR + \\ } + \\ { + \\ r28 = extractu(r28,#2,#22) + \\ r1:0 = #0 + \\ } + \\ { + \\ p0 = cmp.eq(r28,#2) + \\ if (p0.new) r1 = ##0x80000000 + \\ jumpr r31 + \\ } + \\ .Linf_add: + \\ + \\ { + \\ p0 = !cmp.eq(r1,r3) + \\ p0 = dfclass(r3:2,#8) + \\ if (!p0.new) jumpr:t r31 + \\ } + \\ { + \\ r2 = ##0x7f800001 + \\ } + \\ { + \\ r1:0 = convert_sf2df(r2) + \\ jumpr r31 + \\ } + ); +} + +fn __hexagon_subdf3() align(32) callconv(.naked) noreturn { + asm volatile ( + \\ { + \\ r3 = togglebit(r3,#31) + \\ jump ##__hexagon_adddf3 + \\ } + ); +} + +fn __hexagon_divdf3() align(32) callconv(.naked) noreturn { + asm volatile ( + \\ { + \\ p2 = dfclass(r1:0,#0x02) + \\ p2 = dfclass(r3:2,#0x02) + \\ r13:12 = combine(r3,r1) + \\ r28 = xor(r1,r3) + \\ } + \\ { + \\ if (!p2) jump .Ldiv_abnormal + \\ r7:6 = extractu(r3:2,#23,#52 -23) + \\ r8 = ##0x3f800001 + \\ } + \\ { + \\ r9 = or(r8,r6) + \\ r13 = extractu(r13,#11,#52 -32) + \\ r12 = extractu(r12,#11,#52 -32) + \\ p3 = cmp.gt(r28,#-1) + \\ } + \\ + \\ + \\ .Ldenorm_continue: + \\ { + \\ r11,p0 = sfrecipa(r8,r9) + \\ r10 = and(r8,#-2) + \\ r28 = #1 + \\ r12 = sub(r12,r13) + \\ } + \\ + \\ + \\ { + \\ r10 -= sfmpy(r11,r9):lib + \\ r1 = insert(r28,#11 +1,#52 -32) + \\ r13 = ##0x00800000 << 3 + \\ } + \\ { + \\ r11 += sfmpy(r11,r10):lib + \\ r3 = insert(r28,#11 +1,#52 -32) + \\ r10 = and(r8,#-2) + \\ } + \\ { + \\ r10 -= sfmpy(r11,r9):lib + \\ r5 = #-0x3ff +1 + \\ r4 = #0x3ff -1 + \\ } + \\ { + \\ r11 += sfmpy(r11,r10):lib + \\ p1 = cmp.gt(r12,r5) + \\ p1 = !cmp.gt(r12,r4) + \\ } + \\ { + \\ r13 = insert(r11,#23,#3) + \\ r5:4 = #0 + \\ r12 = add(r12,#-61) + \\ } + \\ + \\ 
+ \\ + \\ + \\ { + \\ r13 = add(r13,#((-3) << 3)) + \\ } + \\ { r7:6 = mpyu(r13,r1); r1:0 = asl(r1:0,# ( 15 )); }; { r6 = # 0; r1:0 -= mpyu(r7,r2); r15:14 = mpyu(r7,r3); }; { r5:4 += ASL(r7:6, # ( 14 )); r1:0 -= asl(r15:14, # 32); } + \\ { r7:6 = mpyu(r13,r1); r1:0 = asl(r1:0,# ( 15 )); }; { r6 = # 0; r1:0 -= mpyu(r7,r2); r15:14 = mpyu(r7,r3); }; { r5:4 += ASR(r7:6, # ( 1 )); r1:0 -= asl(r15:14, # 32); } + \\ { r7:6 = mpyu(r13,r1); r1:0 = asl(r1:0,# ( 15 )); }; { r6 = # 0; r1:0 -= mpyu(r7,r2); r15:14 = mpyu(r7,r3); }; { r5:4 += ASR(r7:6, # ( 16 )); r1:0 -= asl(r15:14, # 32); } + \\ { r7:6 = mpyu(r13,r1); r1:0 = asl(r1:0,# ( 15 )); }; { r6 = # 0; r1:0 -= mpyu(r7,r2); r15:14 = mpyu(r7,r3); }; { r5:4 += ASR(r7:6, # ( 31 )); r1:0 -= asl(r15:14, # 32); r7:6=# ( 0 ); } + \\ + \\ + \\ + \\ + \\ + \\ + \\ + \\ { + \\ + \\ r15:14 = sub(r1:0,r3:2) + \\ p0 = cmp.gtu(r3:2,r1:0) + \\ + \\ if (!p0.new) r6 = #2 + \\ } + \\ { + \\ r5:4 = add(r5:4,r7:6) + \\ if (!p0) r1:0 = r15:14 + \\ r15:14 = #0 + \\ } + \\ { + \\ p0 = cmp.eq(r1:0,r15:14) + \\ if (!p0.new) r4 = or(r4,r28) + \\ } + \\ { + \\ r7:6 = neg(r5:4) + \\ } + \\ { + \\ if (!p3) r5:4 = r7:6 + \\ } + \\ { + \\ r1:0 = convert_d2df(r5:4) + \\ if (!p1) jump .Ldiv_ovf_unf + \\ } + \\ { + \\ r1 += asl(r12,#52 -32) + \\ jumpr r31 + \\ } + \\ + \\ .Ldiv_ovf_unf: + \\ { + \\ r1 += asl(r12,#52 -32) + \\ r13 = extractu(r1,#11,#52 -32) + \\ } + \\ { + \\ r7:6 = abs(r5:4) + \\ r12 = add(r12,r13) + \\ } + \\ { + \\ p0 = cmp.gt(r12,##0x3ff +0x3ff) + \\ if (p0.new) jump:nt .Ldiv_ovf + \\ } + \\ { + \\ p0 = cmp.gt(r12,#0) + \\ if (p0.new) jump:nt .Ldiv_possible_unf + \\ } + \\ { + \\ r13 = add(clb(r7:6),#-1) + \\ r12 = sub(#7,r12) + \\ r10 = USR + \\ r11 = #63 + \\ } + \\ { + \\ r13 = min(r12,r11) + \\ r11 = or(r10,#0x030) + \\ r7:6 = asl(r7:6,r13) + \\ r12 = #0 + \\ } + \\ { + \\ r15:14 = extractu(r7:6,r13:12) + \\ r7:6 = lsr(r7:6,r13) + \\ r3:2 = #1 + \\ } + \\ { + \\ p0 = cmp.gtu(r3:2,r15:14) + \\ if (!p0.new) r6 = or(r2,r6) + \\ r7 = setbit(r7,#52 -32+4) + \\ } + \\ { + \\ r5:4 = neg(r7:6) + \\ p0 = bitsclr(r6,#(1<<4)-1) + \\ if (!p0.new) r10 = r11 + \\ } + \\ { + \\ USR = r10 + \\ if (p3) r5:4 = r7:6 + \\ r10 = #-0x3ff -(52 +4) + \\ } + \\ { + \\ r1:0 = convert_d2df(r5:4) + \\ } + \\ { + \\ r1 += asl(r10,#52 -32) + \\ jumpr r31 + \\ } + \\ + \\ + \\ .Ldiv_possible_unf: + \\ + \\ + \\ { + \\ r3:2 = extractu(r1:0,#63,#0) + \\ r15:14 = combine(##0x00100000,#0) + \\ r10 = #0x7FFF + \\ } + \\ { + \\ p0 = dfcmp.eq(r15:14,r3:2) + \\ p0 = bitsset(r7,r10) + \\ } + \\ + \\ + \\ + \\ + \\ + \\ + \\ { + \\ if (!p0) jumpr r31 + \\ r10 = USR + \\ } + \\ + \\ { + \\ r10 = or(r10,#0x30) + \\ } + \\ { + \\ USR = r10 + \\ } + \\ { + \\ p0 = dfcmp.eq(r1:0,r1:0) + \\ jumpr r31 + \\ } + \\ + \\ .Ldiv_ovf: + \\ + \\ + \\ + \\ { + \\ r10 = USR + \\ r3:2 = combine(##0x7fefffff,#-1) + \\ r1 = mux(p3,#0,#-1) + \\ } + \\ { + \\ r7:6 = combine(##0x7ff00000,#0) + \\ r5 = extractu(r10,#2,#22) + \\ r10 = or(r10,#0x28) + \\ } + \\ { + \\ USR = r10 + \\ r5 ^= lsr(r1,#31) + \\ r4 = r5 + \\ } + \\ { + \\ p0 = !cmp.eq(r4,#1) + \\ p0 = !cmp.eq(r5,#2) + \\ if (p0.new) r3:2 = r7:6 + \\ p0 = dfcmp.eq(r3:2,r3:2) + \\ } + \\ { + \\ r1:0 = insert(r3:2,#63,#0) + \\ jumpr r31 + \\ } + \\ + \\ + \\ + \\ + \\ + \\ + \\ + \\ .Ldiv_abnormal: + \\ { + \\ p0 = dfclass(r1:0,#0x0F) + \\ p0 = dfclass(r3:2,#0x0F) + \\ p3 = cmp.gt(r28,#-1) + \\ } + \\ { + \\ p1 = dfclass(r1:0,#0x08) + \\ p1 = dfclass(r3:2,#0x08) + \\ } + \\ { + \\ p2 = dfclass(r1:0,#0x01) + \\ p2 = dfclass(r3:2,#0x01) + \\ } + \\ { + \\ if (!p0) 
jump .Ldiv_nan + \\ if (p1) jump .Ldiv_invalid + \\ } + \\ { + \\ if (p2) jump .Ldiv_invalid + \\ } + \\ { + \\ p2 = dfclass(r1:0,#(0x0F ^ 0x01)) + \\ p2 = dfclass(r3:2,#(0x0F ^ 0x08)) + \\ } + \\ { + \\ p1 = dfclass(r1:0,#(0x0F ^ 0x08)) + \\ p1 = dfclass(r3:2,#(0x0F ^ 0x01)) + \\ } + \\ { + \\ if (!p2) jump .Ldiv_zero_result + \\ if (!p1) jump .Ldiv_inf_result + \\ } + \\ + \\ + \\ + \\ + \\ + \\ { + \\ p0 = dfclass(r1:0,#0x02) + \\ p1 = dfclass(r3:2,#0x02) + \\ r10 = ##0x00100000 + \\ } + \\ { + \\ r13:12 = combine(r3,r1) + \\ r1 = insert(r10,#11 +1,#52 -32) + \\ r3 = insert(r10,#11 +1,#52 -32) + \\ } + \\ { + \\ if (p0) r1 = or(r1,r10) + \\ if (p1) r3 = or(r3,r10) + \\ } + \\ { + \\ r5 = add(clb(r1:0),#-11) + \\ r4 = add(clb(r3:2),#-11) + \\ r10 = #1 + \\ } + \\ { + \\ r12 = extractu(r12,#11,#52 -32) + \\ r13 = extractu(r13,#11,#52 -32) + \\ } + \\ { + \\ r1:0 = asl(r1:0,r5) + \\ r3:2 = asl(r3:2,r4) + \\ if (!p0) r12 = sub(r10,r5) + \\ if (!p1) r13 = sub(r10,r4) + \\ } + \\ { + \\ r7:6 = extractu(r3:2,#23,#52 -23) + \\ } + \\ { + \\ r9 = or(r8,r6) + \\ jump .Ldenorm_continue + \\ } + \\ + \\ .Ldiv_zero_result: + \\ { + \\ r1 = xor(r1,r3) + \\ r3:2 = #0 + \\ } + \\ { + \\ r1:0 = insert(r3:2,#63,#0) + \\ jumpr r31 + \\ } + \\ .Ldiv_inf_result: + \\ { + \\ p2 = dfclass(r3:2,#0x01) + \\ p2 = dfclass(r1:0,#(0x0F ^ 0x08)) + \\ } + \\ { + \\ r10 = USR + \\ if (!p2) jump 1f + \\ r1 = xor(r1,r3) + \\ } + \\ { + \\ r10 = or(r10,#0x04) + \\ } + \\ { + \\ USR = r10 + \\ } + \\ 1: + \\ { + \\ r3:2 = combine(##0x7ff00000,#0) + \\ p0 = dfcmp.uo(r3:2,r3:2) + \\ } + \\ { + \\ r1:0 = insert(r3:2,#63,#0) + \\ jumpr r31 + \\ } + \\ .Ldiv_nan: + \\ { + \\ p0 = dfclass(r1:0,#0x10) + \\ p1 = dfclass(r3:2,#0x10) + \\ if (!p0.new) r1:0 = r3:2 + \\ if (!p1.new) r3:2 = r1:0 + \\ } + \\ { + \\ r5 = convert_df2sf(r1:0) + \\ r4 = convert_df2sf(r3:2) + \\ } + \\ { + \\ r1:0 = #-1 + \\ jumpr r31 + \\ } + \\ + \\ .Ldiv_invalid: + \\ { + \\ r10 = ##0x7f800001 + \\ } + \\ { + \\ r1:0 = convert_sf2df(r10) + \\ jumpr r31 + \\ } + ); +} + +fn __hexagon_muldf3() align(32) callconv(.naked) noreturn { + asm volatile ( + \\ { + \\ p0 = dfclass(r1:0,#2) + \\ p0 = dfclass(r3:2,#2) + \\ r13:12 = combine(##0x40000000,#0) + \\ } + \\ { + \\ r13:12 = insert(r1:0,#52,#11 -1) + \\ r5:4 = asl(r3:2,#11 -1) + \\ r28 = #-1024 + \\ r9:8 = #1 + \\ } + \\ { + \\ r7:6 = mpyu(r4,r13) + \\ r5:4 = insert(r9:8,#2,#62) + \\ } + \\ + \\ + \\ + \\ + \\ { + \\ r15:14 = mpyu(r12,r4) + \\ r7:6 += mpyu(r12,r5) + \\ } + \\ { + \\ r7:6 += lsr(r15:14,#32) + \\ r11:10 = mpyu(r13,r5) + \\ r5:4 = combine(##1024 +1024 -4,#0) + \\ } + \\ { + \\ r11:10 += lsr(r7:6,#32) + \\ if (!p0) jump .Lmul_abnormal + \\ p1 = cmp.eq(r14,#0) + \\ p1 = cmp.eq(r6,#0) + \\ } + \\ { + \\ if (!p1) r10 = or(r10,r8) + \\ r6 = extractu(r1,#11,#20) + \\ r7 = extractu(r3,#11,#20) + \\ } + \\ { + \\ r15:14 = neg(r11:10) + \\ r6 += add(r28,r7) + \\ r28 = xor(r1,r3) + \\ } + \\ { + \\ if (!p2.new) r11:10 = r15:14 + \\ p2 = cmp.gt(r28,#-1) + \\ p0 = !cmp.gt(r6,r5) + \\ p0 = cmp.gt(r6,r4) + \\ if (!p0.new) jump:nt .Lmul_ovf_unf + \\ } + \\ { + \\ r1:0 = convert_d2df(r11:10) + \\ r6 = add(r6,#-1024 -58) + \\ } + \\ { + \\ r1 += asl(r6,#20) + \\ jumpr r31 + \\ } + \\ + \\ .falign + \\ .Lmul_possible_unf: + \\ { + \\ p0 = cmp.eq(r0,#0) + \\ p0 = bitsclr(r1,r4) + \\ if (!p0.new) jumpr:t r31 + \\ r5 = #0x7fff + \\ } + \\ { + \\ p0 = bitsset(r13,r5) + \\ r4 = USR + \\ r5 = #0x030 + \\ } + \\ { + \\ if (p0) r4 = or(r4,r5) + \\ } + \\ { + \\ USR = r4 + \\ } + \\ { + \\ p0 = dfcmp.eq(r1:0,r1:0) + \\ 
jumpr r31 + \\ } + \\ .falign + \\ .Lmul_ovf_unf: + \\ { + \\ r1:0 = convert_d2df(r11:10) + \\ r13:12 = abs(r11:10) + \\ r7 = add(r6,#-1024 -58) + \\ } + \\ { + \\ r1 += asl(r7,#20) + \\ r7 = extractu(r1,#11,#20) + \\ r4 = ##0x7FEFFFFF + \\ } + \\ { + \\ r7 += add(r6,##-1024 -58) + \\ + \\ r5 = #0 + \\ } + \\ { + \\ p0 = cmp.gt(r7,##1024 +1024 -2) + \\ if (p0.new) jump:nt .Lmul_ovf + \\ } + \\ { + \\ p0 = cmp.gt(r7,#0) + \\ if (p0.new) jump:nt .Lmul_possible_unf + \\ r5 = sub(r6,r5) + \\ r28 = #63 + \\ } + \\ { + \\ r4 = #0 + \\ r5 = sub(#5,r5) + \\ } + \\ { + \\ p3 = cmp.gt(r11,#-1) + \\ r5 = min(r5,r28) + \\ r11:10 = r13:12 + \\ } + \\ { + \\ r28 = USR + \\ r15:14 = extractu(r11:10,r5:4) + \\ } + \\ { + \\ r11:10 = asr(r11:10,r5) + \\ r4 = #0x0030 + \\ r1 = insert(r9,#11,#20) + \\ } + \\ { + \\ p0 = cmp.gtu(r9:8,r15:14) + \\ if (!p0.new) r10 = or(r10,r8) + \\ r11 = setbit(r11,#20 +3) + \\ } + \\ { + \\ r15:14 = neg(r11:10) + \\ p1 = bitsclr(r10,#0x7) + \\ if (!p1.new) r28 = or(r4,r28) + \\ } + \\ { + \\ if (!p3) r11:10 = r15:14 + \\ USR = r28 + \\ } + \\ { + \\ r1:0 = convert_d2df(r11:10) + \\ p0 = dfcmp.eq(r1:0,r1:0) + \\ } + \\ { + \\ r1 = insert(r9,#11 -1,#20 +1) + \\ jumpr r31 + \\ } + \\ .falign + \\ .Lmul_ovf: + \\ + \\ { + \\ r28 = USR + \\ r13:12 = combine(##0x7fefffff,#-1) + \\ r1:0 = r11:10 + \\ } + \\ { + \\ r14 = extractu(r28,#2,#22) + \\ r28 = or(r28,#0x28) + \\ r5:4 = combine(##0x7ff00000,#0) + \\ } + \\ { + \\ USR = r28 + \\ r14 ^= lsr(r1,#31) + \\ r28 = r14 + \\ } + \\ { + \\ p0 = !cmp.eq(r28,#1) + \\ p0 = !cmp.eq(r14,#2) + \\ if (p0.new) r13:12 = r5:4 + \\ p0 = dfcmp.eq(r1:0,r1:0) + \\ } + \\ { + \\ r1:0 = insert(r13:12,#63,#0) + \\ jumpr r31 + \\ } + \\ + \\ .Lmul_abnormal: + \\ { + \\ r13:12 = extractu(r1:0,#63,#0) + \\ r5:4 = extractu(r3:2,#63,#0) + \\ } + \\ { + \\ p3 = cmp.gtu(r13:12,r5:4) + \\ if (!p3.new) r1:0 = r3:2 + \\ if (!p3.new) r3:2 = r1:0 + \\ } + \\ { + \\ + \\ p0 = dfclass(r1:0,#0x0f) + \\ if (!p0.new) jump:nt .Linvalid_nan + \\ if (!p3) r13:12 = r5:4 + \\ if (!p3) r5:4 = r13:12 + \\ } + \\ { + \\ + \\ p1 = dfclass(r1:0,#0x08) + \\ p1 = dfclass(r3:2,#0x0e) + \\ } + \\ { + \\ + \\ + \\ p0 = dfclass(r1:0,#0x08) + \\ p0 = dfclass(r3:2,#0x01) + \\ } + \\ { + \\ if (p1) jump .Ltrue_inf + \\ p2 = dfclass(r3:2,#0x01) + \\ } + \\ { + \\ if (p0) jump .Linvalid_zeroinf + \\ if (p2) jump .Ltrue_zero + \\ r28 = ##0x7c000000 + \\ } + \\ + \\ + \\ + \\ + \\ + \\ { + \\ p0 = bitsclr(r1,r28) + \\ if (p0.new) jump:nt .Lmul_tiny + \\ } + \\ { + \\ r28 = cl0(r5:4) + \\ } + \\ { + \\ r28 = add(r28,#-11) + \\ } + \\ { + \\ r5:4 = asl(r5:4,r28) + \\ } + \\ { + \\ r3:2 = insert(r5:4,#63,#0) + \\ r1 -= asl(r28,#20) + \\ } + \\ jump __hexagon_muldf3 + \\ .Lmul_tiny: + \\ { + \\ r28 = USR + \\ r1:0 = xor(r1:0,r3:2) + \\ } + \\ { + \\ r28 = or(r28,#0x30) + \\ r1:0 = insert(r9:8,#63,#0) + \\ r5 = extractu(r28,#2,#22) + \\ } + \\ { + \\ USR = r28 + \\ p0 = cmp.gt(r5,#1) + \\ if (!p0.new) r0 = #0 + \\ r5 ^= lsr(r1,#31) + \\ } + \\ { + \\ p0 = cmp.eq(r5,#3) + \\ if (!p0.new) r0 = #0 + \\ jumpr r31 + \\ } + \\ .Linvalid_zeroinf: + \\ { + \\ r28 = USR + \\ } + \\ { + \\ r1:0 = #-1 + \\ r28 = or(r28,#2) + \\ } + \\ { + \\ USR = r28 + \\ } + \\ { + \\ p0 = dfcmp.uo(r1:0,r1:0) + \\ jumpr r31 + \\ } + \\ .Linvalid_nan: + \\ { + \\ p0 = dfclass(r3:2,#0x0f) + \\ r28 = convert_df2sf(r1:0) + \\ if (p0.new) r3:2 = r1:0 + \\ } + \\ { + \\ r2 = convert_df2sf(r3:2) + \\ r1:0 = #-1 + \\ jumpr r31 + \\ } + \\ .falign + \\ .Ltrue_zero: + \\ { + \\ r1:0 = r3:2 + \\ r3:2 = r1:0 + \\ } + \\ .Ltrue_inf: + 
\\ { + \\ r3 = extract(r3,#1,#31) + \\ } + \\ { + \\ r1 ^= asl(r3,#31) + \\ jumpr r31 + \\ } + ); +} + +fn __hexagon_sqrtdf2() align(32) callconv(.naked) noreturn { + asm volatile ( + \\ { + \\ r15:14 = extractu(r1:0,#23 +1,#52 -23) + \\ r28 = extractu(r1,#11,#52 -32) + \\ r5:4 = combine(##0x3f000004,#1) + \\ } + \\ { + \\ p2 = dfclass(r1:0,#0x02) + \\ p2 = cmp.gt(r1,#-1) + \\ if (!p2.new) jump:nt .Lsqrt_abnormal + \\ r9 = or(r5,r14) + \\ } + \\ + \\ .Ldenormal_restart: + \\ { + \\ r11:10 = r1:0 + \\ r7,p0 = sfinvsqrta(r9) + \\ r5 = and(r5,#-16) + \\ r3:2 = #0 + \\ } + \\ { + \\ r3 += sfmpy(r7,r9):lib + \\ r2 += sfmpy(r7,r5):lib + \\ r6 = r5 + \\ + \\ + \\ r9 = and(r28,#1) + \\ } + \\ { + \\ r6 -= sfmpy(r3,r2):lib + \\ r11 = insert(r4,#11 +1,#52 -32) + \\ p1 = cmp.gtu(r9,#0) + \\ } + \\ { + \\ r3 += sfmpy(r3,r6):lib + \\ r2 += sfmpy(r2,r6):lib + \\ r6 = r5 + \\ r9 = mux(p1,#8,#9) + \\ } + \\ { + \\ r6 -= sfmpy(r3,r2):lib + \\ r11:10 = asl(r11:10,r9) + \\ r9 = mux(p1,#3,#2) + \\ } + \\ { + \\ r2 += sfmpy(r2,r6):lib + \\ + \\ r15:14 = asl(r11:10,r9) + \\ } + \\ { + \\ r2 = and(r2,##0x007fffff) + \\ } + \\ { + \\ r2 = add(r2,##0x00800000 - 3) + \\ r9 = mux(p1,#7,#8) + \\ } + \\ { + \\ r8 = asl(r2,r9) + \\ r9 = mux(p1,#15-(1+1),#15-(1+0)) + \\ } + \\ { + \\ r13:12 = mpyu(r8,r15) + \\ } + \\ { + \\ r1:0 = asl(r11:10,#15) + \\ r15:14 = mpyu(r13,r13) + \\ p1 = cmp.eq(r0,r0) + \\ } + \\ { + \\ r1:0 -= asl(r15:14,#15) + \\ r15:14 = mpyu(r13,r12) + \\ p2 = cmp.eq(r0,r0) + \\ } + \\ { + \\ r1:0 -= lsr(r15:14,#16) + \\ p3 = cmp.eq(r0,r0) + \\ } + \\ { + \\ r1:0 = mpyu(r1,r8) + \\ } + \\ { + \\ r13:12 += lsr(r1:0,r9) + \\ r9 = add(r9,#16) + \\ r1:0 = asl(r11:10,#31) + \\ } + \\ + \\ { + \\ r15:14 = mpyu(r13,r13) + \\ r1:0 -= mpyu(r13,r12) + \\ } + \\ { + \\ r1:0 -= asl(r15:14,#31) + \\ r15:14 = mpyu(r12,r12) + \\ } + \\ { + \\ r1:0 -= lsr(r15:14,#33) + \\ } + \\ { + \\ r1:0 = mpyu(r1,r8) + \\ } + \\ { + \\ r13:12 += lsr(r1:0,r9) + \\ r9 = add(r9,#16) + \\ r1:0 = asl(r11:10,#47) + \\ } + \\ + \\ { + \\ r15:14 = mpyu(r13,r13) + \\ } + \\ { + \\ r1:0 -= asl(r15:14,#47) + \\ r15:14 = mpyu(r13,r12) + \\ } + \\ { + \\ r1:0 -= asl(r15:14,#16) + \\ r15:14 = mpyu(r12,r12) + \\ } + \\ { + \\ r1:0 -= lsr(r15:14,#17) + \\ } + \\ { + \\ r1:0 = mpyu(r1,r8) + \\ } + \\ { + \\ r13:12 += lsr(r1:0,r9) + \\ } + \\ { + \\ r3:2 = mpyu(r13,r12) + \\ r5:4 = mpyu(r12,r12) + \\ r15:14 = #0 + \\ r1:0 = #0 + \\ } + \\ { + \\ r3:2 += lsr(r5:4,#33) + \\ r5:4 += asl(r3:2,#33) + \\ p1 = cmp.eq(r0,r0) + \\ } + \\ { + \\ r7:6 = mpyu(r13,r13) + \\ r1:0 = sub(r1:0,r5:4,p1):carry + \\ r9:8 = #1 + \\ } + \\ { + \\ r7:6 += lsr(r3:2,#31) + \\ r9:8 += asl(r13:12,#1) + \\ } + \\ + \\ + \\ + \\ + \\ + \\ { + \\ r15:14 = sub(r11:10,r7:6,p1):carry + \\ r5:4 = sub(r1:0,r9:8,p2):carry + \\ + \\ + \\ + \\ + \\ r7:6 = #1 + \\ r11:10 = #0 + \\ } + \\ { + \\ r3:2 = sub(r15:14,r11:10,p2):carry + \\ r7:6 = add(r13:12,r7:6) + \\ r28 = add(r28,#-0x3ff) + \\ } + \\ { + \\ + \\ if (p2) r13:12 = r7:6 + \\ if (p2) r1:0 = r5:4 + \\ if (p2) r15:14 = r3:2 + \\ } + \\ { + \\ r5:4 = sub(r1:0,r9:8,p3):carry + \\ r7:6 = #1 + \\ r28 = asr(r28,#1) + \\ } + \\ { + \\ r3:2 = sub(r15:14,r11:10,p3):carry + \\ r7:6 = add(r13:12,r7:6) + \\ } + \\ { + \\ if (p3) r13:12 = r7:6 + \\ if (p3) r1:0 = r5:4 + \\ + \\ + \\ + \\ + \\ + \\ r2 = #1 + \\ } + \\ { + \\ p0 = cmp.eq(r1:0,r11:10) + \\ if (!p0.new) r12 = or(r12,r2) + \\ r3 = cl0(r13:12) + \\ r28 = add(r28,#-63) + \\ } + \\ + \\ + \\ + \\ { + \\ r1:0 = convert_ud2df(r13:12) + \\ r28 = add(r28,r3) + \\ } + \\ { + \\ r1 += 
asl(r28,#52 -32) + \\ jumpr r31 + \\ } + \\ .Lsqrt_abnormal: + \\ { + \\ p0 = dfclass(r1:0,#0x01) + \\ if (p0.new) jumpr:t r31 + \\ } + \\ { + \\ p0 = dfclass(r1:0,#0x10) + \\ if (p0.new) jump:nt .Lsqrt_nan + \\ } + \\ { + \\ p0 = cmp.gt(r1,#-1) + \\ if (!p0.new) jump:nt .Lsqrt_invalid_neg + \\ if (!p0.new) r28 = ##0x7F800001 + \\ } + \\ { + \\ p0 = dfclass(r1:0,#0x08) + \\ if (p0.new) jumpr:nt r31 + \\ } + \\ + \\ + \\ { + \\ r1:0 = extractu(r1:0,#52,#0) + \\ } + \\ { + \\ r28 = add(clb(r1:0),#-11) + \\ } + \\ { + \\ r1:0 = asl(r1:0,r28) + \\ r28 = sub(#1,r28) + \\ } + \\ { + \\ r1 = insert(r28,#1,#52 -32) + \\ } + \\ { + \\ r3:2 = extractu(r1:0,#23 +1,#52 -23) + \\ r5 = ##0x3f000004 + \\ } + \\ { + \\ r9 = or(r5,r2) + \\ r5 = and(r5,#-16) + \\ jump .Ldenormal_restart + \\ } + \\ .Lsqrt_nan: + \\ { + \\ r28 = convert_df2sf(r1:0) + \\ r1:0 = #-1 + \\ jumpr r31 + \\ } + \\ .Lsqrt_invalid_neg: + \\ { + \\ r1:0 = convert_sf2df(r28) + \\ jumpr r31 + \\ } + ); +} + +comptime { + if (builtin.cpu.arch == .hexagon) { + @export(&__hexagon_adddf3, .{ .name = "__hexagon_adddf3", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__hexagon_adddf3, .{ .name = "__hexagon_fast_adddf3", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__hexagon_subdf3, .{ .name = "__hexagon_subdf3", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__hexagon_subdf3, .{ .name = "__hexagon_fast_subdf3", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__hexagon_divdf3, .{ .name = "__hexagon_divdf3", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__hexagon_divdf3, .{ .name = "__hexagon_fast_divdf3", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__hexagon_muldf3, .{ .name = "__hexagon_muldf3", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__hexagon_muldf3, .{ .name = "__hexagon_fast_muldf3", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__hexagon_sqrtdf2, .{ .name = "__hexagon_sqrtdf2", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__hexagon_sqrtdf2, .{ .name = "__hexagon_fast2_sqrtdf2", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__hexagon_sqrtdf2, .{ .name = "__hexagon_sqrt", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__hexagon_divsf3, .{ .name = "__hexagon_divsf3", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__hexagon_divsf3, .{ .name = "__hexagon_fast_divsf3", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__hexagon_divsi3, .{ .name = "__hexagon_divsi3", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__hexagon_umodsi3, .{ .name = "__hexagon_umodsi3", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__hexagon_sqrtf, .{ .name = "__hexagon_sqrtf", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__hexagon_sqrtf, .{ .name = "__hexagon_fast2_sqrtf", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__hexagon_moddi3, .{ .name = "__hexagon_moddi3", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__hexagon_divdi3, .{ .name = "__hexagon_divdi3", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__hexagon_udivdi3, .{ .name = "__hexagon_udivdi3", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__hexagon_umoddi3, 
.{ .name = "__hexagon_umoddi3", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__hexagon_modsi3, .{ .name = "__hexagon_modsi3", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes, .{ .name = "__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__hexagon_udivsi3, .{ .name = "__hexagon_udivsi3", .linkage = common.linkage, .visibility = common.visibility }); + } +} diff --git a/lib/libc/musl/arch/i386/crt_arch.h b/lib/libc/musl/arch/i386/crt_arch.h index 43c8477a81..1a80fce353 100644 --- a/lib/libc/musl/arch/i386/crt_arch.h +++ b/lib/libc/musl/arch/i386/crt_arch.h @@ -3,6 +3,7 @@ __asm__( ".weak _DYNAMIC \n" ".hidden _DYNAMIC \n" ".global " START "\n" +".type " START ",%function \n" START ":\n" " xor %ebp,%ebp \n" " mov %esp,%eax \n" diff --git a/lib/libc/musl/arch/x86_64/crt_arch.h b/lib/libc/musl/arch/x86_64/crt_arch.h index 3eec61bdcd..b1c9c4761d 100644 --- a/lib/libc/musl/arch/x86_64/crt_arch.h +++ b/lib/libc/musl/arch/x86_64/crt_arch.h @@ -1,6 +1,7 @@ __asm__( ".text \n" ".global " START " \n" +".type " START ",%function \n" START ": \n" " xor %rbp,%rbp \n" " mov %rsp,%rdi \n" diff --git a/lib/libc/musl/libc.S b/lib/libc/musl/libc.S index cb8b590a9e..36875b8a7b 100644 --- a/lib/libc/musl/libc.S +++ b/lib/libc/musl/libc.S @@ -7,6 +7,13 @@ #define PTR_SIZE_BYTES 4 #define PTR2_SIZE_BYTES 8 #endif + +#ifdef TIME32 +#define WEAKTIME64 .globl +#else +#define WEAKTIME64 .weak +#endif + .bss .weak ___environ .type ___environ, %object; @@ -168,18 +175,64 @@ _IO_putc: .weak _IO_putc_unlocked .type _IO_putc_unlocked, %function; _IO_putc_unlocked: -#if !defined(ARCH_riscv64) && !defined(ARCH_mips) && !defined(ARCH_mips64) && !defined(ARCH_x86_64) && !defined(ARCH_powerpc) && !defined(ARCH_powerpc64) && !defined(ARCH_aarch64) && !defined(ARCH_riscv32) && !defined(ARCH_loongarch64) +#ifdef ARCH_i386 .globl ___tls_get_addr .type ___tls_get_addr, %function; ___tls_get_addr: #endif -#ifdef PTR32 +#ifdef TIME32 .globl __adjtime64 .type __adjtime64, %function; __adjtime64: .globl __adjtimex_time64 .type __adjtimex_time64, %function; __adjtimex_time64: +#endif +#ifdef ARCH_arm +.globl __aeabi_atexit +.type __aeabi_atexit, %function; +__aeabi_atexit: +.globl __aeabi_memclr +.type __aeabi_memclr, %function; +__aeabi_memclr: +.globl __aeabi_memclr4 +.type __aeabi_memclr4, %function; +__aeabi_memclr4: +.globl __aeabi_memclr8 +.type __aeabi_memclr8, %function; +__aeabi_memclr8: +.globl __aeabi_memcpy +.type __aeabi_memcpy, %function; +__aeabi_memcpy: +.globl __aeabi_memcpy4 +.type __aeabi_memcpy4, %function; +__aeabi_memcpy4: +.globl __aeabi_memcpy8 +.type __aeabi_memcpy8, %function; +__aeabi_memcpy8: +.globl __aeabi_memmove +.type __aeabi_memmove, %function; +__aeabi_memmove: +.globl __aeabi_memmove4 +.type __aeabi_memmove4, %function; +__aeabi_memmove4: +.globl __aeabi_memmove8 +.type __aeabi_memmove8, %function; +__aeabi_memmove8: +.globl __aeabi_memset +.type __aeabi_memset, %function; +__aeabi_memset: +.globl __aeabi_memset4 +.type __aeabi_memset4, %function; +__aeabi_memset4: +.globl __aeabi_memset8 +.type __aeabi_memset8, %function; +__aeabi_memset8: +.globl __aeabi_read_tp +.type __aeabi_read_tp, %function; +__aeabi_read_tp: +#endif +#ifdef TIME32 .globl __aio_suspend_time64 .type __aio_suspend_time64, %function; __aio_suspend_time64: @@ -187,12 +240,12 @@ __aio_suspend_time64: .globl __assert_fail .type __assert_fail, 
%function; __assert_fail: -#if !defined(ARCH_riscv64) && !defined(ARCH_x86) && !defined(ARCH_x86_64) && !defined(ARCH_powerpc) && !defined(ARCH_powerpc64) && !defined(ARCH_aarch64) && !defined(ARCH_riscv32) && !defined(ARCH_loongarch64) +#ifdef FAMILY_mips .globl __cachectl .type __cachectl, %function; __cachectl: #endif -#ifdef PTR32 +#ifdef TIME32 .globl __clock_adjtime64 .type __clock_adjtime64, %function; __clock_adjtime64: @@ -236,7 +289,7 @@ __cxa_atexit: .globl __cxa_finalize .type __cxa_finalize, %function; __cxa_finalize: -#ifdef PTR32 +#ifdef TIME32 .globl __difftime64 .type __difftime64, %function; __difftime64: @@ -247,7 +300,12 @@ __dls2b: .globl __dls3 .type __dls3, %function; __dls3: -#ifdef PTR32 +#ifdef FAMILY_mips +.globl __dlstart +.type __dlstart, %function; +__dlstart: +#endif +#ifdef TIME32 .globl __dlsym_time64 .type __dlsym_time64, %function; __dlsym_time64: @@ -312,7 +370,7 @@ __fseterr: .globl __fsetlocking .type __fsetlocking, %function; __fsetlocking: -#ifdef PTR32 +#ifdef TIME32 .weak __fstat_time64 .type __fstat_time64, %function; __fstat_time64: @@ -347,7 +405,7 @@ __fxstatat: .weak __getdelim .type __getdelim, %function; __getdelim: -#ifdef PTR32 +#ifdef TIME32 .globl __getitimer_time64 .type __getitimer_time64, %function; __getitimer_time64: @@ -364,6 +422,11 @@ __gmtime64: .type __gmtime64_r, %function; __gmtime64_r: #endif +#ifdef ARCH_arm +.globl __gnu_Unwind_Find_exidx +.type __gnu_Unwind_Find_exidx, %function; +__gnu_Unwind_Find_exidx: +#endif .globl __h_errno_location .type __h_errno_location, %function; __h_errno_location: @@ -490,7 +553,7 @@ __libc_current_sigrtmin: .globl __libc_start_main .type __libc_start_main, %function; __libc_start_main: -#ifdef PTR32 +#ifdef TIME32 .globl __localtime64 .type __localtime64, %function; __localtime64: @@ -498,12 +561,12 @@ __localtime64: .type __localtime64_r, %function; __localtime64_r: #endif -#if !defined(ARCH_mips) && !defined(ARCH_mips64) && !defined(ARCH_x86) && !defined(ARCH_x86_64) && !defined(ARCH_powerpc) && !defined(ARCH_powerpc64) && !defined(ARCH_aarch64) && !defined(ARCH_loongarch64) +#ifdef FAMILY_riscv .globl __longjmp .type __longjmp, %function; __longjmp: #endif -#ifdef PTR32 +#ifdef TIME32 .globl __lstat_time64 .type __lstat_time64, %function; __lstat_time64: @@ -514,7 +577,7 @@ __lutimes_time64: .globl __lxstat .type __lxstat, %function; __lxstat: -#ifdef PTR32 +#ifdef TIME32 .globl __mktime64 .type __mktime64, %function; __mktime64: @@ -547,7 +610,7 @@ __overflow: .weak __posix_getopt .type __posix_getopt, %function; __posix_getopt: -#ifdef PTR32 +#ifdef TIME32 .globl __ppoll_time64 .type __ppoll_time64, %function; __ppoll_time64: @@ -576,17 +639,7 @@ __recvmmsg_time64: .globl __res_state .type __res_state, %function; __res_state: -#if !defined(ARCH_mips) && !defined(ARCH_mips64) && !defined(ARCH_x86) && !defined(ARCH_x86_64) && !defined(ARCH_powerpc) && !defined(ARCH_powerpc64) && !defined(ARCH_aarch64) && !defined(ARCH_riscv32) && !defined(ARCH_loongarch64) -.globl __restore -.type __restore, %function; -__restore: -#endif -#if !defined(ARCH_mips) && !defined(ARCH_mips64) && !defined(ARCH_x86) && !defined(ARCH_x86_64) && !defined(ARCH_powerpc) && !defined(ARCH_powerpc64) && !defined(ARCH_aarch64) && !defined(ARCH_riscv32) && !defined(ARCH_loongarch64) -.globl __restore_rt -.type __restore_rt, %function; -__restore_rt: -#endif -#if !defined(ARCH_mips) && !defined(ARCH_mips64) && !defined(ARCH_x86) && !defined(ARCH_x86_64) && !defined(ARCH_powerpc) && !defined(ARCH_powerpc64) && 
!defined(ARCH_aarch64) && !defined(ARCH_loongarch64) +#ifdef FAMILY_riscv .globl __riscv_flush_icache .type __riscv_flush_icache, %function; __riscv_flush_icache: @@ -594,7 +647,7 @@ __riscv_flush_icache: .globl __sched_cpucount .type __sched_cpucount, %function; __sched_cpucount: -#ifdef PTR32 +#ifdef TIME32 .globl __sched_rr_get_interval_time64 .type __sched_rr_get_interval_time64, %function; __sched_rr_get_interval_time64: @@ -614,7 +667,7 @@ __setitimer_time64: .globl __setjmp .type __setjmp, %function; __setjmp: -#ifdef PTR32 +#ifdef TIME32 .globl __settimeofday_time64 .type __settimeofday_time64, %function; __settimeofday_time64: @@ -631,7 +684,7 @@ __signbitl: .globl __sigsetjmp .type __sigsetjmp, %function; __sigsetjmp: -#ifdef PTR32 +#ifdef TIME32 .globl __sigtimedwait_time64 .type __sigtimedwait_time64, %function; __sigtimedwait_time64: @@ -639,7 +692,7 @@ __sigtimedwait_time64: .globl __stack_chk_fail .type __stack_chk_fail, %function; __stack_chk_fail: -#ifdef PTR32 +#ifdef TIME32 .globl __stat_time64 .type __stat_time64, %function; __stat_time64: @@ -692,7 +745,7 @@ __strxfrm_l: .weak __sysv_signal .type __sysv_signal, %function; __sysv_signal: -#ifdef PTR32 +#ifdef TIME32 .globl __thrd_sleep_time64 .type __thrd_sleep_time64, %function; __thrd_sleep_time64: @@ -718,9 +771,16 @@ __timerfd_settime64: .type __timespec_get_time64, %function; __timespec_get_time64: #endif +#if !defined(ARCH_s390x) .globl __tls_get_addr .type __tls_get_addr, %function; __tls_get_addr: +#endif +#ifdef ARCH_s390x +.globl __tls_get_offset +.type __tls_get_offset, %function; +__tls_get_offset: +#endif .globl __tolower_l .type __tolower_l, %function; __tolower_l: @@ -743,7 +803,7 @@ __uflow: .globl __uselocale .type __uselocale, %function; __uselocale: -#ifdef PTR32 +#ifdef TIME32 .globl __utime64 .type __utime64, %function; __utime64: @@ -796,7 +856,7 @@ _dl_debug_state: .globl _dlstart .type _dlstart, %function; _dlstart: -#if !defined(ARCH_riscv64) && !defined(ARCH_mips) && !defined(ARCH_x86) && !defined(ARCH_x86_64) && !defined(ARCH_powerpc) && !defined(ARCH_powerpc64) && !defined(ARCH_aarch64) && !defined(ARCH_riscv32) && !defined(ARCH_loongarch64) +#ifdef FAMILY_mips .globl _dlstart_data .type _dlstart_data, %function; _dlstart_data: @@ -807,7 +867,7 @@ _exit: .weak _fini .type _fini, %function; _fini: -#if !defined(ARCH_riscv64) && !defined(ARCH_x86) && !defined(ARCH_x86_64) && !defined(ARCH_powerpc) && !defined(ARCH_powerpc64) && !defined(ARCH_aarch64) && !defined(ARCH_riscv32) && !defined(ARCH_loongarch64) +#ifdef FAMILY_mips .globl _flush_cache .type _flush_cache, %function; _flush_cache: @@ -908,7 +968,7 @@ aligned_alloc: .globl alphasort .type alphasort, %function; alphasort: -#if !defined(ARCH_riscv64) && !defined(ARCH_mips) && !defined(ARCH_mips64) && !defined(ARCH_powerpc) && !defined(ARCH_powerpc64) && !defined(ARCH_aarch64) && !defined(ARCH_riscv32) && !defined(ARCH_loongarch64) +#ifdef FAMILY_x86 .globl arch_prctl .type arch_prctl, %function; arch_prctl: @@ -1033,12 +1093,10 @@ cabsf: .globl cabsl .type cabsl, %function; cabsl: -#if !defined(ARCH_riscv64) && !defined(ARCH_x86) && !defined(ARCH_x86_64) && !defined(ARCH_powerpc) && !defined(ARCH_powerpc64) && !defined(ARCH_aarch64) && !defined(ARCH_riscv32) && !defined(ARCH_loongarch64) +#ifdef FAMILY_mips .weak cachectl .type cachectl, %function; cachectl: -#endif -#if !defined(ARCH_riscv64) && !defined(ARCH_x86) && !defined(ARCH_x86_64) && !defined(ARCH_powerpc) && !defined(ARCH_powerpc64) && !defined(ARCH_aarch64) && 
!defined(ARCH_riscv32) && !defined(ARCH_loongarch64) .weak cacheflush .type cacheflush, %function; cacheflush: @@ -1232,10 +1290,10 @@ clock_getcpuclockid: .globl clock_getres .type clock_getres, %function; clock_getres: -WEAK64 clock_gettime +WEAKTIME64 clock_gettime .type clock_gettime, %function; clock_gettime: -WEAK64 clock_nanosleep +WEAKTIME64 clock_nanosleep .type clock_nanosleep, %function; clock_nanosleep: .globl clock_settime @@ -2018,10 +2076,10 @@ fsetpos: .globl fsetxattr .type fsetxattr, %function; fsetxattr: -WEAK64 fstat +WEAKTIME64 fstat .type fstat, %function; fstat: -WEAK64 fstatat +WEAKTIME64 fstatat .type fstatat, %function; fstatat: .weak fstatfs @@ -2063,7 +2121,7 @@ futimens: .globl futimes .type futimes, %function; futimes: -WEAK64 futimesat +WEAKTIME64 futimesat .type futimesat, %function; futimesat: .globl fwide @@ -2408,7 +2466,7 @@ globfree: .globl gmtime .type gmtime, %function; gmtime: -WEAK64 gmtime_r +WEAKTIME64 gmtime_r .type gmtime_r, %function; gmtime_r: .globl grantpt @@ -2549,12 +2607,12 @@ insque: .globl ioctl .type ioctl, %function; ioctl: -#if !defined(ARCH_riscv64) && !defined(ARCH_mips64) && !defined(ARCH_aarch64) && !defined(ARCH_riscv32) && !defined(ARCH_loongarch64) +#if !defined(ARCH_aarch64) && !defined(ARCH_arm) && !defined(ARCH_loongarch64) && !defined(ARCH_mips64) && !defined(ARCH_mipsn32) && !defined(ARCH_riscv32) && !defined(ARCH_riscv64) && !defined(ARCH_s390x) .globl ioperm .type ioperm, %function; ioperm: #endif -#if !defined(ARCH_riscv64) && !defined(ARCH_mips64) && !defined(ARCH_aarch64) && !defined(ARCH_riscv32) && !defined(ARCH_loongarch64) +#if !defined(ARCH_aarch64) && !defined(ARCH_arm) && !defined(ARCH_loongarch64) && !defined(ARCH_mips64) && !defined(ARCH_mipsn32) && !defined(ARCH_riscv32) && !defined(ARCH_riscv64) && !defined(ARCH_s390x) .globl iopl .type iopl, %function; iopl: @@ -2853,7 +2911,7 @@ localeconv: .globl localtime .type localtime, %function; localtime: -WEAK64 localtime_r +WEAKTIME64 localtime_r .type localtime_r, %function; localtime_r: .globl lockf @@ -3552,7 +3610,7 @@ pthread_cond_init: .globl pthread_cond_signal .type pthread_cond_signal, %function; pthread_cond_signal: -WEAK64 pthread_cond_timedwait +WEAKTIME64 pthread_cond_timedwait .type pthread_cond_timedwait, %function; pthread_cond_timedwait: .globl pthread_cond_wait @@ -3642,7 +3700,7 @@ pthread_mutex_lock: .globl pthread_mutex_setprioceiling .type pthread_mutex_setprioceiling, %function; pthread_mutex_setprioceiling: -WEAK64 pthread_mutex_timedlock +WEAKTIME64 pthread_mutex_timedlock .type pthread_mutex_timedlock, %function; pthread_mutex_timedlock: .weak pthread_mutex_trylock @@ -3693,10 +3751,10 @@ pthread_rwlock_init: .weak pthread_rwlock_rdlock .type pthread_rwlock_rdlock, %function; pthread_rwlock_rdlock: -WEAK64 pthread_rwlock_timedrdlock +WEAKTIME64 pthread_rwlock_timedrdlock .type pthread_rwlock_timedrdlock, %function; pthread_rwlock_timedrdlock: -WEAK64 pthread_rwlock_timedwrlock +WEAKTIME64 pthread_rwlock_timedwrlock .type pthread_rwlock_timedwrlock, %function; pthread_rwlock_timedwrlock: .weak pthread_rwlock_tryrdlock @@ -3774,7 +3832,7 @@ pthread_spin_unlock: .weak pthread_testcancel .type pthread_testcancel, %function; pthread_testcancel: -WEAK64 pthread_timedjoin_np +WEAKTIME64 pthread_timedjoin_np .type pthread_timedjoin_np, %function; pthread_timedjoin_np: .weak pthread_tryjoin_np @@ -3999,7 +4057,7 @@ rintf: .globl rintl .type rintl, %function; rintl: -#if !defined(ARCH_mips) && !defined(ARCH_mips64) && !defined(ARCH_x86) && 
!defined(ARCH_x86_64) && !defined(ARCH_powerpc) && !defined(ARCH_powerpc64) && !defined(ARCH_aarch64) && !defined(ARCH_loongarch64) +#ifdef FAMILY_riscv .weak riscv_flush_icache .type riscv_flush_icache, %function; riscv_flush_icache: diff --git a/lib/libc/musl/src/signal/riscv32/restore.s b/lib/libc/musl/src/signal/riscv32/restore.s index 40012c757a..5a0af6959d 100644 --- a/lib/libc/musl/src/signal/riscv32/restore.s +++ b/lib/libc/musl/src/signal/riscv32/restore.s @@ -1,7 +1,9 @@ .global __restore +.hidden __restore .type __restore, %function __restore: .global __restore_rt +.hidden __restore_rt .type __restore_rt, %function __restore_rt: li a7, 139 # SYS_rt_sigreturn diff --git a/lib/libc/musl/src/signal/riscv64/restore.s b/lib/libc/musl/src/signal/riscv64/restore.s index 40012c757a..5a0af6959d 100644 --- a/lib/libc/musl/src/signal/riscv64/restore.s +++ b/lib/libc/musl/src/signal/riscv64/restore.s @@ -1,7 +1,9 @@ .global __restore +.hidden __restore .type __restore, %function __restore: .global __restore_rt +.hidden __restore_rt .type __restore_rt, %function __restore_rt: li a7, 139 # SYS_rt_sigreturn diff --git a/lib/libc/musl/src/thread/s390x/__tls_get_offset.s b/lib/libc/musl/src/thread/s390x/__tls_get_offset.s index 8ee92de8ea..2e0913ccb0 100644 --- a/lib/libc/musl/src/thread/s390x/__tls_get_offset.s +++ b/lib/libc/musl/src/thread/s390x/__tls_get_offset.s @@ -5,6 +5,7 @@ __tls_get_offset: aghi %r15, -160 la %r2, 0(%r2, %r12) +.hidden __tls_get_addr brasl %r14, __tls_get_addr ear %r1, %a0 diff --git a/lib/std/crypto.zig b/lib/std/crypto.zig index aa524fa2c2..7b167a467a 100644 --- a/lib/std/crypto.zig +++ b/lib/std/crypto.zig @@ -7,10 +7,23 @@ pub const timing_safe = @import("crypto/timing_safe.zig"); /// Authenticated Encryption with Associated Data pub const aead = struct { pub const aegis = struct { - pub const Aegis128L = @import("crypto/aegis.zig").Aegis128L; - pub const Aegis128L_256 = @import("crypto/aegis.zig").Aegis128L_256; - pub const Aegis256 = @import("crypto/aegis.zig").Aegis256; - pub const Aegis256_256 = @import("crypto/aegis.zig").Aegis256_256; + const variants = @import("crypto/aegis.zig"); + + pub const Aegis128X4 = variants.Aegis128X4; + pub const Aegis128X2 = variants.Aegis128X2; + pub const Aegis128L = variants.Aegis128L; + + pub const Aegis256X4 = variants.Aegis256X4; + pub const Aegis256X2 = variants.Aegis256X2; + pub const Aegis256 = variants.Aegis256; + + pub const Aegis128X4_256 = variants.Aegis128X4_256; + pub const Aegis128X2_256 = variants.Aegis128X2_256; + pub const Aegis128L_256 = variants.Aegis128L_256; + + pub const Aegis256X4_256 = variants.Aegis256X4_256; + pub const Aegis256X2_256 = variants.Aegis256X2_256; + pub const Aegis256_256 = variants.Aegis256_256; }; pub const aes_gcm = struct { @@ -44,10 +57,22 @@ pub const auth = struct { pub const hmac = @import("crypto/hmac.zig"); pub const siphash = @import("crypto/siphash.zig"); pub const aegis = struct { - pub const Aegis128LMac = @import("crypto/aegis.zig").Aegis128LMac; - pub const Aegis128LMac_128 = @import("crypto/aegis.zig").Aegis128LMac_128; - pub const Aegis256Mac = @import("crypto/aegis.zig").Aegis256Mac; - pub const Aegis256Mac_128 = @import("crypto/aegis.zig").Aegis256Mac_128; + const variants = @import("crypto/aegis.zig"); + pub const Aegis128X4Mac = variants.Aegis128X4Mac; + pub const Aegis128X2Mac = variants.Aegis128X2Mac; + pub const Aegis128LMac = variants.Aegis128LMac; + + pub const Aegis256X4Mac = variants.Aegis256X4Mac; + pub const Aegis256X2Mac = variants.Aegis256X2Mac; + pub const 
Aegis256Mac = variants.Aegis256Mac;
+
+        pub const Aegis128X4Mac_128 = variants.Aegis128X4Mac_128;
+        pub const Aegis128X2Mac_128 = variants.Aegis128X2Mac_128;
+        pub const Aegis128LMac_128 = variants.Aegis128LMac_128;
+
+        pub const Aegis256X4Mac_128 = variants.Aegis256X4Mac_128;
+        pub const Aegis256X2Mac_128 = variants.Aegis256X2Mac_128;
+        pub const Aegis256Mac_128 = variants.Aegis256Mac_128;
     };
     pub const cmac = @import("crypto/cmac.zig");
 };
diff --git a/lib/std/crypto/aegis.zig b/lib/std/crypto/aegis.zig
index 67cc13c8c0..be6a655850 100644
--- a/lib/std/crypto/aegis.zig
+++ b/lib/std/crypto/aegis.zig
@@ -1,16 +1,21 @@
 //! AEGIS is a very fast authenticated encryption system built on top of the core AES function.
 //!
-//! The AEGIS-128L variant has a 128 bit key, a 128 bit nonce, and processes 256 bit message blocks.
-//! The AEGIS-256 variant has a 256 bit key, a 256 bit nonce, and processes 128 bit message blocks.
+//! The AEGIS-128* variants have a 128 bit key and a 128 bit nonce.
+//! The AEGIS-256* variants have a 256 bit key and a 256 bit nonce.
+//! All of them can compute 128 and 256 bit authentication tags.
 //!
 //! The AEGIS cipher family offers performance that significantly exceeds that of AES-GCM with
 //! hardware support for parallelizable AES block encryption.
 //!
-//! Unlike with AES-GCM, nonces can be safely chosen at random with no practical limit when using AEGIS-256.
-//! AEGIS-128L also allows for more messages to be safely encrypted when using random nonces.
+//! On high-end Intel CPUs with AVX-512 support, AEGIS-128X4 and AEGIS-256X4 are the fastest options.
+//! On other modern server, desktop and mobile CPUs, AEGIS-128X2 and AEGIS-256X2 are usually the fastest options.
+//! AEGIS-128L and AEGIS-256 perform well on a broad range of platforms, including WebAssembly.
 //!
-//! AEGIS is believed to be key-committing, making it a safer choice than most other AEADs
-//! when the key has low entropy, or can be controlled by an attacker.
+//! Unlike with AES-GCM, nonces can be safely chosen at random with no practical limit when using AEGIS-256*.
+//! AEGIS-128* also allows for more messages to be safely encrypted when using random nonces.
+//!
+//! Unless the associated data can be fully controlled by an adversary, AEGIS is believed to be key-committing,
+//! making it a safer choice than most other AEADs when the key has low entropy, or can be controlled by an attacker.
 //!
 //! Finally, leaking the state does not leak the key.
 //!
@@ -20,305 +25,202 @@
 const std = @import("std");
 const crypto = std.crypto;
 const mem = std.mem;
 const assert = std.debug.assert;
-const AesBlock = crypto.core.aes.Block;
 const AuthenticationError = crypto.errors.AuthenticationError;
 
-/// AEGIS-128L with a 128-bit authentication tag.
-pub const Aegis128L = Aegis128LGeneric(128);
+/// AEGIS-128X4 with a 128 bit tag
+pub const Aegis128X4 = Aegis128XGeneric(4, 128);
+/// AEGIS-128X2 with a 128 bit tag
+pub const Aegis128X2 = Aegis128XGeneric(2, 128);
+/// AEGIS-128L with a 128 bit tag
+pub const Aegis128L = Aegis128XGeneric(1, 128);
 
-/// AEGIS-128L with a 256-bit authentication tag.
-pub const Aegis128L_256 = Aegis128LGeneric(256);
+/// AEGIS-256X4 with a 128 bit tag
+pub const Aegis256X4 = Aegis256XGeneric(4, 128);
+/// AEGIS-256X2 with a 128 bit tag
+pub const Aegis256X2 = Aegis256XGeneric(2, 128);
+/// AEGIS-256 with a 128 bit tag
+pub const Aegis256 = Aegis256XGeneric(1, 128);
 
-/// AEGIS-256 with a 128-bit authentication tag.
-pub const Aegis256 = Aegis256Generic(128); +/// AEGIS-128X4 with a 256 bit tag +pub const Aegis128X4_256 = Aegis128XGeneric(4, 256); +/// AEGIS-128X2 with a 256 bit tag +pub const Aegis128X2_256 = Aegis128XGeneric(2, 256); +/// AEGIS-128L with a 256 bit tag +pub const Aegis128L_256 = Aegis128XGeneric(1, 256); -/// AEGIS-256 with a 256-bit authentication tag. -pub const Aegis256_256 = Aegis256Generic(256); - -const State128L = struct { - blocks: [8]AesBlock, - - fn init(key: [16]u8, nonce: [16]u8) State128L { - const c1 = AesBlock.fromBytes(&[16]u8{ 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1, 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd }); - const c2 = AesBlock.fromBytes(&[16]u8{ 0x0, 0x1, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d, 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62 }); - const key_block = AesBlock.fromBytes(&key); - const nonce_block = AesBlock.fromBytes(&nonce); - const blocks = [8]AesBlock{ - key_block.xorBlocks(nonce_block), - c1, - c2, - c1, - key_block.xorBlocks(nonce_block), - key_block.xorBlocks(c2), - key_block.xorBlocks(c1), - key_block.xorBlocks(c2), - }; - var state = State128L{ .blocks = blocks }; - var i: usize = 0; - while (i < 10) : (i += 1) { - state.update(nonce_block, key_block); - } - return state; - } - - inline fn update(state: *State128L, d1: AesBlock, d2: AesBlock) void { - const blocks = &state.blocks; - const tmp = blocks[7]; - comptime var i: usize = 7; - inline while (i > 0) : (i -= 1) { - blocks[i] = blocks[i - 1].encrypt(blocks[i]); - } - blocks[0] = tmp.encrypt(blocks[0]); - blocks[0] = blocks[0].xorBlocks(d1); - blocks[4] = blocks[4].xorBlocks(d2); - } - - fn absorb(state: *State128L, src: *const [32]u8) void { - const msg0 = AesBlock.fromBytes(src[0..16]); - const msg1 = AesBlock.fromBytes(src[16..32]); - state.update(msg0, msg1); - } - - fn enc(state: *State128L, dst: *[32]u8, src: *const [32]u8) void { - const blocks = &state.blocks; - const msg0 = AesBlock.fromBytes(src[0..16]); - const msg1 = AesBlock.fromBytes(src[16..32]); - var tmp0 = msg0.xorBlocks(blocks[6]).xorBlocks(blocks[1]); - var tmp1 = msg1.xorBlocks(blocks[2]).xorBlocks(blocks[5]); - tmp0 = tmp0.xorBlocks(blocks[2].andBlocks(blocks[3])); - tmp1 = tmp1.xorBlocks(blocks[6].andBlocks(blocks[7])); - dst[0..16].* = tmp0.toBytes(); - dst[16..32].* = tmp1.toBytes(); - state.update(msg0, msg1); - } - - fn dec(state: *State128L, dst: *[32]u8, src: *const [32]u8) void { - const blocks = &state.blocks; - var msg0 = AesBlock.fromBytes(src[0..16]).xorBlocks(blocks[6]).xorBlocks(blocks[1]); - var msg1 = AesBlock.fromBytes(src[16..32]).xorBlocks(blocks[2]).xorBlocks(blocks[5]); - msg0 = msg0.xorBlocks(blocks[2].andBlocks(blocks[3])); - msg1 = msg1.xorBlocks(blocks[6].andBlocks(blocks[7])); - dst[0..16].* = msg0.toBytes(); - dst[16..32].* = msg1.toBytes(); - state.update(msg0, msg1); - } - - fn mac(state: *State128L, comptime tag_bits: u9, adlen: usize, mlen: usize) [tag_bits / 8]u8 { - const blocks = &state.blocks; - var sizes: [16]u8 = undefined; - mem.writeInt(u64, sizes[0..8], @as(u64, adlen) * 8, .little); - mem.writeInt(u64, sizes[8..16], @as(u64, mlen) * 8, .little); - const tmp = AesBlock.fromBytes(&sizes).xorBlocks(blocks[2]); - var i: usize = 0; - while (i < 7) : (i += 1) { - state.update(tmp, tmp); - } - return switch (tag_bits) { - 128 => blocks[0].xorBlocks(blocks[1]).xorBlocks(blocks[2]).xorBlocks(blocks[3]) - .xorBlocks(blocks[4]).xorBlocks(blocks[5]).xorBlocks(blocks[6]).toBytes(), - 256 => tag: { - const t1 = 
blocks[0].xorBlocks(blocks[1]).xorBlocks(blocks[2]).xorBlocks(blocks[3]); - const t2 = blocks[4].xorBlocks(blocks[5]).xorBlocks(blocks[6]).xorBlocks(blocks[7]); - break :tag t1.toBytes() ++ t2.toBytes(); - }, - else => unreachable, - }; - } -}; - -fn Aegis128LGeneric(comptime tag_bits: u9) type { - comptime assert(tag_bits == 128 or tag_bits == 256); // tag must be 128 or 256 bits +/// AEGIS-256X4 with a 256 bit tag +pub const Aegis256X4_256 = Aegis256XGeneric(4, 256); +/// AEGIS-256X2 with a 256 bit tag +pub const Aegis256X2_256 = Aegis256XGeneric(2, 256); +/// AEGIS-256 with a 256 bit tag +pub const Aegis256_256 = Aegis256XGeneric(1, 256); +fn State128X(comptime degree: u7) type { return struct { - pub const tag_length = tag_bits / 8; - pub const nonce_length = 16; - pub const key_length = 16; - pub const block_length = 32; + const AesBlockVec = crypto.core.aes.BlockVec(degree); + const State = @This(); - const State = State128L; + blocks: [8]AesBlockVec, - /// c: ciphertext: output buffer should be of size m.len - /// tag: authentication tag: output MAC - /// m: message - /// ad: Associated Data - /// npub: public nonce - /// k: private key - pub fn encrypt(c: []u8, tag: *[tag_length]u8, m: []const u8, ad: []const u8, npub: [nonce_length]u8, key: [key_length]u8) void { - assert(c.len == m.len); - var state = State128L.init(key, npub); - var src: [32]u8 align(16) = undefined; - var dst: [32]u8 align(16) = undefined; - var i: usize = 0; - while (i + 32 <= ad.len) : (i += 32) { - state.absorb(ad[i..][0..32]); + const aes_block_length = AesBlockVec.block_length; + const rate = aes_block_length * 2; + const alignment = AesBlockVec.native_word_size; + + fn init(key: [16]u8, nonce: [16]u8) State { + const c1 = AesBlockVec.fromBytes(&[16]u8{ 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1, 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd } ** degree); + const c2 = AesBlockVec.fromBytes(&[16]u8{ 0x0, 0x1, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d, 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62 } ** degree); + const key_block = AesBlockVec.fromBytes(&(key ** degree)); + const nonce_block = AesBlockVec.fromBytes(&(nonce ** degree)); + const blocks = [8]AesBlockVec{ + key_block.xorBlocks(nonce_block), + c1, + c2, + c1, + key_block.xorBlocks(nonce_block), + key_block.xorBlocks(c2), + key_block.xorBlocks(c1), + key_block.xorBlocks(c2), + }; + var state = State{ .blocks = blocks }; + if (degree > 1) { + const context_block = ctx: { + var contexts_bytes = [_]u8{0} ** aes_block_length; + for (0..degree) |i| { + contexts_bytes[i * 16] = @intCast(i); + contexts_bytes[i * 16 + 1] = @intCast(degree - 1); + } + break :ctx AesBlockVec.fromBytes(&contexts_bytes); + }; + for (0..10) |_| { + state.blocks[3] = state.blocks[3].xorBlocks(context_block); + state.blocks[7] = state.blocks[7].xorBlocks(context_block); + state.update(nonce_block, key_block); + } + } else { + for (0..10) |_| { + state.update(nonce_block, key_block); + } } - if (ad.len % 32 != 0) { - @memset(src[0..], 0); - @memcpy(src[0 .. ad.len % 32], ad[i..][0 .. ad.len % 32]); - state.absorb(&src); - } - i = 0; - while (i + 32 <= m.len) : (i += 32) { - state.enc(c[i..][0..32], m[i..][0..32]); - } - if (m.len % 32 != 0) { - @memset(src[0..], 0); - @memcpy(src[0 .. m.len % 32], m[i..][0 .. m.len % 32]); - state.enc(&dst, &src); - @memcpy(c[i..][0 .. m.len % 32], dst[0 .. 
m.len % 32]); - } - tag.* = state.mac(tag_bits, ad.len, m.len); + return state; } - /// `m`: Message - /// `c`: Ciphertext - /// `tag`: Authentication tag - /// `ad`: Associated data - /// `npub`: Public nonce - /// `k`: Private key - /// Asserts `c.len == m.len`. - /// - /// Contents of `m` are undefined if an error is returned. - pub fn decrypt(m: []u8, c: []const u8, tag: [tag_length]u8, ad: []const u8, npub: [nonce_length]u8, key: [key_length]u8) AuthenticationError!void { - assert(c.len == m.len); - var state = State128L.init(key, npub); - var src: [32]u8 align(16) = undefined; - var dst: [32]u8 align(16) = undefined; - var i: usize = 0; - while (i + 32 <= ad.len) : (i += 32) { - state.absorb(ad[i..][0..32]); + inline fn update(state: *State, d1: AesBlockVec, d2: AesBlockVec) void { + const blocks = &state.blocks; + const tmp = blocks[7]; + comptime var i: usize = 7; + inline while (i > 0) : (i -= 1) { + blocks[i] = blocks[i - 1].encrypt(blocks[i]); } - if (ad.len % 32 != 0) { - @memset(src[0..], 0); - @memcpy(src[0 .. ad.len % 32], ad[i..][0 .. ad.len % 32]); - state.absorb(&src); + blocks[0] = tmp.encrypt(blocks[0]); + blocks[0] = blocks[0].xorBlocks(d1); + blocks[4] = blocks[4].xorBlocks(d2); + } + + fn absorb(state: *State, src: *const [rate]u8) void { + const msg0 = AesBlockVec.fromBytes(src[0..aes_block_length]); + const msg1 = AesBlockVec.fromBytes(src[aes_block_length..rate]); + state.update(msg0, msg1); + } + + fn enc(state: *State, dst: *[rate]u8, src: *const [rate]u8) void { + const blocks = &state.blocks; + const msg0 = AesBlockVec.fromBytes(src[0..aes_block_length]); + const msg1 = AesBlockVec.fromBytes(src[aes_block_length..rate]); + var tmp0 = msg0.xorBlocks(blocks[6]).xorBlocks(blocks[1]); + var tmp1 = msg1.xorBlocks(blocks[2]).xorBlocks(blocks[5]); + tmp0 = tmp0.xorBlocks(blocks[2].andBlocks(blocks[3])); + tmp1 = tmp1.xorBlocks(blocks[6].andBlocks(blocks[7])); + dst[0..aes_block_length].* = tmp0.toBytes(); + dst[aes_block_length..rate].* = tmp1.toBytes(); + state.update(msg0, msg1); + } + + fn dec(state: *State, dst: *[rate]u8, src: *const [rate]u8) void { + const blocks = &state.blocks; + var msg0 = AesBlockVec.fromBytes(src[0..aes_block_length]).xorBlocks(blocks[6]).xorBlocks(blocks[1]); + var msg1 = AesBlockVec.fromBytes(src[aes_block_length..rate]).xorBlocks(blocks[2]).xorBlocks(blocks[5]); + msg0 = msg0.xorBlocks(blocks[2].andBlocks(blocks[3])); + msg1 = msg1.xorBlocks(blocks[6].andBlocks(blocks[7])); + dst[0..aes_block_length].* = msg0.toBytes(); + dst[aes_block_length..rate].* = msg1.toBytes(); + state.update(msg0, msg1); + } + + fn decLast(state: *State, dst: []u8, src: []const u8) void { + const blocks = &state.blocks; + const z0 = blocks[6].xorBlocks(blocks[1]).xorBlocks(blocks[2].andBlocks(blocks[3])); + const z1 = blocks[2].xorBlocks(blocks[5]).xorBlocks(blocks[6].andBlocks(blocks[7])); + var pad = [_]u8{0} ** rate; + pad[0..aes_block_length].* = z0.toBytes(); + pad[aes_block_length..].* = z1.toBytes(); + for (pad[0..src.len], src) |*p, x| p.* ^= x; + @memcpy(dst, pad[0..src.len]); + @memset(pad[src.len..], 0); + const msg0 = AesBlockVec.fromBytes(pad[0..aes_block_length]); + const msg1 = AesBlockVec.fromBytes(pad[aes_block_length..rate]); + state.update(msg0, msg1); + } + + fn mac(state: *State, comptime tag_bits: u9, adlen: usize, mlen: usize) [tag_bits / 8]u8 { + const blocks = &state.blocks; + var sizes: [aes_block_length]u8 = undefined; + mem.writeInt(u64, sizes[0..8], @as(u64, adlen) * 8, .little); + mem.writeInt(u64, sizes[8..16], @as(u64, mlen) * 
8, .little); + for (1..degree) |i| { + @memcpy(sizes[i * 16 ..][0..16], sizes[0..16]); } - i = 0; - while (i + 32 <= m.len) : (i += 32) { - state.dec(m[i..][0..32], c[i..][0..32]); + const tmp = AesBlockVec.fromBytes(&sizes).xorBlocks(blocks[2]); + for (0..7) |_| { + state.update(tmp, tmp); } - if (m.len % 32 != 0) { - @memset(src[0..], 0); - @memcpy(src[0 .. m.len % 32], c[i..][0 .. m.len % 32]); - state.dec(&dst, &src); - @memcpy(m[i..][0 .. m.len % 32], dst[0 .. m.len % 32]); - @memset(dst[0 .. m.len % 32], 0); - const blocks = &state.blocks; - blocks[0] = blocks[0].xorBlocks(AesBlock.fromBytes(dst[0..16])); - blocks[4] = blocks[4].xorBlocks(AesBlock.fromBytes(dst[16..32])); - } - var computed_tag = state.mac(tag_bits, ad.len, m.len); - const verify = crypto.timing_safe.eql([tag_length]u8, computed_tag, tag); - if (!verify) { - crypto.secureZero(u8, &computed_tag); - @memset(m, undefined); - return error.AuthenticationFailed; + switch (tag_bits) { + 128 => { + const tag_multi = blocks[0].xorBlocks(blocks[1]).xorBlocks(blocks[2]).xorBlocks(blocks[3]).xorBlocks(blocks[4]).xorBlocks(blocks[5]).xorBlocks(blocks[6]).toBytes(); + var tag = tag_multi[0..16].*; + for (1..degree) |d| { + for (0..16) |i| { + tag[i] ^= tag_multi[d * 16 + i]; + } + } + return tag; + }, + 256 => { + const tag_multi_1 = blocks[0].xorBlocks(blocks[1]).xorBlocks(blocks[2]).xorBlocks(blocks[3]).toBytes(); + const tag_multi_2 = blocks[4].xorBlocks(blocks[5]).xorBlocks(blocks[6]).xorBlocks(blocks[7]).toBytes(); + var tag = tag_multi_1[0..16].* ++ tag_multi_2[0..16].*; + for (1..degree) |d| { + for (0..16) |i| { + tag[i] ^= tag_multi_1[d * 16 + i]; + tag[i + 16] ^= tag_multi_2[d * 16 + i]; + } + } + return tag; + }, + else => unreachable, } } }; } -const State256 = struct { - blocks: [6]AesBlock, - - fn init(key: [32]u8, nonce: [32]u8) State256 { - const c1 = AesBlock.fromBytes(&[16]u8{ 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1, 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd }); - const c2 = AesBlock.fromBytes(&[16]u8{ 0x0, 0x1, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d, 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62 }); - const key_block1 = AesBlock.fromBytes(key[0..16]); - const key_block2 = AesBlock.fromBytes(key[16..32]); - const nonce_block1 = AesBlock.fromBytes(nonce[0..16]); - const nonce_block2 = AesBlock.fromBytes(nonce[16..32]); - const kxn1 = key_block1.xorBlocks(nonce_block1); - const kxn2 = key_block2.xorBlocks(nonce_block2); - const blocks = [6]AesBlock{ - kxn1, - kxn2, - c1, - c2, - key_block1.xorBlocks(c2), - key_block2.xorBlocks(c1), - }; - var state = State256{ .blocks = blocks }; - var i: usize = 0; - while (i < 4) : (i += 1) { - state.update(key_block1); - state.update(key_block2); - state.update(kxn1); - state.update(kxn2); - } - return state; - } - - inline fn update(state: *State256, d: AesBlock) void { - const blocks = &state.blocks; - const tmp = blocks[5].encrypt(blocks[0]); - comptime var i: usize = 5; - inline while (i > 0) : (i -= 1) { - blocks[i] = blocks[i - 1].encrypt(blocks[i]); - } - blocks[0] = tmp.xorBlocks(d); - } - - fn absorb(state: *State256, src: *const [16]u8) void { - const msg = AesBlock.fromBytes(src); - state.update(msg); - } - - fn enc(state: *State256, dst: *[16]u8, src: *const [16]u8) void { - const blocks = &state.blocks; - const msg = AesBlock.fromBytes(src); - var tmp = msg.xorBlocks(blocks[5]).xorBlocks(blocks[4]).xorBlocks(blocks[1]); - tmp = tmp.xorBlocks(blocks[2].andBlocks(blocks[3])); - dst.* = tmp.toBytes(); - state.update(msg); -
} - - fn dec(state: *State256, dst: *[16]u8, src: *const [16]u8) void { - const blocks = &state.blocks; - var msg = AesBlock.fromBytes(src).xorBlocks(blocks[5]).xorBlocks(blocks[4]).xorBlocks(blocks[1]); - msg = msg.xorBlocks(blocks[2].andBlocks(blocks[3])); - dst.* = msg.toBytes(); - state.update(msg); - } - - fn mac(state: *State256, comptime tag_bits: u9, adlen: usize, mlen: usize) [tag_bits / 8]u8 { - const blocks = &state.blocks; - var sizes: [16]u8 = undefined; - mem.writeInt(u64, sizes[0..8], @as(u64, adlen) * 8, .little); - mem.writeInt(u64, sizes[8..16], @as(u64, mlen) * 8, .little); - const tmp = AesBlock.fromBytes(&sizes).xorBlocks(blocks[3]); - var i: usize = 0; - while (i < 7) : (i += 1) { - state.update(tmp); - } - return switch (tag_bits) { - 128 => blocks[0].xorBlocks(blocks[1]).xorBlocks(blocks[2]).xorBlocks(blocks[3]) - .xorBlocks(blocks[4]).xorBlocks(blocks[5]).toBytes(), - 256 => tag: { - const t1 = blocks[0].xorBlocks(blocks[1]).xorBlocks(blocks[2]); - const t2 = blocks[3].xorBlocks(blocks[4]).xorBlocks(blocks[5]); - break :tag t1.toBytes() ++ t2.toBytes(); - }, - else => unreachable, - }; - } -}; - /// AEGIS is a very fast authenticated encryption system built on top of the core AES function. /// -/// The 256 bit variant of AEGIS has a 256 bit key, a 256 bit nonce, and processes 128 bit message blocks. +/// The 128-bit variants of AEGIS have a 128 bit key and a 128 bit nonce. /// /// https://datatracker.ietf.org/doc/draft-irtf-cfrg-aegis-aead/ -fn Aegis256Generic(comptime tag_bits: u9) type { +fn Aegis128XGeneric(comptime degree: u7, comptime tag_bits: u9) type { + comptime assert(degree > 0); // degree must be greater than 0 comptime assert(tag_bits == 128 or tag_bits == 256); // tag must be 128 or 256 bits return struct { - pub const tag_length = tag_bits / 8; - pub const nonce_length = 32; - pub const key_length = 32; - pub const block_length = 16; + const State = State128X(degree); - const State = State256; + pub const tag_length = tag_bits / 8; + pub const nonce_length = 16; + pub const key_length = 16; + pub const block_length = State.rate; + + const alignment = State.alignment; /// c: ciphertext: output buffer should be of size m.len /// tag: authentication tag: output MAC @@ -328,27 +230,27 @@ fn Aegis256Generic(comptime tag_bits: u9) type { /// k: private key pub fn encrypt(c: []u8, tag: *[tag_length]u8, m: []const u8, ad: []const u8, npub: [nonce_length]u8, key: [key_length]u8) void { assert(c.len == m.len); - var state = State256.init(key, npub); - var src: [16]u8 align(16) = undefined; - var dst: [16]u8 align(16) = undefined; + var state = State.init(key, npub); + var src: [block_length]u8 align(alignment) = undefined; + var dst: [block_length]u8 align(alignment) = undefined; var i: usize = 0; - while (i + 16 <= ad.len) : (i += 16) { - state.enc(&dst, ad[i..][0..16]); + while (i + block_length <= ad.len) : (i += block_length) { + state.absorb(ad[i..][0..block_length]); } - if (ad.len % 16 != 0) { + if (ad.len % block_length != 0) { @memset(src[0..], 0); - @memcpy(src[0 .. ad.len % 16], ad[i..][0 .. ad.len % 16]); - state.enc(&dst, &src); + @memcpy(src[0 .. ad.len % block_length], ad[i..][0 .. ad.len % block_length]); + state.absorb(&src); } i = 0; - while (i + 16 <= m.len) : (i += 16) { - state.enc(c[i..][0..16], m[i..][0..16]); + while (i + block_length <= m.len) : (i += block_length) { + state.enc(c[i..][0..block_length], m[i..][0..block_length]); } - if (m.len % 16 != 0) { + if (m.len % block_length != 0) { @memset(src[0..], 0); - @memcpy(src[0 ..
m.len % 16], m[i..][0 .. m.len % 16]); + @memcpy(src[0 .. m.len % block_length], m[i..][0 .. m.len % block_length]); state.enc(&dst, &src); - @memcpy(c[i..][0 .. m.len % 16], dst[0 .. m.len % 16]); + @memcpy(c[i..][0 .. m.len % block_length], dst[0 .. m.len % block_length]); } tag.* = state.mac(tag_bits, ad.len, m.len); } @@ -364,30 +266,23 @@ fn Aegis256Generic(comptime tag_bits: u9) type { /// Contents of `m` are undefined if an error is returned. pub fn decrypt(m: []u8, c: []const u8, tag: [tag_length]u8, ad: []const u8, npub: [nonce_length]u8, key: [key_length]u8) AuthenticationError!void { assert(c.len == m.len); - var state = State256.init(key, npub); - var src: [16]u8 align(16) = undefined; - var dst: [16]u8 align(16) = undefined; + var state = State.init(key, npub); + var src: [block_length]u8 align(alignment) = undefined; var i: usize = 0; - while (i + 16 <= ad.len) : (i += 16) { - state.enc(&dst, ad[i..][0..16]); + while (i + block_length <= ad.len) : (i += block_length) { + state.absorb(ad[i..][0..block_length]); } - if (ad.len % 16 != 0) { + if (ad.len % block_length != 0) { @memset(src[0..], 0); - @memcpy(src[0 .. ad.len % 16], ad[i..][0 .. ad.len % 16]); - state.enc(&dst, &src); + @memcpy(src[0 .. ad.len % block_length], ad[i..][0 .. ad.len % block_length]); + state.absorb(&src); } i = 0; - while (i + 16 <= m.len) : (i += 16) { - state.dec(m[i..][0..16], c[i..][0..16]); + while (i + block_length <= m.len) : (i += block_length) { + state.dec(m[i..][0..block_length], c[i..][0..block_length]); } - if (m.len % 16 != 0) { - @memset(src[0..], 0); - @memcpy(src[0 .. m.len % 16], c[i..][0 .. m.len % 16]); - state.dec(&dst, &src); - @memcpy(m[i..][0 .. m.len % 16], dst[0 .. m.len % 16]); - @memset(dst[0 .. m.len % 16], 0); - const blocks = &state.blocks; - blocks[0] = blocks[0].xorBlocks(AesBlock.fromBytes(&dst)); + if (m.len % block_length != 0) { + state.decLast(m[i..], c[i..]); } var computed_tag = state.mac(tag_bits, ad.len, m.len); const verify = crypto.timing_safe.eql([tag_length]u8, computed_tag, tag); @@ -400,6 +295,264 @@ fn Aegis256Generic(comptime tag_bits: u9) type { }; } +fn State256X(comptime degree: u7) type { + return struct { + const AesBlockVec = crypto.core.aes.BlockVec(degree); + const State = @This(); + + blocks: [6]AesBlockVec, + + const aes_block_length = AesBlockVec.block_length; + const rate = aes_block_length; + const alignment = AesBlockVec.native_word_size; + + fn init(key: [32]u8, nonce: [32]u8) State { + const c1 = AesBlockVec.fromBytes(&[16]u8{ 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1, 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd } ** degree); + const c2 = AesBlockVec.fromBytes(&[16]u8{ 0x0, 0x1, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d, 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62 } ** degree); + const key_block1 = AesBlockVec.fromBytes(key[0..16] ** degree); + const key_block2 = AesBlockVec.fromBytes(key[16..32] ** degree); + const nonce_block1 = AesBlockVec.fromBytes(nonce[0..16] ** degree); + const nonce_block2 = AesBlockVec.fromBytes(nonce[16..32] ** degree); + const kxn1 = key_block1.xorBlocks(nonce_block1); + const kxn2 = key_block2.xorBlocks(nonce_block2); + const blocks = [6]AesBlockVec{ + kxn1, + kxn2, + c1, + c2, + key_block1.xorBlocks(c2), + key_block2.xorBlocks(c1), + }; + var state = State{ .blocks = blocks }; + if (degree > 1) { + const context_block = ctx: { + var contexts_bytes = [_]u8{0} ** aes_block_length; + for (0..degree) |i| { + contexts_bytes[i * 16] = @intCast(i); + contexts_bytes[i * 16 + 1] = @intCast(degree - 1); + } + 
break :ctx AesBlockVec.fromBytes(&contexts_bytes); + }; + for (0..4) |_| { + state.blocks[3] = state.blocks[3].xorBlocks(context_block); + state.blocks[5] = state.blocks[5].xorBlocks(context_block); + state.update(key_block1); + state.blocks[3] = state.blocks[3].xorBlocks(context_block); + state.blocks[5] = state.blocks[5].xorBlocks(context_block); + state.update(key_block2); + state.blocks[3] = state.blocks[3].xorBlocks(context_block); + state.blocks[5] = state.blocks[5].xorBlocks(context_block); + state.update(kxn1); + state.blocks[3] = state.blocks[3].xorBlocks(context_block); + state.blocks[5] = state.blocks[5].xorBlocks(context_block); + state.update(kxn2); + } + } else { + for (0..4) |_| { + state.update(key_block1); + state.update(key_block2); + state.update(kxn1); + state.update(kxn2); + } + } + return state; + } + + inline fn update(state: *State, d: AesBlockVec) void { + const blocks = &state.blocks; + const tmp = blocks[5].encrypt(blocks[0]); + comptime var i: usize = 5; + inline while (i > 0) : (i -= 1) { + blocks[i] = blocks[i - 1].encrypt(blocks[i]); + } + blocks[0] = tmp.xorBlocks(d); + } + + fn absorb(state: *State, src: *const [rate]u8) void { + const msg = AesBlockVec.fromBytes(src); + state.update(msg); + } + + fn enc(state: *State, dst: *[rate]u8, src: *const [rate]u8) void { + const blocks = &state.blocks; + const msg = AesBlockVec.fromBytes(src); + var tmp = msg.xorBlocks(blocks[5]).xorBlocks(blocks[4]).xorBlocks(blocks[1]); + tmp = tmp.xorBlocks(blocks[2].andBlocks(blocks[3])); + dst.* = tmp.toBytes(); + state.update(msg); + } + + fn dec(state: *State, dst: *[rate]u8, src: *const [rate]u8) void { + const blocks = &state.blocks; + var msg = AesBlockVec.fromBytes(src).xorBlocks(blocks[5]).xorBlocks(blocks[4]).xorBlocks(blocks[1]); + msg = msg.xorBlocks(blocks[2].andBlocks(blocks[3])); + dst.* = msg.toBytes(); + state.update(msg); + } + + fn decLast(state: *State, dst: []u8, src: []const u8) void { + const blocks = &state.blocks; + const z = blocks[5].xorBlocks(blocks[4]).xorBlocks(blocks[1]).xorBlocks(blocks[2].andBlocks(blocks[3])); + var pad = z.toBytes(); + for (pad[0..src.len], src) |*p, x| p.* ^= x; + @memcpy(dst, pad[0..src.len]); + @memset(pad[src.len..], 0); + const msg = AesBlockVec.fromBytes(pad[0..]); + state.update(msg); + } + + fn mac(state: *State, comptime tag_bits: u9, adlen: usize, mlen: usize) [tag_bits / 8]u8 { + const blocks = &state.blocks; + var sizes: [aes_block_length]u8 = undefined; + mem.writeInt(u64, sizes[0..8], @as(u64, adlen) * 8, .little); + mem.writeInt(u64, sizes[8..16], @as(u64, mlen) * 8, .little); + for (1..degree) |i| { + @memcpy(sizes[i * 16 ..][0..16], sizes[0..16]); + } + const tmp = AesBlockVec.fromBytes(&sizes).xorBlocks(blocks[3]); + for (0..7) |_| { + state.update(tmp); + } + switch (tag_bits) { + 128 => { + const tag_multi = blocks[0].xorBlocks(blocks[1]).xorBlocks(blocks[2]).xorBlocks(blocks[3]).xorBlocks(blocks[4]).xorBlocks(blocks[5]).toBytes(); + var tag = tag_multi[0..16].*; + for (1..degree) |d| { + for (0..16) |i| { + tag[i] ^= tag_multi[d * 16 + i]; + } + } + return tag; + }, + 256 => { + const tag_multi_1 = blocks[0].xorBlocks(blocks[1]).xorBlocks(blocks[2]).toBytes(); + const tag_multi_2 = blocks[3].xorBlocks(blocks[4]).xorBlocks(blocks[5]).toBytes(); + var tag = tag_multi_1[0..16].* ++ tag_multi_2[0..16].*; + for (1..degree) |d| { + for (0..16) |i| { + tag[i] ^= tag_multi_1[d * 16 + i]; + tag[i + 16] ^= tag_multi_2[d * 16 + i]; + } + } + return tag; + }, + else => unreachable,
+ } + } + }; +} + +/// AEGIS is a very fast authenticated encryption system built on top of the core AES function. +/// +/// The 256-bit variants of AEGIS have a 256 bit key and a 256 bit nonce. +/// +/// https://datatracker.ietf.org/doc/draft-irtf-cfrg-aegis-aead/ +fn Aegis256XGeneric(comptime degree: u7, comptime tag_bits: u9) type { + comptime assert(degree > 0); // degree must be greater than 0 + comptime assert(tag_bits == 128 or tag_bits == 256); // tag must be 128 or 256 bits + + return struct { + const State = State256X(degree); + + pub const tag_length = tag_bits / 8; + pub const nonce_length = 32; + pub const key_length = 32; + pub const block_length = State.rate; + + const alignment = State.alignment; + + /// c: ciphertext: output buffer should be of size m.len + /// tag: authentication tag: output MAC + /// m: message + /// ad: Associated Data + /// npub: public nonce + /// k: private key + pub fn encrypt(c: []u8, tag: *[tag_length]u8, m: []const u8, ad: []const u8, npub: [nonce_length]u8, key: [key_length]u8) void { + assert(c.len == m.len); + var state = State.init(key, npub); + var src: [block_length]u8 align(alignment) = undefined; + var dst: [block_length]u8 align(alignment) = undefined; + var i: usize = 0; + while (i + block_length <= ad.len) : (i += block_length) { + state.enc(&dst, ad[i..][0..block_length]); + } + if (ad.len % block_length != 0) { + @memset(src[0..], 0); + @memcpy(src[0 .. ad.len % block_length], ad[i..][0 .. ad.len % block_length]); + state.enc(&dst, &src); + } + i = 0; + while (i + block_length <= m.len) : (i += block_length) { + state.enc(c[i..][0..block_length], m[i..][0..block_length]); + } + if (m.len % block_length != 0) { + @memset(src[0..], 0); + @memcpy(src[0 .. m.len % block_length], m[i..][0 .. m.len % block_length]); + state.enc(&dst, &src); + @memcpy(c[i..][0 .. m.len % block_length], dst[0 .. m.len % block_length]); + } + tag.* = state.mac(tag_bits, ad.len, m.len); + } + + /// `m`: Message + /// `c`: Ciphertext + /// `tag`: Authentication tag + /// `ad`: Associated data + /// `npub`: Public nonce + /// `k`: Private key + /// Asserts `c.len == m.len`. + /// + /// Contents of `m` are undefined if an error is returned. + pub fn decrypt(m: []u8, c: []const u8, tag: [tag_length]u8, ad: []const u8, npub: [nonce_length]u8, key: [key_length]u8) AuthenticationError!void { + assert(c.len == m.len); + var state = State.init(key, npub); + var src: [block_length]u8 align(alignment) = undefined; + var i: usize = 0; + while (i + block_length <= ad.len) : (i += block_length) { + state.absorb(ad[i..][0..block_length]); + } + if (ad.len % block_length != 0) { + @memset(src[0..], 0); + @memcpy(src[0 .. ad.len % block_length], ad[i..][0 .. ad.len % block_length]); + state.absorb(&src); + } + i = 0; + while (i + block_length <= m.len) : (i += block_length) { + state.dec(m[i..][0..block_length], c[i..][0..block_length]); + } + if (m.len % block_length != 0) { + state.decLast(m[i..], c[i..]); + } + var computed_tag = state.mac(tag_bits, ad.len, m.len); + const verify = crypto.timing_safe.eql([tag_length]u8, computed_tag, tag); + if (!verify) { + crypto.secureZero(u8, &computed_tag); + @memset(m, undefined); + return error.AuthenticationFailed; + } + } + }; +} + +/// The `Aegis128X4Mac` message authentication function outputs 256 bit tags. +/// In addition to being extremely fast, its large state, non-linearity +/// and non-invertibility provides the following properties: +/// - 128 bit security, stronger than GHash/Polyval/Poly1305.
+/// - Recovering the secret key from the state would require ~2^128 attempts, +/// which is infeasible for any practical adversary. +/// - It has a large security margin against internal collisions. +pub const Aegis128X4Mac = AegisMac(Aegis128X4_256); + +/// The `Aegis128X2Mac` message authentication function outputs 256 bit tags. +/// In addition to being extremely fast, its large state, non-linearity +/// and non-invertibility provides the following properties: +/// - 128 bit security, stronger than GHash/Polyval/Poly1305. +/// - Recovering the secret key from the state would require ~2^128 attempts, +/// which is infeasible for any practical adversary. +/// - It has a large security margin against internal collisions. +pub const Aegis128X2Mac = AegisMac(Aegis128X2_256); + /// The `Aegis128LMac` message authentication function outputs 256 bit tags. /// In addition to being extremely fast, its large state, non-linearity /// and non-invertibility provides the following properties: @@ -409,34 +562,60 @@ fn Aegis256Generic(comptime tag_bits: u9) type { /// - It has a large security margin against internal collisions. pub const Aegis128LMac = AegisMac(Aegis128L_256); +/// The `Aegis256X4Mac` message authentication function has a 256-bit key size, +/// and outputs 256 bit tags. Unless theoretical multi-target attacks are a +/// concern, the AEGIS-128L variant should be preferred. +/// AEGIS' large state, non-linearity and non-invertibility provides the +/// following properties: +/// - 256 bit security against forgery. +/// - Recovering the secret key from the state would require ~2^256 attempts, +/// which is infeasible for any practical adversary. +/// - It has a large security margin against internal collisions. +pub const Aegis256X4Mac = AegisMac(Aegis256X4_256); + +/// The `Aegis256X2Mac` message authentication function has a 256-bit key size, +/// and outputs 256 bit tags. Unless theoretical multi-target attacks are a +/// concern, the AEGIS-128L variant should be preferred. +/// AEGIS' large state, non-linearity and non-invertibility provides the +/// following properties: +/// - 256 bit security against forgery. +/// - Recovering the secret key from the state would require ~2^256 attempts, +/// which is infeasible for any practical adversary. +/// - It has a large security margin against internal collisions. +pub const Aegis256X2Mac = AegisMac(Aegis256X2_256); + /// The `Aegis256Mac` message authentication function has a 256-bit key size, /// and outputs 256 bit tags. Unless theoretical multi-target attacks are a /// concern, the AEGIS-128L variant should be preferred. /// AEGIS' large state, non-linearity and non-invertibility provides the /// following properties: -/// - More than 128 bit security against forgery. +/// - 256 bit security against forgery. /// - Recovering the secret key from the state would require ~2^256 attempts, /// which is infeasible for any practical adversary. /// - It has a large security margin against internal collisions. pub const Aegis256Mac = AegisMac(Aegis256_256); -/// Aegis128L MAC with a 128-bit output. -/// A MAC with a 128-bit output is not safe unless the number of messages -/// authenticated with the same key remains small. -/// After 2^48 messages, the probability of a collision is already ~ 2^-33. -/// If unsure, use the Aegis128LMac type, that has a 256 bit output. 
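For orientation, here is a minimal sketch of how the MAC wrappers declared here are driven once this lands. It relies only on the public API defined by `AegisMac` later in this file; the key, the message, and the choice of `Aegis128LMac` are arbitrary, not part of this change:

```zig
const std = @import("std");

test "AegisMac usage sketch" {
    const Mac = std.crypto.auth.aegis.Aegis128LMac;
    const key = [_]u8{0x42} ** Mac.key_length;

    // One-shot API.
    var tag: [Mac.mac_length]u8 = undefined;
    Mac.create(&tag, "some message", &key);

    // Streaming API; the same input must yield the same tag.
    var st = Mac.init(&key);
    st.update("some ");
    st.update("message");
    var tag2: [Mac.mac_length]u8 = undefined;
    st.final(&tag2);

    try std.testing.expectEqualSlices(u8, &tag, &tag2);
}
```

The same pattern applies to every `Aegis*Mac` and `Aegis*Mac_128` type added above; only `key_length` and `mac_length` differ.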
+/// AEGIS-128X4 MAC with 128-bit tags +pub const Aegis128X4Mac_128 = AegisMac(Aegis128X4); + +/// AEGIS-128X2 MAC with 128-bit tags +pub const Aegis128X2Mac_128 = AegisMac(Aegis128X2); + +/// AEGIS-128L MAC with 128-bit tags pub const Aegis128LMac_128 = AegisMac(Aegis128L); -/// Aegis256 MAC with a 128-bit output. -/// A MAC with a 128-bit output is not safe unless the number of messages -/// authenticated with the same key remains small. -/// After 2^48 messages, the probability of a collision is already ~ 2^-33. -/// If unsure, use the Aegis256Mac type, that has a 256 bit output. +/// AEGIS-256X4 MAC with 128-bit tags +pub const Aegis256X4Mac_128 = AegisMac(Aegis256X4); + +/// AEGIS-256X2 MAC with 128-bit tags +pub const Aegis256X2Mac_128 = AegisMac(Aegis256X2); + +/// AEGIS-256 MAC with 128-bit tags pub const Aegis256Mac_128 = AegisMac(Aegis256); fn AegisMac(comptime T: type) type { return struct { - const Self = @This(); + const Mac = @This(); pub const mac_length = T.tag_length; pub const key_length = T.key_length; @@ -448,15 +627,15 @@ fn AegisMac(comptime T: type) type { msg_len: usize = 0, /// Initialize a state for the MAC function - pub fn init(key: *const [key_length]u8) Self { + pub fn init(key: *const [key_length]u8) Mac { const nonce = [_]u8{0} ** T.nonce_length; - return Self{ + return Mac{ .state = T.State.init(key.*, nonce), }; } /// Add data to the state - pub fn update(self: *Self, b: []const u8) void { + pub fn update(self: *Mac, b: []const u8) void { self.msg_len += b.len; const len_partial = @min(b.len, block_length - self.off); @@ -469,6 +648,10 @@ fn AegisMac(comptime T: type) type { var i = len_partial; self.off = 0; + while (i + block_length * 2 <= b.len) : (i += block_length * 2) { + self.state.absorb(b[i..][0..block_length]); + self.state.absorb(b[i..][block_length .. 
block_length * 2]); + } while (i + block_length <= b.len) : (i += block_length) { self.state.absorb(b[i..][0..block_length]); } @@ -479,7 +662,7 @@ fn AegisMac(comptime T: type) type { } /// Return an authentication tag for the current state - pub fn final(self: *Self, out: *[mac_length]u8) void { + pub fn final(self: *Mac, out: *[mac_length]u8) void { if (self.off > 0) { var pad = [_]u8{0} ** block_length; @memcpy(pad[0..self.off], self.buf[0..self.off]); @@ -490,20 +673,20 @@ fn AegisMac(comptime T: type) type { /// Return an authentication tag for a message and a key pub fn create(out: *[mac_length]u8, msg: []const u8, key: *const [key_length]u8) void { - var ctx = Self.init(key); + var ctx = Mac.init(key); ctx.update(msg); ctx.final(out); } pub const Error = error{}; - pub const Writer = std.io.Writer(*Self, Error, write); + pub const Writer = std.io.Writer(*Mac, Error, write); - fn write(self: *Self, bytes: []const u8) Error!usize { + fn write(self: *Mac, bytes: []const u8) Error!usize { self.update(bytes); return bytes.len; } - pub fn writer(self: *Self) Writer { + pub fn writer(self: *Mac) Writer { return .{ .context = self }; } }; @@ -568,6 +751,23 @@ test "Aegis128L test vector 3" { try htest.assertEqual("83cc600dc4e3e7e62d4055826174f149", &tag); } +test "Aegis128X2 test vector 1" { + const key: [Aegis128X2.key_length]u8 = [_]u8{ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }; + const nonce: [Aegis128X2.nonce_length]u8 = [_]u8{ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f }; + var empty = [_]u8{}; + var tag: [Aegis128X2.tag_length]u8 = undefined; + var tag256: [Aegis128X2_256.tag_length]u8 = undefined; + + Aegis128X2.encrypt(&empty, &tag, &empty, &empty, nonce, key); + Aegis128X2_256.encrypt(&empty, &tag256, &empty, &empty, nonce, key); + try htest.assertEqual("63117dc57756e402819a82e13eca8379", &tag); + try htest.assertEqual("b92c71fdbd358b8a4de70b27631ace90cffd9b9cfba82028412bac41b4f53759", &tag256); + tag[0] +%= 1; + try testing.expectError(error.AuthenticationFailed, Aegis128X2.decrypt(&empty, &empty, tag, &empty, nonce, key)); + tag256[0] +%= 1; + try testing.expectError(error.AuthenticationFailed, Aegis128X2_256.decrypt(&empty, &empty, tag256, &empty, nonce, key)); +} + test "Aegis256 test vector 1" { const key: [Aegis256.key_length]u8 = [_]u8{ 0x10, 0x01 } ++ [_]u8{0x00} ** 30; const nonce: [Aegis256.nonce_length]u8 = [_]u8{ 0x10, 0x00, 0x02 } ++ [_]u8{0x00} ** 29; @@ -624,6 +824,23 @@ test "Aegis256 test vector 3" { try htest.assertEqual("f7a0878f68bd083e8065354071fc27c3", &tag); } +test "Aegis256X4 test vector 1" { + const key: [Aegis256X4.key_length]u8 = [_]u8{ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f }; + const nonce: [Aegis256X4.nonce_length]u8 = [_]u8{ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f }; + var empty = [_]u8{}; + var tag: [Aegis256X4.tag_length]u8 = undefined; + var tag256: [Aegis256X4_256.tag_length]u8 = undefined; + + Aegis256X4.encrypt(&empty, &tag, &empty, &empty, nonce, key); + Aegis256X4_256.encrypt(&empty, &tag256, &empty, &empty, nonce, key); + try htest.assertEqual("3b7fee6cee7bf17888ad11ed2397beb4", &tag); + try 
htest.assertEqual("6093a1a8aab20ec635dc1ca71745b01b5bec4fc444c9ffbebd710d4a34d20eaf", &tag256); + tag[0] +%= 1; + try testing.expectError(error.AuthenticationFailed, Aegis256X4.decrypt(&empty, &empty, tag, &empty, nonce, key)); + tag256[0] +%= 1; + try testing.expectError(error.AuthenticationFailed, Aegis256X4_256.decrypt(&empty, &empty, tag256, &empty, nonce, key)); +} + test "Aegis MAC" { const key = [_]u8{0x00} ** Aegis128LMac.key_length; var msg: [64]u8 = undefined; diff --git a/lib/std/crypto/aes.zig b/lib/std/crypto/aes.zig index 5e5ae04b58..d14b82c937 100644 --- a/lib/std/crypto/aes.zig +++ b/lib/std/crypto/aes.zig @@ -22,6 +22,7 @@ pub const has_hardware_support = (builtin.cpu.arch == .aarch64 and has_armaes); pub const Block = impl.Block; +pub const BlockVec = impl.BlockVec; pub const AesEncryptCtx = impl.AesEncryptCtx; pub const AesDecryptCtx = impl.AesDecryptCtx; pub const Aes128 = impl.Aes128; diff --git a/lib/std/crypto/aes/aesni.zig b/lib/std/crypto/aes/aesni.zig index e0893cfba8..2793ff4184 100644 --- a/lib/std/crypto/aes/aesni.zig +++ b/lib/std/crypto/aes/aesni.zig @@ -2,18 +2,23 @@ const std = @import("../../std.zig"); const builtin = @import("builtin"); const mem = std.mem; const debug = std.debug; -const BlockVec = @Vector(2, u64); + +const has_vaes = builtin.cpu.arch == .x86_64 and std.Target.x86.featureSetHas(builtin.cpu.features, .vaes); +const has_avx512f = builtin.cpu.arch == .x86_64 and std.Target.x86.featureSetHas(builtin.cpu.features, .avx512f); /// A single AES block. pub const Block = struct { + const Repr = @Vector(2, u64); + + /// The length of an AES block in bytes. pub const block_length: usize = 16; /// Internal representation of a block. - repr: BlockVec, + repr: Repr, /// Convert a byte sequence into an internal representation. pub inline fn fromBytes(bytes: *const [16]u8) Block { - const repr = mem.bytesToValue(BlockVec, bytes); + const repr = mem.bytesToValue(Repr, bytes); return Block{ .repr = repr }; } @@ -33,7 +38,7 @@ pub const Block = struct { return Block{ .repr = asm ( \\ vaesenc %[rk], %[in], %[out] - : [out] "=x" (-> BlockVec), + : [out] "=x" (-> Repr), : [in] "x" (block.repr), [rk] "x" (round_key.repr), ), @@ -45,7 +50,7 @@ pub const Block = struct { return Block{ .repr = asm ( \\ vaesenclast %[rk], %[in], %[out] - : [out] "=x" (-> BlockVec), + : [out] "=x" (-> Repr), : [in] "x" (block.repr), [rk] "x" (round_key.repr), ), @@ -57,7 +62,7 @@ pub const Block = struct { return Block{ .repr = asm ( \\ vaesdec %[rk], %[in], %[out] - : [out] "=x" (-> BlockVec), + : [out] "=x" (-> Repr), : [in] "x" (block.repr), [rk] "x" (inv_round_key.repr), ), @@ -69,7 +74,7 @@ pub const Block = struct { return Block{ .repr = asm ( \\ vaesdeclast %[rk], %[in], %[out] - : [out] "=x" (-> BlockVec), + : [out] "=x" (-> Repr), : [in] "x" (block.repr), [rk] "x" (inv_round_key.repr), ), @@ -168,17 +173,158 @@ pub const Block = struct { }; }; +/// A fixed-size vector of AES blocks. +/// All operations are performed in parallel, using SIMD instructions when available. +pub fn BlockVec(comptime blocks_count: comptime_int) type { + return struct { + const Self = @This(); + + /// The number of AES blocks the target architecture can process with a single instruction. + pub const native_vector_size = w: { + if (has_avx512f and blocks_count % 4 == 0) break :w 4; + if (has_vaes and blocks_count % 2 == 0) break :w 2; + break :w 1; + }; + + /// The size of the AES block vector that the target architecture can process with a single instruction, in bytes. 
+ pub const native_word_size = native_vector_size * 16; + + const native_words = blocks_count / native_vector_size; + + const Repr = @Vector(native_vector_size * 2, u64); + + /// Internal representation of a block vector. + repr: [native_words]Repr, + + /// Length of the block vector in bytes. + pub const block_length: usize = blocks_count * 16; + + /// Convert a byte sequence into an internal representation. + pub inline fn fromBytes(bytes: *const [blocks_count * 16]u8) Self { + var out: Self = undefined; + inline for (0..native_words) |i| { + out.repr[i] = mem.bytesToValue(Repr, bytes[i * native_word_size ..][0..native_word_size]); + } + return out; + } + + /// Convert the internal representation of a block vector into a byte sequence. + pub inline fn toBytes(block_vec: Self) [blocks_count * 16]u8 { + var out: [blocks_count * 16]u8 = undefined; + inline for (0..native_words) |i| { + out[i * native_word_size ..][0..native_word_size].* = mem.toBytes(block_vec.repr[i]); + } + return out; + } + + /// XOR the block vector with a byte sequence. + pub inline fn xorBytes(block_vec: Self, bytes: *const [blocks_count * 16]u8) [blocks_count * 16]u8 { + var x: Self = undefined; + inline for (0..native_words) |i| { + x.repr[i] = block_vec.repr[i] ^ mem.bytesToValue(Repr, bytes[i * native_word_size ..][0..native_word_size]); + } + return x.toBytes(); + } + + /// Apply the forward AES operation to the block vector with a vector of round keys. + pub inline fn encrypt(block_vec: Self, round_key_vec: Self) Self { + var out: Self = undefined; + inline for (0..native_words) |i| { + out.repr[i] = asm ( + \\ vaesenc %[rk], %[in], %[out] + : [out] "=x" (-> Repr), + : [in] "x" (block_vec.repr[i]), + [rk] "x" (round_key_vec.repr[i]), + ); + } + return out; + } + + /// Apply the forward AES operation to the block vector with a vector of last round keys. + pub inline fn encryptLast(block_vec: Self, round_key_vec: Self) Self { + var out: Self = undefined; + inline for (0..native_words) |i| { + out.repr[i] = asm ( + \\ vaesenclast %[rk], %[in], %[out] + : [out] "=x" (-> Repr), + : [in] "x" (block_vec.repr[i]), + [rk] "x" (round_key_vec.repr[i]), + ); + } + return out; + } + + /// Apply the inverse AES operation to the block vector with a vector of round keys. + pub inline fn decrypt(block_vec: Self, inv_round_key_vec: Self) Self { + var out: Self = undefined; + inline for (0..native_words) |i| { + out.repr[i] = asm ( + \\ vaesdec %[rk], %[in], %[out] + : [out] "=x" (-> Repr), + : [in] "x" (block_vec.repr[i]), + [rk] "x" (inv_round_key_vec.repr[i]), + ); + } + return out; + } + + /// Apply the inverse AES operation to the block vector with a vector of last round keys. + pub inline fn decryptLast(block_vec: Self, inv_round_key_vec: Self) Self { + var out: Self = undefined; + inline for (0..native_words) |i| { + out.repr[i] = asm ( + \\ vaesdeclast %[rk], %[in], %[out] + : [out] "=x" (-> Repr), + : [in] "x" (block_vec.repr[i]), + [rk] "x" (inv_round_key_vec.repr[i]), + ); + } + return out; + } + + /// Apply the bitwise XOR operation to the content of two block vectors. + pub inline fn xorBlocks(block_vec1: Self, block_vec2: Self) Self { + var out: Self = undefined; + inline for (0..native_words) |i| { + out.repr[i] = block_vec1.repr[i] ^ block_vec2.repr[i]; + } + return out; + } + + /// Apply the bitwise AND operation to the content of two block vectors. 
+ pub inline fn andBlocks(block_vec1: Self, block_vec2: Self) Self { + var out: Self = undefined; + inline for (0..native_words) |i| { + out.repr[i] = block_vec1.repr[i] & block_vec2.repr[i]; + } + return out; + } + + /// Apply the bitwise OR operation to the content of two block vectors. + pub inline fn orBlocks(block_vec1: Self, block_vec2: Self) Self { + var out: Self = undefined; + inline for (0..native_words) |i| { + out.repr[i] = block_vec1.repr[i] | block_vec2.repr[i]; + } + return out; + } + }; +} + fn KeySchedule(comptime Aes: type) type { std.debug.assert(Aes.rounds == 10 or Aes.rounds == 14); const rounds = Aes.rounds; return struct { const Self = @This(); + + const Repr = Aes.block.Repr; + round_keys: [rounds + 1]Block, - fn drc(comptime second: bool, comptime rc: u8, t: BlockVec, tx: BlockVec) BlockVec { - var s: BlockVec = undefined; - var ts: BlockVec = undefined; + fn drc(comptime second: bool, comptime rc: u8, t: Repr, tx: Repr) Repr { + var s: Repr = undefined; + var ts: Repr = undefined; return asm ( \\ vaeskeygenassist %[rc], %[t], %[s] \\ vpslldq $4, %[tx], %[ts] \\ vpxor %[ts], %[tx], %[r] \\ vpslldq $8, %[r], %[ts] \\ vpxor %[ts], %[r], %[r] \\ vpshufd %[mask], %[s], %[ts] \\ vpxor %[ts], %[r], %[r] - : [r] "=&x" (-> BlockVec), + : [r] "=&x" (-> Repr), [s] "=&x" (s), [ts] "=&x" (ts), : [rc] "n" (rc), @@ -234,7 +380,7 @@ fn KeySchedule(comptime Aes: type) type { inv_round_keys[i] = Block{ .repr = asm ( \\ vaesimc %[rk], %[inv_rk] - : [inv_rk] "=x" (-> BlockVec), + : [inv_rk] "=x" (-> Repr), : [rk] "x" (round_keys[rounds - i].repr), ), }; diff --git a/lib/std/crypto/aes/armcrypto.zig b/lib/std/crypto/aes/armcrypto.zig index a6574c372a..2487ab7e72 100644 --- a/lib/std/crypto/aes/armcrypto.zig +++ b/lib/std/crypto/aes/armcrypto.zig @@ -1,18 +1,19 @@ const std = @import("../../std.zig"); const mem = std.mem; const debug = std.debug; -const BlockVec = @Vector(2, u64); /// A single AES block. pub const Block = struct { + const Repr = @Vector(2, u64); + pub const block_length: usize = 16; /// Internal representation of a block. - repr: BlockVec, + repr: Repr, /// Convert a byte sequence into an internal representation. pub inline fn fromBytes(bytes: *const [16]u8) Block { - const repr = mem.bytesToValue(BlockVec, bytes); + const repr = mem.bytesToValue(Repr, bytes); return Block{ .repr = repr }; } @@ -36,7 +37,7 @@ pub const Block = struct { \\ mov %[out].16b, %[in].16b \\ aese %[out].16b, %[zero].16b \\ aesmc %[out].16b, %[out].16b - : [out] "=&x" (-> BlockVec), + : [out] "=&x" (-> Repr), : [in] "x" (block.repr), [zero] "x" (zero), )) ^ round_key.repr, @@ -49,7 +50,7 @@ pub const Block = struct { .repr = (asm ( \\ mov %[out].16b, %[in].16b \\ aese %[out].16b, %[zero].16b - : [out] "=&x" (-> BlockVec), + : [out] "=&x" (-> Repr), : [in] "x" (block.repr), [zero] "x" (zero), )) ^ round_key.repr, @@ -63,7 +64,7 @@ pub const Block = struct { \\ mov %[out].16b, %[in].16b \\ aesd %[out].16b, %[zero].16b \\ aesimc %[out].16b, %[out].16b - : [out] "=&x" (-> BlockVec), + : [out] "=&x" (-> Repr), : [in] "x" (block.repr), [zero] "x" (zero), )) ^ inv_round_key.repr, @@ -76,7 +77,7 @@ pub const Block = struct { .repr = (asm ( \\ mov %[out].16b, %[in].16b \\ aesd %[out].16b, %[zero].16b - : [out] "=&x" (-> BlockVec), + : [out] "=&x" (-> Repr), : [in] "x" (block.repr), [zero] "x" (zero), )) ^ inv_round_key.repr, @@ -165,6 +166,118 @@ pub const Block = struct { }; };
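The armcrypto and soft backends below gain the same `BlockVec` surface as the aesni implementation above, so generic callers such as the AEGIS state never need to know which backend they run on. A rough sketch of the call pattern, through the `std.crypto.core.aes.BlockVec` re-export added earlier in this diff (the degree and byte values are arbitrary):

```zig
const std = @import("std");

test "BlockVec call pattern sketch" {
    const V = std.crypto.core.aes.BlockVec(2); // two AES blocks wide

    var bytes: [V.block_length]u8 = undefined;
    std.crypto.random.bytes(&bytes);
    const round_key = V.fromBytes(&([_]u8{0x13} ** V.block_length));

    // Load both blocks, run one AES round on them in parallel, store.
    const out = V.fromBytes(&bytes).encrypt(round_key).toBytes();
    try std.testing.expect(out.len == V.block_length);
}
```

On x86_64 with VAES and AVX-512 this maps onto wide vector instructions; the backends below process the blocks one at a time instead, which is what `native_vector_size = 1` expresses.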
+/// A fixed-size vector of AES blocks. +/// All operations are performed in parallel, using SIMD instructions when available. +pub fn BlockVec(comptime blocks_count: comptime_int) type { + return struct { + const Self = @This(); + + /// The number of AES blocks the target architecture can process with a single instruction. + pub const native_vector_size = 1; + + /// The size of the AES block vector that the target architecture can process with a single instruction, in bytes. + pub const native_word_size = native_vector_size * 16; + + const native_words = blocks_count; + + /// Internal representation of a block vector. + repr: [native_words]Block, + + /// Length of the block vector in bytes. + pub const block_length: usize = blocks_count * 16; + + /// Convert a byte sequence into an internal representation. + pub inline fn fromBytes(bytes: *const [blocks_count * 16]u8) Self { + var out: Self = undefined; + inline for (0..native_words) |i| { + out.repr[i] = Block.fromBytes(bytes[i * native_word_size ..][0..native_word_size]); + } + return out; + } + + /// Convert the internal representation of a block vector into a byte sequence. + pub inline fn toBytes(block_vec: Self) [blocks_count * 16]u8 { + var out: [blocks_count * 16]u8 = undefined; + inline for (0..native_words) |i| { + out[i * native_word_size ..][0..native_word_size].* = block_vec.repr[i].toBytes(); + } + return out; + } + + /// XOR the block vector with a byte sequence. + pub inline fn xorBytes(block_vec: Self, bytes: *const [blocks_count * 16]u8) [blocks_count * 16]u8 { + var out: [blocks_count * 16]u8 = undefined; + inline for (0..native_words) |i| { + out[i * native_word_size ..][0..native_word_size].* = block_vec.repr[i].xorBytes(bytes[i * native_word_size ..][0..native_word_size]); + } + return out; + } + + /// Apply the forward AES operation to the block vector with a vector of round keys. + pub inline fn encrypt(block_vec: Self, round_key_vec: Self) Self { + var out: Self = undefined; + inline for (0..native_words) |i| { + out.repr[i] = block_vec.repr[i].encrypt(round_key_vec.repr[i]); + } + return out; + } + + /// Apply the forward AES operation to the block vector with a vector of last round keys. + pub inline fn encryptLast(block_vec: Self, round_key_vec: Self) Self { + var out: Self = undefined; + inline for (0..native_words) |i| { + out.repr[i] = block_vec.repr[i].encryptLast(round_key_vec.repr[i]); + } + return out; + } + + /// Apply the inverse AES operation to the block vector with a vector of round keys. + pub inline fn decrypt(block_vec: Self, inv_round_key_vec: Self) Self { + var out: Self = undefined; + inline for (0..native_words) |i| { + out.repr[i] = block_vec.repr[i].decrypt(inv_round_key_vec.repr[i]); + } + return out; + } + + /// Apply the inverse AES operation to the block vector with a vector of last round keys. + pub inline fn decryptLast(block_vec: Self, inv_round_key_vec: Self) Self { + var out: Self = undefined; + inline for (0..native_words) |i| { + out.repr[i] = block_vec.repr[i].decryptLast(inv_round_key_vec.repr[i]); + } + return out; + } + + /// Apply the bitwise XOR operation to the content of two block vectors. + pub inline fn xorBlocks(block_vec1: Self, block_vec2: Self) Self { + var out: Self = undefined; + inline for (0..native_words) |i| { + out.repr[i] = block_vec1.repr[i].xorBlocks(block_vec2.repr[i]); + } + return out; + } + + /// Apply the bitwise AND operation to the content of two block vectors.
+ pub inline fn andBlocks(block_vec1: Self, block_vec2: Self) Self { + var out: Self = undefined; + inline for (0..native_words) |i| { + out.repr[i] = block_vec1.repr[i].andBlocks(block_vec2.repr[i]); + } + return out; + } + + /// Apply the bitwise OR operation to the content of two block vectors. + pub inline fn orBlocks(block_vec1: Self, block_vec2: Self) Self { + var out: Self = undefined; + inline for (0..native_words) |i| { + out.repr[i] = block_vec1.repr[i].orBlocks(block_vec2.repr[i]); + } + return out; + } + }; +} + fn KeySchedule(comptime Aes: type) type { std.debug.assert(Aes.rounds == 10 or Aes.rounds == 14); const rounds = Aes.rounds; @@ -172,17 +285,19 @@ fn KeySchedule(comptime Aes: type) type { return struct { const Self = @This(); + const Repr = Aes.block.Repr; + const zero = @Vector(2, u64){ 0, 0 }; const mask1 = @Vector(16, u8){ 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15, 12 }; const mask2 = @Vector(16, u8){ 12, 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15 }; round_keys: [rounds + 1]Block, - fn drc128(comptime rc: u8, t: BlockVec) BlockVec { - var v1: BlockVec = undefined; - var v2: BlockVec = undefined; - var v3: BlockVec = undefined; - var v4: BlockVec = undefined; + fn drc128(comptime rc: u8, t: Repr) Repr { + var v1: Repr = undefined; + var v2: Repr = undefined; + var v3: Repr = undefined; + var v4: Repr = undefined; return asm ( \\ movi %[v2].4s, %[rc] \\ tbl %[v4].16b, {%[r].16b}, %[mask1].16b \\ ext %[v1].16b, %[zero].16b, %[r].16b, #12 \\ aese %[v4].16b, %[v2].16b \\ eor %[v1].16b, %[v1].16b, %[r].16b \\ eor %[r].16b, %[v1].16b, %[v3].16b \\ eor %[r].16b, %[r].16b, %[v4].16b - : [r] "=&x" (-> BlockVec), + : [r] "=&x" (-> Repr), [v1] "=&x" (v1), [v2] "=&x" (v2), [v3] "=&x" (v3), @@ -208,11 +323,11 @@ fn KeySchedule(comptime Aes: type) type { ); } - fn drc256(comptime second: bool, comptime rc: u8, t: BlockVec, tx: BlockVec) BlockVec { - var v1: BlockVec = undefined; - var v2: BlockVec = undefined; - var v3: BlockVec = undefined; - var v4: BlockVec = undefined; + fn drc256(comptime second: bool, comptime rc: u8, t: Repr, tx: Repr) Repr { + var v1: Repr = undefined; + var v2: Repr = undefined; + var v3: Repr = undefined; + var v4: Repr = undefined; return asm ( \\ movi %[v2].4s, %[rc] \\ ext %[v1].16b, %[zero].16b, %[t].16b, #12 \\ eor %[v1].16b, %[v1].16b, %[v2].16b \\ eor %[v1].16b, %[v1].16b, %[v3].16b \\ eor %[r].16b, %[v1].16b, %[v4].16b - : [r] "=&x" (-> BlockVec), + : [r] "=&x" (-> Repr), [v1] "=&x" (v1), [v2] "=&x" (v2), [v3] "=&x" (v3), @@ -276,7 +391,7 @@ fn KeySchedule(comptime Aes: type) type { inv_round_keys[i] = Block{ .repr = asm ( \\ aesimc %[inv_rk].16b, %[rk].16b - : [inv_rk] "=x" (-> BlockVec), + : [inv_rk] "=x" (-> Repr), : [rk] "x" (round_keys[rounds - i].repr), ), }; diff --git a/lib/std/crypto/aes/soft.zig b/lib/std/crypto/aes/soft.zig index 8430a3af7e..7f3d298a3a 100644 --- a/lib/std/crypto/aes/soft.zig +++ b/lib/std/crypto/aes/soft.zig @@ -2,16 +2,16 @@ const std = @import("../../std.zig"); const math = std.math; const mem = std.mem; -const BlockVec = [4]u32; - const side_channels_mitigations = std.options.side_channels_mitigations; /// A single AES block. pub const Block = struct { + const Repr = [4]u32; + pub const block_length: usize = 16; /// Internal representation of a block. - repr: BlockVec align(16), + repr: Repr align(16), /// Convert a byte sequence into an internal representation.
pub inline fn fromBytes(bytes: *const [16]u8) Block { @@ -19,7 +19,7 @@ pub const Block = struct { const s1 = mem.readInt(u32, bytes[4..8], .little); const s2 = mem.readInt(u32, bytes[8..12], .little); const s3 = mem.readInt(u32, bytes[12..16], .little); - return Block{ .repr = BlockVec{ s0, s1, s2, s3 } }; + return Block{ .repr = Repr{ s0, s1, s2, s3 } }; } /// Convert the internal representation of a block into a byte sequence. @@ -65,7 +65,7 @@ pub const Block = struct { t2 ^= round_key.repr[2]; t3 ^= round_key.repr[3]; - return Block{ .repr = BlockVec{ t0, t1, t2, t3 } }; + return Block{ .repr = Repr{ t0, t1, t2, t3 } }; } /// Encrypt a block with a round key *WITHOUT ANY PROTECTION AGAINST SIDE CHANNELS* @@ -110,7 +110,7 @@ pub const Block = struct { t2 ^= round_key.repr[2]; t3 ^= round_key.repr[3]; - return Block{ .repr = BlockVec{ t0, t1, t2, t3 } }; + return Block{ .repr = Repr{ t0, t1, t2, t3 } }; } /// Encrypt a block with the last round key. @@ -136,7 +136,7 @@ pub const Block = struct { t2 ^= round_key.repr[2]; t3 ^= round_key.repr[3]; - return Block{ .repr = BlockVec{ t0, t1, t2, t3 } }; + return Block{ .repr = Repr{ t0, t1, t2, t3 } }; } /// Decrypt a block with a round key. @@ -161,7 +161,7 @@ pub const Block = struct { t2 ^= round_key.repr[2]; t3 ^= round_key.repr[3]; - return Block{ .repr = BlockVec{ t0, t1, t2, t3 } }; + return Block{ .repr = Repr{ t0, t1, t2, t3 } }; } /// Decrypt a block with a round key *WITHOUT ANY PROTECTION AGAINST SIDE CHANNELS* @@ -206,7 +206,7 @@ pub const Block = struct { t2 ^= round_key.repr[2]; t3 ^= round_key.repr[3]; - return Block{ .repr = BlockVec{ t0, t1, t2, t3 } }; + return Block{ .repr = Repr{ t0, t1, t2, t3 } }; } /// Decrypt a block with the last round key. @@ -232,12 +232,12 @@ pub const Block = struct { t2 ^= round_key.repr[2]; t3 ^= round_key.repr[3]; - return Block{ .repr = BlockVec{ t0, t1, t2, t3 } }; + return Block{ .repr = Repr{ t0, t1, t2, t3 } }; } /// Apply the bitwise XOR operation to the content of two blocks. pub inline fn xorBlocks(block1: Block, block2: Block) Block { - var x: BlockVec = undefined; + var x: Repr = undefined; comptime var i = 0; inline while (i < 4) : (i += 1) { x[i] = block1.repr[i] ^ block2.repr[i]; @@ -247,7 +247,7 @@ pub const Block = struct { /// Apply the bitwise AND operation to the content of two blocks. pub inline fn andBlocks(block1: Block, block2: Block) Block { - var x: BlockVec = undefined; + var x: Repr = undefined; comptime var i = 0; inline while (i < 4) : (i += 1) { x[i] = block1.repr[i] & block2.repr[i]; @@ -257,7 +257,7 @@ pub const Block = struct { /// Apply the bitwise OR operation to the content of two blocks. pub inline fn orBlocks(block1: Block, block2: Block) Block { - var x: BlockVec = undefined; + var x: Repr = undefined; comptime var i = 0; inline while (i < 4) : (i += 1) { x[i] = block1.repr[i] | block2.repr[i]; @@ -332,6 +332,118 @@ pub const Block = struct { }; }; +/// A fixed-size vector of AES blocks. +/// All operations are performed in parallel, using SIMD instructions when available. +pub fn BlockVec(comptime blocks_count: comptime_int) type { + return struct { + const Self = @This(); + + /// The number of AES blocks the target architecture can process with a single instruction. + pub const native_vector_size = 1; + + /// The size of the AES block vector that the target architecture can process with a single instruction, in bytes. 
+ pub const native_word_size = native_vector_size * 16; + + const native_words = blocks_count; + + /// Internal representation of a block vector. + repr: [native_words]Block, + + /// Length of the block vector in bytes. + pub const block_length: usize = blocks_count * 16; + + /// Convert a byte sequence into an internal representation. + pub inline fn fromBytes(bytes: *const [blocks_count * 16]u8) Self { + var out: Self = undefined; + for (0..native_words) |i| { + out.repr[i] = Block.fromBytes(bytes[i * native_word_size ..][0..native_word_size]); + } + return out; + } + + /// Convert the internal representation of a block vector into a byte sequence. + pub inline fn toBytes(block_vec: Self) [blocks_count * 16]u8 { + var out: [blocks_count * 16]u8 = undefined; + for (0..native_words) |i| { + out[i * native_word_size ..][0..native_word_size].* = block_vec.repr[i].toBytes(); + } + return out; + } + + /// XOR the block vector with a byte sequence. + pub inline fn xorBytes(block_vec: Self, bytes: *const [blocks_count * 16]u8) [blocks_count * 16]u8 { + var out: [blocks_count * 16]u8 = undefined; + for (0..native_words) |i| { + out[i * native_word_size ..][0..native_word_size].* = block_vec.repr[i].xorBytes(bytes[i * native_word_size ..][0..native_word_size]); + } + return out; + } + + /// Apply the forward AES operation to the block vector with a vector of round keys. + pub inline fn encrypt(block_vec: Self, round_key_vec: Self) Self { + var out: Self = undefined; + for (0..native_words) |i| { + out.repr[i] = block_vec.repr[i].encrypt(round_key_vec.repr[i]); + } + return out; + } + + /// Apply the forward AES operation to the block vector with a vector of last round keys. + pub inline fn encryptLast(block_vec: Self, round_key_vec: Self) Self { + var out: Self = undefined; + for (0..native_words) |i| { + out.repr[i] = block_vec.repr[i].encryptLast(round_key_vec.repr[i]); + } + return out; + } + + /// Apply the inverse AES operation to the block vector with a vector of round keys. + pub inline fn decrypt(block_vec: Self, inv_round_key_vec: Self) Self { + var out: Self = undefined; + for (0..native_words) |i| { + out.repr[i] = block_vec.repr[i].decrypt(inv_round_key_vec.repr[i]); + } + return out; + } + + /// Apply the inverse AES operation to the block vector with a vector of last round keys. + pub inline fn decryptLast(block_vec: Self, inv_round_key_vec: Self) Self { + var out: Self = undefined; + for (0..native_words) |i| { + out.repr[i] = block_vec.repr[i].decryptLast(inv_round_key_vec.repr[i]); + } + return out; + } + + /// Apply the bitwise XOR operation to the content of two block vectors. + pub inline fn xorBlocks(block_vec1: Self, block_vec2: Self) Self { + var out: Self = undefined; + for (0..native_words) |i| { + out.repr[i] = block_vec1.repr[i].xorBlocks(block_vec2.repr[i]); + } + return out; + } + + /// Apply the bitwise AND operation to the content of two block vectors. + pub inline fn andBlocks(block_vec1: Self, block_vec2: Self) Self { + var out: Self = undefined; + for (0..native_words) |i| { + out.repr[i] = block_vec1.repr[i].andBlocks(block_vec2.repr[i]); + } + return out; + } + + /// Apply the bitwise OR operation to the content of two block vectors.
+ pub inline fn orBlocks(block_vec1: Self, block_vec2: Block) Self { + var out: Self = undefined; + for (0..native_words) |i| { + out.repr[i] = block_vec1.repr[i].orBlocks(block_vec2.repr[i]); + } + return out; + } + }; +} + fn KeySchedule(comptime Aes: type) type { std.debug.assert(Aes.rounds == 10 or Aes.rounds == 14); const key_length = Aes.key_bits / 8; @@ -671,7 +783,7 @@ fn mul(a: u8, b: u8) u8 { const cache_line_bytes = std.atomic.cache_line; -inline fn sbox_lookup(sbox: *align(64) const [256]u8, idx0: u8, idx1: u8, idx2: u8, idx3: u8) [4]u8 { +fn sbox_lookup(sbox: *align(64) const [256]u8, idx0: u8, idx1: u8, idx2: u8, idx3: u8) [4]u8 { if (side_channels_mitigations == .none) { return [4]u8{ sbox[idx0], @@ -709,7 +821,7 @@ inline fn sbox_lookup(sbox: *align(64) const [256]u8, idx0: u8, idx1: u8, idx2: } } -inline fn table_lookup(table: *align(64) const [4][256]u32, idx0: u8, idx1: u8, idx2: u8, idx3: u8) [4]u32 { +fn table_lookup(table: *align(64) const [4][256]u32, idx0: u8, idx1: u8, idx2: u8, idx3: u8) [4]u32 { if (side_channels_mitigations == .none) { return [4]u32{ table[0][idx0], @@ -718,17 +830,18 @@ inline fn table_lookup(table: *align(64) const [4][256]u32, idx0: u8, idx1: u8, table[3][idx3], }; } else { + const table_len: usize = 256; const stride = switch (side_channels_mitigations) { .none => unreachable, - .basic => table[0].len / 4, - .medium => @max(1, @min(table[0].len, 2 * cache_line_bytes / 4)), - .full => @max(1, @min(table[0].len, cache_line_bytes / 4)), + .basic => table_len / 4, + .medium => @max(1, @min(table_len, 2 * cache_line_bytes / 4)), + .full => @max(1, @min(table_len, cache_line_bytes / 4)), }; const of0 = idx0 % stride; const of1 = idx1 % stride; const of2 = idx2 % stride; const of3 = idx3 % stride; - var t: [4][table[0].len / stride]u32 align(64) = undefined; + var t: [4][table_len / stride]u32 align(64) = undefined; var i: usize = 0; while (i < t[0].len) : (i += 1) { const tx = table[0][i * stride ..]; diff --git a/lib/std/crypto/bcrypt.zig b/lib/std/crypto/bcrypt.zig index f3c30ab5ce..308cd1a42e 100644 --- a/lib/std/crypto/bcrypt.zig +++ b/lib/std/crypto/bcrypt.zig @@ -563,15 +563,57 @@ const pbkdf_prf = struct { }; /// bcrypt-pbkdf is a key derivation function based on bcrypt. -/// This is the function used in OpenSSH to derive encryption keys from passphrases. -/// -/// This implementation is compatible with the OpenBSD implementation (https://github.com/openbsd/src/blob/master/lib/libutil/bcrypt_pbkdf.c). /// /// Unlike the password hashing function `bcrypt`, this function doesn't silently truncate passwords longer than 72 bytes. pub fn pbkdf(pass: []const u8, salt: []const u8, key: []u8, rounds: u32) !void { try crypto.pwhash.pbkdf2(key, pass, salt, rounds, pbkdf_prf); } +/// The function used in OpenSSH to derive encryption keys from passphrases. +/// +/// This implementation is compatible with the OpenBSD implementation (https://github.com/openbsd/src/blob/master/lib/libutil/bcrypt_pbkdf.c). 
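+///
+/// A minimal usage sketch (the passphrase, salt, and round count are
+/// illustrative; any non-empty values with `rounds >= 1` and
+/// `key.len <= 1024` are accepted):
+///
+///     var key: [48]u8 = undefined;
+///     try opensshKdf("correct horse", "battery staple", &key, 16);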
+pub fn opensshKdf(pass: []const u8, salt: []const u8, key: []u8, rounds: u32) !void { + var tmp: [32]u8 = undefined; + var tmp2: [32]u8 = undefined; + if (rounds < 1 or pass.len == 0 or salt.len == 0 or key.len == 0 or key.len > tmp.len * tmp.len) { + return error.InvalidInput; + } + var sha2pass: [Sha512.digest_length]u8 = undefined; + Sha512.hash(pass, &sha2pass, .{}); + const stride = (key.len + tmp.len - 1) / tmp.len; + var amt = (key.len + stride - 1) / stride; + if (math.shr(usize, key.len, 32) >= amt) { + return error.InvalidInput; + } + var key_remainder = key.len; + var count: u32 = 1; + while (key_remainder > 0) : (count += 1) { + var count_salt: [4]u8 = undefined; + std.mem.writeInt(u32, count_salt[0..], count, .big); + var sha2salt: [Sha512.digest_length]u8 = undefined; + var h = Sha512.init(.{}); + h.update(salt); + h.update(&count_salt); + h.final(&sha2salt); + tmp2 = pbkdf_prf.hash(sha2pass, sha2salt); + tmp = tmp2; + for (1..rounds) |_| { + Sha512.hash(&tmp2, &sha2salt, .{}); + tmp2 = pbkdf_prf.hash(sha2pass, sha2salt); + for (&tmp, tmp2) |*o, t| o.* ^= t; + } + amt = @min(amt, key_remainder); + key_remainder -= for (0..amt) |i| { + const dest = i * stride + (count - 1); + if (dest >= key.len) break i; + key[dest] = tmp[i]; + } else amt; + } + crypto.secureZero(u8, &tmp); + crypto.secureZero(u8, &tmp2); + crypto.secureZero(u8, &sha2pass); +} + const crypt_format = struct { /// String prefix for bcrypt pub const prefix = "$2"; @@ -847,3 +889,13 @@ test "bcrypt phc format" { verify_options, ); } + +test "openssh kdf" { + var key: [100]u8 = undefined; + const pass = "password"; + const salt = "salt"; + const rounds = 5; + try opensshKdf(pass, salt, &key, rounds); + const expected = [_]u8{ 65, 207, 68, 58, 55, 252, 114, 141, 255, 65, 216, 175, 5, 92, 235, 68, 220, 92, 118, 161, 40, 13, 241, 190, 56, 152, 69, 136, 41, 214, 51, 205, 37, 221, 101, 59, 105, 73, 133, 36, 14, 59, 94, 212, 111, 107, 109, 237, 213, 235, 246, 119, 59, 76, 45, 130, 142, 81, 178, 231, 161, 158, 138, 108, 18, 162, 26, 50, 218, 251, 23, 66, 2, 232, 20, 202, 216, 46, 12, 250, 247, 246, 252, 23, 155, 74, 77, 195, 120, 113, 57, 88, 126, 81, 9, 249, 72, 18, 208, 160 }; + try testing.expectEqualSlices(u8, &key, &expected); +} diff --git a/lib/std/crypto/benchmark.zig b/lib/std/crypto/benchmark.zig index 8bb651f73b..c3dcd9b8cb 100644 --- a/lib/std/crypto/benchmark.zig +++ b/lib/std/crypto/benchmark.zig @@ -72,6 +72,10 @@ const macs = [_]Crypto{ Crypto{ .ty = crypto.auth.siphash.SipHash64(1, 3), .name = "siphash-1-3" }, Crypto{ .ty = crypto.auth.siphash.SipHash128(2, 4), .name = "siphash128-2-4" }, Crypto{ .ty = crypto.auth.siphash.SipHash128(1, 3), .name = "siphash128-1-3" }, + Crypto{ .ty = crypto.auth.aegis.Aegis128X4Mac, .name = "aegis-128x4 mac" }, + Crypto{ .ty = crypto.auth.aegis.Aegis256X4Mac, .name = "aegis-256x4 mac" }, + Crypto{ .ty = crypto.auth.aegis.Aegis128X2Mac, .name = "aegis-128x2 mac" }, + Crypto{ .ty = crypto.auth.aegis.Aegis256X2Mac, .name = "aegis-256x2 mac" }, Crypto{ .ty = crypto.auth.aegis.Aegis128LMac, .name = "aegis-128l mac" }, Crypto{ .ty = crypto.auth.aegis.Aegis256Mac, .name = "aegis-256 mac" }, Crypto{ .ty = crypto.auth.cmac.CmacAes128, .name = "aes-cmac" }, @@ -283,7 +287,11 @@ const aeads = [_]Crypto{ Crypto{ .ty = crypto.aead.chacha_poly.XChaCha20Poly1305, .name = "xchacha20Poly1305" }, Crypto{ .ty = crypto.aead.chacha_poly.XChaCha8Poly1305, .name = "xchacha8Poly1305" }, Crypto{ .ty = crypto.aead.salsa_poly.XSalsa20Poly1305, .name = "xsalsa20Poly1305" }, + Crypto{ .ty = 
crypto.aead.aegis.Aegis128X4, .name = "aegis-128x4" }, + Crypto{ .ty = crypto.aead.aegis.Aegis128X2, .name = "aegis-128x2" }, Crypto{ .ty = crypto.aead.aegis.Aegis128L, .name = "aegis-128l" }, + Crypto{ .ty = crypto.aead.aegis.Aegis256X4, .name = "aegis-256x4" }, + Crypto{ .ty = crypto.aead.aegis.Aegis256X2, .name = "aegis-256x2" }, Crypto{ .ty = crypto.aead.aegis.Aegis256, .name = "aegis-256" }, Crypto{ .ty = crypto.aead.aes_gcm.Aes128Gcm, .name = "aes128-gcm" }, Crypto{ .ty = crypto.aead.aes_gcm.Aes256Gcm, .name = "aes256-gcm" }, diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 982e71bc35..0756e456ff 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -1531,9 +1531,9 @@ pub fn ConfigurableTrace(comptime size: usize, comptime stack_frame_count: usize } pub const SafetyLock = struct { - state: State = .unlocked, + state: State = if (runtime_safety) .unlocked else .unknown, - pub const State = if (runtime_safety) enum { unlocked, locked } else enum { unlocked }; + pub const State = if (runtime_safety) enum { unlocked, locked } else enum { unknown }; pub fn lock(l: *SafetyLock) void { if (!runtime_safety) return; @@ -1551,8 +1551,22 @@ pub const SafetyLock = struct { if (!runtime_safety) return; assert(l.state == .unlocked); } + + pub fn assertLocked(l: SafetyLock) void { + if (!runtime_safety) return; + assert(l.state == .locked); + } }; +test SafetyLock { + var safety_lock: SafetyLock = .{}; + safety_lock.assertUnlocked(); + safety_lock.lock(); + safety_lock.assertLocked(); + safety_lock.unlock(); + safety_lock.assertUnlocked(); +} + /// Detect whether the program is being executed in the Valgrind virtual machine. /// /// When Valgrind integrations are disabled, this returns comptime-known false. diff --git a/lib/std/debug/MemoryAccessor.zig b/lib/std/debug/MemoryAccessor.zig index bfdda609f6..a420d9cdcf 100644 --- a/lib/std/debug/MemoryAccessor.zig +++ b/lib/std/debug/MemoryAccessor.zig @@ -48,7 +48,8 @@ fn read(ma: *MemoryAccessor, address: usize, buf: []u8) bool { switch (linux.E.init(bytes_read)) { .SUCCESS => return bytes_read == buf.len, .FAULT => return false, - .INVAL, .PERM, .SRCH => unreachable, // own pid is always valid + .INVAL, .SRCH => unreachable, // own pid is always valid + .PERM => {}, // Known to happen in containers. .NOMEM => {}, .NOSYS => {}, // QEMU is known not to implement this syscall. else => unreachable, // unexpected diff --git a/lib/std/fmt.zig b/lib/std/fmt.zig index eba72721f9..2f16d849b0 100644 --- a/lib/std/fmt.zig +++ b/lib/std/fmt.zig @@ -224,7 +224,6 @@ pub const Placeholder = struct { pub fn parse(comptime str: anytype) Placeholder { const view = std.unicode.Utf8View.initComptime(&str); comptime var parser = Parser{ - .buf = &str, .iter = view.iterator(), }; @@ -311,10 +310,13 @@ pub const Specifier = union(enum) { named: []const u8, }; +/// A stream based parser for format strings. +/// +/// Allows to implement formatters compatible with std.fmt without replicating +/// the standard library behavior. 
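+///
+/// A sketch of pulling a decimal width off the stream with the `number`
+/// helper below (assumes `view` is a `std.unicode.Utf8View` over the format
+/// string, as in `Placeholder.parse`):
+///
+///     comptime var parser: Parser = .{ .iter = view.iterator() };
+///     const width = parser.number() orelse 0;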
pub const Parser = struct { - buf: []const u8, pos: usize = 0, - iter: std.unicode.Utf8Iterator = undefined, + iter: std.unicode.Utf8Iterator, // Returns a decimal number or null if the current character is not a // digit diff --git a/lib/std/hash_map.zig b/lib/std/hash_map.zig index 9c436320b7..e6f51dc648 100644 --- a/lib/std/hash_map.zig +++ b/lib/std/hash_map.zig @@ -1692,7 +1692,7 @@ pub fn HashMapUnmanaged( } self.size = 0; - self.pointer_stability = .{ .state = .unlocked }; + self.pointer_stability = .{}; std.mem.swap(Self, self, &map); map.deinit(allocator); } diff --git a/lib/std/mem/Allocator.zig b/lib/std/mem/Allocator.zig index 0d4ab9141f..8aea197d6a 100644 --- a/lib/std/mem/Allocator.zig +++ b/lib/std/mem/Allocator.zig @@ -301,8 +301,9 @@ pub fn reallocAdvanced( return mem.bytesAsSlice(T, new_bytes); } -/// Free an array allocated with `alloc`. To free a single item, -/// see `destroy`. +/// Free an array allocated with `alloc`. +/// If memory has length 0, free is a no-op. +/// To free a single item, see `destroy`. pub fn free(self: Allocator, memory: anytype) void { const Slice = @typeInfo(@TypeOf(memory)).pointer; const bytes = mem.sliceAsBytes(memory); diff --git a/lib/std/meta.zig b/lib/std/meta.zig index 0ea83bb11e..44bfb65f8a 100644 --- a/lib/std/meta.zig +++ b/lib/std/meta.zig @@ -737,13 +737,15 @@ test TagPayload { try testing.expect(MovedEvent == @TypeOf(e.Moved)); } -/// Compares two of any type for equality. Containers are compared on a field-by-field basis, -/// where possible. Pointers are not followed. +/// Compares two of any type for equality. Containers that do not support comparison +/// on their own are compared on a field-by-field basis. Pointers are not followed. pub fn eql(a: anytype, b: @TypeOf(a)) bool { const T = @TypeOf(a); switch (@typeInfo(T)) { .@"struct" => |info| { + if (info.layout == .@"packed") return a == b; + inline for (info.fields) |field_info| { if (!eql(@field(a, field_info.name), @field(b, field_info.name))) return false; } diff --git a/lib/std/posix.zig b/lib/std/posix.zig index d46307dbdf..3ca5e1ae59 100644 --- a/lib/std/posix.zig +++ b/lib/std/posix.zig @@ -1817,6 +1817,7 @@ pub fn openatZ(dir_fd: fd_t, file_path: [*:0]const u8, flags: O, mode: mode_t) O .OPNOTSUPP => return error.FileLocksNotSupported, .AGAIN => return error.WouldBlock, .TXTBSY => return error.FileBusy, + .NXIO => return error.NoDevice, .ILSEQ => |err| if (native_os == .wasi) return error.InvalidUtf8 else diff --git a/lib/std/zig/AstGen.zig b/lib/std/zig/AstGen.zig index abec1f354a..a9b518357b 100644 --- a/lib/std/zig/AstGen.zig +++ b/lib/std/zig/AstGen.zig @@ -6024,7 +6024,7 @@ fn tryExpr( if (!parent_gz.is_comptime) { try emitDbgNode(parent_gz, node); } - const try_lc = LineColumn{ astgen.source_line - parent_gz.decl_line, astgen.source_column }; + const try_lc: LineColumn = .{ astgen.source_line - parent_gz.decl_line, astgen.source_column }; const operand_rl: ResultInfo.Loc, const block_tag: Zir.Inst.Tag = switch (ri.rl) { .ref => .{ .ref, .try_ptr }, @@ -6577,6 +6577,7 @@ fn whileExpr( const astgen = parent_gz.astgen; const tree = astgen.tree; const token_tags = tree.tokens.items(.tag); + const token_starts = tree.tokens.items(.start); const need_rl = astgen.nodes_need_rl.contains(node); const block_ri: ResultInfo = if (need_rl) ri else .{ @@ -6774,6 +6775,16 @@ fn whileExpr( try checkUsed(parent_gz, &then_scope.base, then_sub_scope); const break_tag: Zir.Inst.Tag = if (is_inline) .break_inline else .@"break"; if (!continue_scope.endsWithNoReturn()) { + 
astgen.advanceSourceCursor(token_starts[tree.lastToken(then_node)]); + try emitDbgStmt(parent_gz, .{ astgen.source_line - parent_gz.decl_line, astgen.source_column }); + _ = try parent_gz.add(.{ + .tag = .extended, + .data = .{ .extended = .{ + .opcode = .dbg_empty_stmt, + .small = undefined, + .operand = undefined, + } }, + }); _ = try continue_scope.addBreak(break_tag, continue_block, .void_value); } try continue_scope.setBlockBody(continue_block); @@ -6882,6 +6893,7 @@ fn forExpr( } const tree = astgen.tree; const token_tags = tree.tokens.items(.tag); + const token_starts = tree.tokens.items(.start); const node_tags = tree.nodes.items(.tag); const node_data = tree.nodes.items(.data); const gpa = astgen.gpa; @@ -7087,8 +7099,18 @@ fn forExpr( try checkUsed(parent_gz, &then_scope.base, then_sub_scope); - const break_tag: Zir.Inst.Tag = if (is_inline) .break_inline else .@"break"; + astgen.advanceSourceCursor(token_starts[tree.lastToken(then_node)]); + try emitDbgStmt(parent_gz, .{ astgen.source_line - parent_gz.decl_line, astgen.source_column }); + _ = try parent_gz.add(.{ + .tag = .extended, + .data = .{ .extended = .{ + .opcode = .dbg_empty_stmt, + .small = undefined, + .operand = undefined, + } }, + }); + const break_tag: Zir.Inst.Tag = if (is_inline) .break_inline else .@"break"; _ = try then_scope.addBreak(break_tag, cond_block, .void_value); var else_scope = parent_gz.makeSubBlock(&cond_scope.base); @@ -7135,6 +7157,7 @@ fn forExpr( .lhs = index_ptr, .rhs = index_plus_one, }); + const repeat_tag: Zir.Inst.Tag = if (is_inline) .repeat_inline else .repeat; _ = try loop_scope.addNode(repeat_tag, node); @@ -7279,7 +7302,7 @@ fn switchExprErrUnion( }; astgen.advanceSourceCursorToNode(operand_node); - const operand_lc = LineColumn{ astgen.source_line - parent_gz.decl_line, astgen.source_column }; + const operand_lc: LineColumn = .{ astgen.source_line - parent_gz.decl_line, astgen.source_column }; const raw_operand = try reachableExpr(parent_gz, scope, operand_ri, operand_node, switch_node); const item_ri: ResultInfo = .{ .rl = .none }; @@ -7868,7 +7891,7 @@ fn switchExpr( const operand_ri: ResultInfo = .{ .rl = if (any_payload_is_ref) .ref else .none }; astgen.advanceSourceCursorToNode(operand_node); - const operand_lc = LineColumn{ astgen.source_line - parent_gz.decl_line, astgen.source_column }; + const operand_lc: LineColumn = .{ astgen.source_line - parent_gz.decl_line, astgen.source_column }; const raw_operand = try expr(parent_gz, scope, operand_ri, operand_node); const item_ri: ResultInfo = .{ .rl = .none }; @@ -8214,7 +8237,7 @@ fn ret(gz: *GenZir, scope: *Scope, node: Ast.Node.Index) InnerError!Zir.Inst.Ref if (!gz.is_comptime) { try emitDbgNode(gz, node); } - const ret_lc = LineColumn{ astgen.source_line - gz.decl_line, astgen.source_column }; + const ret_lc: LineColumn = .{ astgen.source_line - gz.decl_line, astgen.source_column }; const defer_outer = &astgen.fn_block.?.base; diff --git a/lib/std/zig/Zir.zig b/lib/std/zig/Zir.zig index 00a48e21f7..f2c103f835 100644 --- a/lib/std/zig/Zir.zig +++ b/lib/std/zig/Zir.zig @@ -2088,6 +2088,8 @@ pub const Inst = struct { /// `operand` is `Zir.Inst.Ref` of the loaded LHS (*not* its type). /// `small` is an `Inst.InplaceOp`. inplace_arith_result_ty, + /// Marks a statement that can be stepped to but produces no code. + dbg_empty_stmt, pub const InstData = struct { opcode: Extended, @@ -4062,6 +4064,7 @@ fn findDeclsInner( .branch_hint, .inplace_arith_result_ty, .tuple_decl, + .dbg_empty_stmt, => return, // `@TypeOf` has a body. 
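The AstGen changes above emit `dbg_empty_stmt` at the closing brace of `while` and `for` bodies so that each iteration of a loop whose body is a single statement gets its own steppable location, even though the instruction lowers to no real code (or a lone `nop` on x86_64, see `airDbgEmptyStmt` below). A minimal sketch of the kind of loop this serves (illustrative only):

    pub fn main() void {
        var x: u32 = 0;
        while (x < 3) {
            x +%= 1; // the only statement in the body: without an empty
        } // statement at the closing brace, every iteration maps to one line
    }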
diff --git a/lib/zig.h b/lib/zig.h index 248bb8641c..14b50aea47 100644 --- a/lib/zig.h +++ b/lib/zig.h @@ -256,6 +256,8 @@ typedef char bool; #define zig_trap() __asm__ volatile("udf #0xfe") #elif defined(__arm__) || defined(__aarch64__) #define zig_trap() __asm__ volatile("udf #0xfdee") +#elif defined(__hexagon__) +#define zig_trap() __asm__ volatile("r27:26 = memd(#0xbadc0fee)") #elif defined(__loongarch__) || defined(__powerpc__) #define zig_trap() __asm__ volatile(".word 0x0") #elif defined(__mips__) @@ -280,6 +282,8 @@ typedef char bool; #define zig_breakpoint() __asm__ volatile("bkpt #0x0") #elif defined(__aarch64__) #define zig_breakpoint() __asm__ volatile("brk #0xf000") +#elif defined(__hexagon__) +#define zig_breakpoint() __asm__ volatile("brkpt") #elif defined(__loongarch__) #define zig_breakpoint() __asm__ volatile("break 0x0") #elif defined(__mips__) diff --git a/src/Air.zig b/src/Air.zig index 3aa5f317c0..4589bb1557 100644 --- a/src/Air.zig +++ b/src/Air.zig @@ -460,6 +460,8 @@ pub const Inst = struct { /// Result type is always void. /// Uses the `dbg_stmt` field. dbg_stmt, + /// Marks a statement that can be stepped to but produces no code. + dbg_empty_stmt, /// A block that represents an inlined function call. /// Uses the `ty_pl` field. Payload is `DbgInlineBlock`. dbg_inline_block, @@ -1468,6 +1470,7 @@ pub fn typeOfIndex(air: *const Air, inst: Air.Inst.Index, ip: *const InternPool) .breakpoint, .dbg_stmt, + .dbg_empty_stmt, .dbg_var_ptr, .dbg_var_val, .dbg_arg_inline, @@ -1629,6 +1632,7 @@ pub fn mustLower(air: Air, inst: Air.Inst.Index, ip: *const InternPool) bool { .try_ptr, .try_ptr_cold, .dbg_stmt, + .dbg_empty_stmt, .dbg_inline_block, .dbg_var_ptr, .dbg_var_val, diff --git a/src/Air/types_resolved.zig b/src/Air/types_resolved.zig index 098cb29b22..cc866184e4 100644 --- a/src/Air/types_resolved.zig +++ b/src/Air/types_resolved.zig @@ -417,6 +417,7 @@ fn checkBody(air: Air, body: []const Air.Inst.Index, zcu: *Zcu) bool { .work_group_size, .work_group_id, .dbg_stmt, + .dbg_empty_stmt, .err_return_trace, .save_err_return_trace_index, .repeat, diff --git a/src/Compilation.zig b/src/Compilation.zig index a228d61257..3ad5c7932f 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -5884,7 +5884,9 @@ pub const FileExt = enum { }; pub fn hasObjectExt(filename: []const u8) bool { - return mem.endsWith(u8, filename, ".o") or mem.endsWith(u8, filename, ".obj"); + return mem.endsWith(u8, filename, ".o") or + mem.endsWith(u8, filename, ".lo") or + mem.endsWith(u8, filename, ".obj"); } pub fn hasStaticLibraryExt(filename: []const u8) bool { diff --git a/src/Liveness.zig b/src/Liveness.zig index b5bffc6a48..709844c0ac 100644 --- a/src/Liveness.zig +++ b/src/Liveness.zig @@ -334,6 +334,7 @@ pub fn categorizeOperand( .repeat, .switch_dispatch, .dbg_stmt, + .dbg_empty_stmt, .unreach, .ret_addr, .frame_addr, @@ -973,6 +974,7 @@ fn analyzeInst( .ret_ptr, .breakpoint, .dbg_stmt, + .dbg_empty_stmt, .ret_addr, .frame_addr, .wasm_memory_size, diff --git a/src/Liveness/Verify.zig b/src/Liveness/Verify.zig index aa2239793a..01e0842ded 100644 --- a/src/Liveness/Verify.zig +++ b/src/Liveness/Verify.zig @@ -56,6 +56,7 @@ fn verifyBody(self: *Verify, body: []const Air.Inst.Index) Error!void { .ret_ptr, .breakpoint, .dbg_stmt, + .dbg_empty_stmt, .ret_addr, .frame_addr, .wasm_memory_size, diff --git a/src/Sema.zig b/src/Sema.zig index ceaff910ba..635c3437c0 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -1355,6 +1355,11 @@ fn analyzeBodyInner( .field_parent_ptr => try 
sema.zirFieldParentPtr(block, extended), .builtin_value => try sema.zirBuiltinValue(block, extended), .inplace_arith_result_ty => try sema.zirInplaceArithResultTy(extended), + .dbg_empty_stmt => { + try sema.zirDbgEmptyStmt(block, inst); + i += 1; + continue; + }, }; }, @@ -2584,18 +2589,7 @@ fn validateAlign( src: LazySrcLoc, alignment: u64, ) !Alignment { - const result = try validateAlignAllowZero(sema, block, src, alignment); - if (result == .none) return sema.fail(block, src, "alignment must be >= 1", .{}); - return result; -} - -fn validateAlignAllowZero( - sema: *Sema, - block: *Block, - src: LazySrcLoc, - alignment: u64, -) !Alignment { - if (alignment == 0) return .none; + if (alignment == 0) return sema.fail(block, src, "alignment must be >= 1", .{}); if (!std.math.isPowerOfTwo(alignment)) { return sema.fail(block, src, "alignment value '{d}' is not a power of two", .{ alignment, @@ -6682,6 +6676,11 @@ fn zirDbgStmt(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!voi }); } +fn zirDbgEmptyStmt(_: *Sema, block: *Block, _: Zir.Inst.Index) CompileError!void { + if (block.is_comptime or block.ownerModule().strip) return; + _ = try block.addNoOp(.dbg_empty_stmt); +} + fn zirDbgVar( sema: *Sema, block: *Block, @@ -20542,7 +20541,7 @@ fn zirPtrType(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air else => {}, } const align_bytes = (try val.getUnsignedIntSema(pt)).?; - break :blk try sema.validateAlignAllowZero(block, align_src, align_bytes); + break :blk try sema.validateAlign(block, align_src, align_bytes); } else .none; const address_space: std.builtin.AddressSpace = if (inst_data.flags.has_addrspace) blk: { @@ -26904,7 +26903,7 @@ fn zirFuncFancy(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!A if (val.isGenericPoison()) { break :blk null; } - break :blk try sema.validateAlignAllowZero(block, align_src, try val.toUnsignedIntSema(pt)); + break :blk try sema.validateAlign(block, align_src, try val.toUnsignedIntSema(pt)); } else if (extra.data.bits.has_align_ref) blk: { const align_ref: Zir.Inst.Ref = @enumFromInt(sema.code.extra[extra_index]); extra_index += 1; @@ -26922,7 +26921,7 @@ fn zirFuncFancy(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!A error.GenericPoison => break :blk null, else => |e| return e, }; - break :blk try sema.validateAlignAllowZero(block, align_src, try align_val.toUnsignedIntSema(pt)); + break :blk try sema.validateAlign(block, align_src, try align_val.toUnsignedIntSema(pt)); } else .none; const @"addrspace": ?std.builtin.AddressSpace = if (extra.data.bits.has_addrspace_body) blk: { diff --git a/src/arch/aarch64/CodeGen.zig b/src/arch/aarch64/CodeGen.zig index 6371cf92f3..8fd27d4bb7 100644 --- a/src/arch/aarch64/CodeGen.zig +++ b/src/arch/aarch64/CodeGen.zig @@ -800,6 +800,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .try_ptr_cold => try self.airTryPtr(inst), .dbg_stmt => try self.airDbgStmt(inst), + .dbg_empty_stmt => self.finishAirBookkeeping(), .dbg_inline_block => try self.airDbgInlineBlock(inst), .dbg_var_ptr, .dbg_var_val, diff --git a/src/arch/arm/CodeGen.zig b/src/arch/arm/CodeGen.zig index 9966648759..065f4a047d 100644 --- a/src/arch/arm/CodeGen.zig +++ b/src/arch/arm/CodeGen.zig @@ -787,6 +787,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .try_ptr_cold => try self.airTryPtr(inst), .dbg_stmt => try self.airDbgStmt(inst), + .dbg_empty_stmt => self.finishAirBookkeeping(), .dbg_inline_block => try self.airDbgInlineBlock(inst), 
.dbg_var_ptr, .dbg_var_val, diff --git a/src/arch/riscv64/CodeGen.zig b/src/arch/riscv64/CodeGen.zig index 24497defa2..29a0a8b8b5 100644 --- a/src/arch/riscv64/CodeGen.zig +++ b/src/arch/riscv64/CodeGen.zig @@ -1593,6 +1593,7 @@ fn genBody(func: *Func, body: []const Air.Inst.Index) InnerError!void { .frame_addr => try func.airFrameAddress(inst), .cond_br => try func.airCondBr(inst), .dbg_stmt => try func.airDbgStmt(inst), + .dbg_empty_stmt => func.finishAirBookkeeping(), .fptrunc => try func.airFptrunc(inst), .fpext => try func.airFpext(inst), .intcast => try func.airIntCast(inst), diff --git a/src/arch/sparc64/CodeGen.zig b/src/arch/sparc64/CodeGen.zig index a1bef1f4cd..7bbed29d8f 100644 --- a/src/arch/sparc64/CodeGen.zig +++ b/src/arch/sparc64/CodeGen.zig @@ -642,6 +642,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .try_ptr_cold => @panic("TODO try self.airTryPtrCold(inst)"), .dbg_stmt => try self.airDbgStmt(inst), + .dbg_empty_stmt => self.finishAirBookkeeping(), .dbg_inline_block => try self.airDbgInlineBlock(inst), .dbg_var_ptr, .dbg_var_val, diff --git a/src/arch/wasm/CodeGen.zig b/src/arch/wasm/CodeGen.zig index 50a8869282..ccdf38a474 100644 --- a/src/arch/wasm/CodeGen.zig +++ b/src/arch/wasm/CodeGen.zig @@ -1924,6 +1924,7 @@ fn genInst(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { .try_ptr_cold => func.airTryPtr(inst), .dbg_stmt => func.airDbgStmt(inst), + .dbg_empty_stmt => try func.finishAir(inst, .none, &.{}), .dbg_inline_block => func.airDbgInlineBlock(inst), .dbg_var_ptr => func.airDbgVar(inst, .local_var, true), .dbg_var_val => func.airDbgVar(inst, .local_var, false), diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 06ae399f25..298b2e11e0 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -961,9 +961,16 @@ pub fn generate( }, .debug_output = debug_output, .code = code, + .prev_di_loc = .{ + .line = func.lbrace_line, + .column = func.lbrace_column, + .is_stmt = switch (debug_output) { + .dwarf => |dwarf| dwarf.dwarf.debug_line.header.default_is_stmt, + .plan9 => undefined, + .none => undefined, + }, + }, .prev_di_pc = 0, - .prev_di_line = func.lbrace_line, - .prev_di_column = func.lbrace_column, }; defer emit.deinit(); emit.emitMir() catch |err| switch (err) { @@ -1066,9 +1073,8 @@ pub fn generateLazy( }, .debug_output = debug_output, .code = code, + .prev_di_loc = undefined, // no debug info yet .prev_di_pc = undefined, // no debug info yet - .prev_di_line = undefined, // no debug info yet - .prev_di_column = undefined, // no debug info yet }; defer emit.deinit(); emit.emitMir() catch |err| switch (err) { @@ -1194,13 +1200,16 @@ fn formatWipMir( switch (mir_inst.ops) { else => unreachable, .pseudo_dbg_prologue_end_none, - .pseudo_dbg_line_line_column, .pseudo_dbg_epilogue_begin_none, .pseudo_dbg_enter_block_none, .pseudo_dbg_leave_block_none, .pseudo_dbg_var_args_none, .pseudo_dead_none, => {}, + .pseudo_dbg_line_stmt_line_column, .pseudo_dbg_line_line_column => try writer.print( + " {[line]d}, {[column]d}", + mir_inst.data.line_column, + ), .pseudo_dbg_enter_inline_func, .pseudo_dbg_leave_inline_func => try writer.print(" {}", .{ ip.getNav(ip.indexToKey(mir_inst.data.func).func.owner_nav).name.fmt(ip), }), @@ -1281,14 +1290,7 @@ fn addInst(self: *Self, inst: Mir.Inst) error{OutOfMemory}!Mir.Inst.Index { try self.mir_instructions.ensureUnusedCapacity(gpa, 1); const result_index: Mir.Inst.Index = @intCast(self.mir_instructions.len); self.mir_instructions.appendAssumeCapacity(inst); 
- if (inst.tag != .pseudo or switch (inst.ops) { - else => true, - .pseudo_dbg_prologue_end_none, - .pseudo_dbg_line_line_column, - .pseudo_dbg_epilogue_begin_none, - .pseudo_dead_none, - => false, - }) wip_mir_log.debug("{}", .{self.fmtWipMir(result_index)}); + wip_mir_log.debug("{}", .{self.fmtWipMir(result_index)}); return result_index; } @@ -2218,7 +2220,7 @@ fn gen(self: *Self) InnerError!void { // Drop them off at the rbrace. _ = try self.addInst(.{ .tag = .pseudo, - .ops = .pseudo_dbg_line_line_column, + .ops = .pseudo_dbg_line_stmt_line_column, .data = .{ .line_column = .{ .line = self.end_di_line, .column = self.end_di_column, @@ -2426,6 +2428,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .try_ptr_cold => try self.airTryPtr(inst), // TODO .dbg_stmt => try self.airDbgStmt(inst), + .dbg_empty_stmt => try self.airDbgEmptyStmt(), .dbg_inline_block => try self.airDbgInlineBlock(inst), .dbg_var_ptr, .dbg_var_val, @@ -13281,7 +13284,7 @@ fn airDbgStmt(self: *Self, inst: Air.Inst.Index) !void { const dbg_stmt = self.air.instructions.items(.data)[@intFromEnum(inst)].dbg_stmt; _ = try self.addInst(.{ .tag = .pseudo, - .ops = .pseudo_dbg_line_line_column, + .ops = .pseudo_dbg_line_stmt_line_column, .data = .{ .line_column = .{ .line = dbg_stmt.line, .column = dbg_stmt.column, @@ -13290,6 +13293,14 @@ fn airDbgStmt(self: *Self, inst: Air.Inst.Index) !void { self.finishAirBookkeeping(); } +fn airDbgEmptyStmt(self: *Self) !void { + if (self.mir_instructions.len > 0 and + self.mir_instructions.items(.ops)[self.mir_instructions.len - 1] == .pseudo_dbg_line_stmt_line_column) + self.mir_instructions.items(.ops)[self.mir_instructions.len - 1] = .pseudo_dbg_line_line_column; + try self.asmOpOnly(.{ ._, .nop }); + self.finishAirBookkeeping(); +} + fn airDbgInlineBlock(self: *Self, inst: Air.Inst.Index) !void { const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; const extra = self.air.extraData(Air.DbgInlineBlock, ty_pl.payload); diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig index 0395b7a43b..f744eb3fc4 100644 --- a/src/arch/x86_64/Emit.zig +++ b/src/arch/x86_64/Emit.zig @@ -6,8 +6,7 @@ atom_index: u32, debug_output: link.File.DebugInfoOutput, code: *std.ArrayList(u8), -prev_di_line: u32, -prev_di_column: u32, +prev_di_loc: Loc, /// Relative to the beginning of `code`. 
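+/// Byte offset of the most recent line-table entry: `dbgAdvancePCAndLine`
+/// computes its PC delta against this value and then resets it to
+/// `code.items.len`.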
prev_di_pc: usize, @@ -263,77 +262,71 @@ pub fn emitMir(emit: *Emit) Error!void { else => unreachable, .pseudo => switch (mir_inst.ops) { else => unreachable, - .pseudo_dbg_prologue_end_none => { - switch (emit.debug_output) { - .dwarf => |dw| try dw.setPrologueEnd(), - .plan9 => {}, - .none => {}, - } + .pseudo_dbg_prologue_end_none => switch (emit.debug_output) { + .dwarf => |dwarf| try dwarf.setPrologueEnd(), + .plan9 => {}, + .none => {}, }, - .pseudo_dbg_line_line_column => try emit.dbgAdvancePCAndLine( - mir_inst.data.line_column.line, - mir_inst.data.line_column.column, - ), - .pseudo_dbg_epilogue_begin_none => { - switch (emit.debug_output) { - .dwarf => |dw| { - try dw.setEpilogueBegin(); - log.debug("mirDbgEpilogueBegin (line={d}, col={d})", .{ - emit.prev_di_line, emit.prev_di_column, - }); - try emit.dbgAdvancePCAndLine(emit.prev_di_line, emit.prev_di_column); - }, - .plan9 => {}, - .none => {}, - } + .pseudo_dbg_line_stmt_line_column => try emit.dbgAdvancePCAndLine(.{ + .line = mir_inst.data.line_column.line, + .column = mir_inst.data.line_column.column, + .is_stmt = true, + }), + .pseudo_dbg_line_line_column => try emit.dbgAdvancePCAndLine(.{ + .line = mir_inst.data.line_column.line, + .column = mir_inst.data.line_column.column, + .is_stmt = false, + }), + .pseudo_dbg_epilogue_begin_none => switch (emit.debug_output) { + .dwarf => |dwarf| { + try dwarf.setEpilogueBegin(); + log.debug("mirDbgEpilogueBegin (line={d}, col={d})", .{ + emit.prev_di_loc.line, emit.prev_di_loc.column, + }); + try emit.dbgAdvancePCAndLine(emit.prev_di_loc); + }, + .plan9 => {}, + .none => {}, }, - .pseudo_dbg_enter_block_none => { - switch (emit.debug_output) { - .dwarf => |dw| { - log.debug("mirDbgEnterBlock (line={d}, col={d})", .{ - emit.prev_di_line, emit.prev_di_column, - }); - try dw.enterBlock(emit.code.items.len); - }, - .plan9 => {}, - .none => {}, - } + .pseudo_dbg_enter_block_none => switch (emit.debug_output) { + .dwarf => |dwarf| { + log.debug("mirDbgEnterBlock (line={d}, col={d})", .{ + emit.prev_di_loc.line, emit.prev_di_loc.column, + }); + try dwarf.enterBlock(emit.code.items.len); + }, + .plan9 => {}, + .none => {}, }, - .pseudo_dbg_leave_block_none => { - switch (emit.debug_output) { - .dwarf => |dw| { - log.debug("mirDbgLeaveBlock (line={d}, col={d})", .{ - emit.prev_di_line, emit.prev_di_column, - }); - try dw.leaveBlock(emit.code.items.len); - }, - .plan9 => {}, - .none => {}, - } + .pseudo_dbg_leave_block_none => switch (emit.debug_output) { + .dwarf => |dwarf| { + log.debug("mirDbgLeaveBlock (line={d}, col={d})", .{ + emit.prev_di_loc.line, emit.prev_di_loc.column, + }); + try dwarf.leaveBlock(emit.code.items.len); + }, + .plan9 => {}, + .none => {}, }, - .pseudo_dbg_enter_inline_func => { - switch (emit.debug_output) { - .dwarf => |dw| { - log.debug("mirDbgEnterInline (line={d}, col={d})", .{ - emit.prev_di_line, emit.prev_di_column, - }); - try dw.enterInlineFunc(mir_inst.data.func, emit.code.items.len, emit.prev_di_line, emit.prev_di_column); - }, - .plan9 => {}, - .none => {}, - } + .pseudo_dbg_enter_inline_func => switch (emit.debug_output) { + .dwarf => |dwarf| { + log.debug("mirDbgEnterInline (line={d}, col={d})", .{ + emit.prev_di_loc.line, emit.prev_di_loc.column, + }); + try dwarf.enterInlineFunc(mir_inst.data.func, emit.code.items.len, emit.prev_di_loc.line, emit.prev_di_loc.column); + }, + .plan9 => {}, + .none => {}, }, - .pseudo_dbg_leave_inline_func => { - switch (emit.debug_output) { - .dwarf => |dw| { - log.debug("mirDbgLeaveInline (line={d}, col={d})", .{ - 
emit.prev_di_line, emit.prev_di_column, - }); - try dw.leaveInlineFunc(mir_inst.data.func, emit.code.items.len); - }, - .plan9 => {}, - .none => {}, - } + .pseudo_dbg_leave_inline_func => switch (emit.debug_output) { + .dwarf => |dwarf| { + log.debug("mirDbgLeaveInline (line={d}, col={d})", .{ + emit.prev_di_loc.line, emit.prev_di_loc.column, + }); + try dwarf.leaveInlineFunc(mir_inst.data.func, emit.code.items.len); + }, + .plan9 => {}, + .none => {}, }, .pseudo_dbg_local_a, .pseudo_dbg_local_ai_s, @@ -344,129 +337,125 @@ pub fn emitMir(emit: *Emit) Error!void { .pseudo_dbg_local_aro, .pseudo_dbg_local_af, .pseudo_dbg_local_am, - => { - switch (emit.debug_output) { - .dwarf => |dw| { - var loc_buf: [2]link.File.Dwarf.Loc = undefined; - const air_inst_index, const loc: link.File.Dwarf.Loc = switch (mir_inst.ops) { + => switch (emit.debug_output) { + .dwarf => |dwarf| { + var loc_buf: [2]link.File.Dwarf.Loc = undefined; + const air_inst_index, const loc: link.File.Dwarf.Loc = switch (mir_inst.ops) { + else => unreachable, + .pseudo_dbg_local_a => .{ mir_inst.data.a.air_inst, .empty }, + .pseudo_dbg_local_ai_s, + .pseudo_dbg_local_ai_u, + .pseudo_dbg_local_ai_64, + => .{ mir_inst.data.ai.air_inst, .{ .stack_value = stack_value: { + loc_buf[0] = switch (emit.lower.imm(mir_inst.ops, mir_inst.data.ai.i)) { + .signed => |s| .{ .consts = s }, + .unsigned => |u| .{ .constu = u }, + }; + break :stack_value &loc_buf[0]; + } } }, + .pseudo_dbg_local_as => .{ mir_inst.data.as.air_inst, .{ .addr = .{ + .sym = mir_inst.data.as.sym_index, + } } }, + .pseudo_dbg_local_aso => loc: { + const sym_off = emit.lower.mir.extraData( + bits.SymbolOffset, + mir_inst.data.ax.payload, + ).data; + break :loc .{ mir_inst.data.ax.air_inst, .{ .plus = .{ + sym: { + loc_buf[0] = .{ .addr = .{ .sym = sym_off.sym_index } }; + break :sym &loc_buf[0]; + }, + off: { + loc_buf[1] = .{ .consts = sym_off.off }; + break :off &loc_buf[1]; + }, + } } }; + }, + .pseudo_dbg_local_aro => loc: { + const air_off = emit.lower.mir.extraData( + Mir.AirOffset, + mir_inst.data.rx.payload, + ).data; + break :loc .{ air_off.air_inst, .{ .plus = .{ + reg: { + loc_buf[0] = .{ .breg = mir_inst.data.rx.r1.dwarfNum() }; + break :reg &loc_buf[0]; + }, + off: { + loc_buf[1] = .{ .consts = air_off.off }; + break :off &loc_buf[1]; + }, + } } }; + }, + .pseudo_dbg_local_af => loc: { + const reg_off = emit.lower.mir.resolveFrameAddr(emit.lower.mir.extraData( + bits.FrameAddr, + mir_inst.data.ax.payload, + ).data); + break :loc .{ mir_inst.data.ax.air_inst, .{ .plus = .{ + reg: { + loc_buf[0] = .{ .breg = reg_off.reg.dwarfNum() }; + break :reg &loc_buf[0]; + }, + off: { + loc_buf[1] = .{ .consts = reg_off.off }; + break :off &loc_buf[1]; + }, + } } }; + }, + .pseudo_dbg_local_am => loc: { + const mem = emit.lower.mem(mir_inst.data.ax.payload); + break :loc .{ mir_inst.data.ax.air_inst, .{ .plus = .{ + base: { + loc_buf[0] = switch (mem.base()) { + .none => .{ .constu = 0 }, + .reg => |reg| .{ .breg = reg.dwarfNum() }, + .frame => unreachable, + .reloc => |sym_index| .{ .addr = .{ .sym = sym_index } }, + }; + break :base &loc_buf[0]; + }, + disp: { + loc_buf[1] = switch (mem.disp()) { + .signed => |s| .{ .consts = s }, + .unsigned => |u| .{ .constu = u }, + }; + break :disp &loc_buf[1]; + }, + } } }; + }, + }; + const ip = &emit.lower.bin_file.comp.zcu.?.intern_pool; + const air_inst = emit.air.instructions.get(@intFromEnum(air_inst_index)); + const name: Air.NullTerminatedString = switch (air_inst.tag) { + else => unreachable, + .arg => 
air_inst.data.arg.name, + .dbg_var_ptr, .dbg_var_val, .dbg_arg_inline => @enumFromInt(air_inst.data.pl_op.payload), + }; + try dwarf.genLocalDebugInfo( + switch (air_inst.tag) { else => unreachable, - .pseudo_dbg_local_a => .{ mir_inst.data.a.air_inst, .empty }, - .pseudo_dbg_local_ai_s, - .pseudo_dbg_local_ai_u, - .pseudo_dbg_local_ai_64, - => .{ mir_inst.data.ai.air_inst, .{ .stack_value = stack_value: { - loc_buf[0] = switch (emit.lower.imm(mir_inst.ops, mir_inst.data.ai.i)) { - .signed => |s| .{ .consts = s }, - .unsigned => |u| .{ .constu = u }, - }; - break :stack_value &loc_buf[0]; - } } }, - .pseudo_dbg_local_as => .{ mir_inst.data.as.air_inst, .{ .addr = .{ - .sym = mir_inst.data.as.sym_index, - } } }, - .pseudo_dbg_local_aso => loc: { - const sym_off = emit.lower.mir.extraData( - bits.SymbolOffset, - mir_inst.data.ax.payload, - ).data; - break :loc .{ mir_inst.data.ax.air_inst, .{ .plus = .{ - sym: { - loc_buf[0] = .{ .addr = .{ .sym = sym_off.sym_index } }; - break :sym &loc_buf[0]; - }, - off: { - loc_buf[1] = .{ .consts = sym_off.off }; - break :off &loc_buf[1]; - }, - } } }; - }, - .pseudo_dbg_local_aro => loc: { - const air_off = emit.lower.mir.extraData( - Mir.AirOffset, - mir_inst.data.rx.payload, - ).data; - break :loc .{ air_off.air_inst, .{ .plus = .{ - reg: { - loc_buf[0] = .{ .breg = mir_inst.data.rx.r1.dwarfNum() }; - break :reg &loc_buf[0]; - }, - off: { - loc_buf[1] = .{ .consts = air_off.off }; - break :off &loc_buf[1]; - }, - } } }; - }, - .pseudo_dbg_local_af => loc: { - const reg_off = emit.lower.mir.resolveFrameAddr(emit.lower.mir.extraData( - bits.FrameAddr, - mir_inst.data.ax.payload, - ).data); - break :loc .{ mir_inst.data.ax.air_inst, .{ .plus = .{ - reg: { - loc_buf[0] = .{ .breg = reg_off.reg.dwarfNum() }; - break :reg &loc_buf[0]; - }, - off: { - loc_buf[1] = .{ .consts = reg_off.off }; - break :off &loc_buf[1]; - }, - } } }; - }, - .pseudo_dbg_local_am => loc: { - const mem = emit.lower.mem(mir_inst.data.ax.payload); - break :loc .{ mir_inst.data.ax.air_inst, .{ .plus = .{ - base: { - loc_buf[0] = switch (mem.base()) { - .none => .{ .constu = 0 }, - .reg => |reg| .{ .breg = reg.dwarfNum() }, - .frame => unreachable, - .reloc => |sym_index| .{ .addr = .{ .sym = sym_index } }, - }; - break :base &loc_buf[0]; - }, - disp: { - loc_buf[1] = switch (mem.disp()) { - .signed => |s| .{ .consts = s }, - .unsigned => |u| .{ .constu = u }, - }; - break :disp &loc_buf[1]; - }, - } } }; - }, - }; - const ip = &emit.lower.bin_file.comp.zcu.?.intern_pool; - const air_inst = emit.air.instructions.get(@intFromEnum(air_inst_index)); - const name: Air.NullTerminatedString = switch (air_inst.tag) { + .arg, .dbg_arg_inline => .local_arg, + .dbg_var_ptr, .dbg_var_val => .local_var, + }, + name.toSlice(emit.air), + switch (air_inst.tag) { else => unreachable, - .arg => air_inst.data.arg.name, - .dbg_var_ptr, .dbg_var_val, .dbg_arg_inline => @enumFromInt(air_inst.data.pl_op.payload), - }; - try dw.genLocalDebugInfo( - switch (air_inst.tag) { - else => unreachable, - .arg, .dbg_arg_inline => .local_arg, - .dbg_var_ptr, .dbg_var_val => .local_var, - }, - name.toSlice(emit.air), - switch (air_inst.tag) { - else => unreachable, - .arg => emit.air.typeOfIndex(air_inst_index, ip), - .dbg_var_ptr => emit.air.typeOf(air_inst.data.pl_op.operand, ip).childTypeIp(ip), - .dbg_var_val, .dbg_arg_inline => emit.air.typeOf(air_inst.data.pl_op.operand, ip), - }, - loc, - ); - }, - .plan9 => {}, - .none => {}, - } + .arg => emit.air.typeOfIndex(air_inst_index, ip), + .dbg_var_ptr => 
emit.air.typeOf(air_inst.data.pl_op.operand, ip).childTypeIp(ip), + .dbg_var_val, .dbg_arg_inline => emit.air.typeOf(air_inst.data.pl_op.operand, ip), + }, + loc, + ); + }, + .plan9 => {}, + .none => {}, }, - .pseudo_dbg_var_args_none => { - switch (emit.debug_output) { - .dwarf => |dw| try dw.genVarArgsDebugInfo(), - .plan9 => {}, - .none => {}, - } + .pseudo_dbg_var_args_none => switch (emit.debug_output) { + .dwarf => |dwarf| try dwarf.genVarArgsDebugInfo(), + .plan9 => {}, + .none => {}, }, .pseudo_dead_none => {}, }, @@ -515,16 +504,22 @@ fn fixupRelocs(emit: *Emit) Error!void { } } -fn dbgAdvancePCAndLine(emit: *Emit, line: u32, column: u32) Error!void { - const delta_line = @as(i33, line) - @as(i33, emit.prev_di_line); +const Loc = struct { + line: u32, + column: u32, + is_stmt: bool, +}; + +fn dbgAdvancePCAndLine(emit: *Emit, loc: Loc) Error!void { + const delta_line = @as(i33, loc.line) - @as(i33, emit.prev_di_loc.line); const delta_pc: usize = emit.code.items.len - emit.prev_di_pc; log.debug(" (advance pc={d} and line={d})", .{ delta_pc, delta_line }); switch (emit.debug_output) { - .dwarf => |dw| { - if (column != emit.prev_di_column) try dw.setColumn(column); - try dw.advancePCAndLine(delta_line, delta_pc); - emit.prev_di_line = line; - emit.prev_di_column = column; + .dwarf => |dwarf| { + if (loc.is_stmt != emit.prev_di_loc.is_stmt) try dwarf.negateStmt(); + if (loc.column != emit.prev_di_loc.column) try dwarf.setColumn(loc.column); + try dwarf.advancePCAndLine(delta_line, delta_pc); + emit.prev_di_loc = loc; emit.prev_di_pc = emit.code.items.len; }, .plan9 => |dbg_out| { @@ -553,11 +548,10 @@ fn dbgAdvancePCAndLine(emit: *Emit, line: u32, column: u32) Error!void { // we don't need to do anything, because adding the pc quanta does it for us } else unreachable; if (dbg_out.start_line == null) - dbg_out.start_line = emit.prev_di_line; - dbg_out.end_line = line; + dbg_out.start_line = emit.prev_di_loc.line; + dbg_out.end_line = loc.line; // only do this if the pc changed - emit.prev_di_line = line; - emit.prev_di_column = column; + emit.prev_di_loc = loc; emit.prev_di_pc = emit.code.items.len; }, .none => {}, diff --git a/src/arch/x86_64/Lower.zig b/src/arch/x86_64/Lower.zig index 6ac79378c1..015b3ba12e 100644 --- a/src/arch/x86_64/Lower.zig +++ b/src/arch/x86_64/Lower.zig @@ -310,6 +310,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { }), .pseudo_dbg_prologue_end_none, + .pseudo_dbg_line_stmt_line_column, .pseudo_dbg_line_line_column, .pseudo_dbg_epilogue_begin_none, .pseudo_dbg_enter_block_none, diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index 0e9d010758..a7f308b7b4 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -930,7 +930,10 @@ pub const Inst = struct { /// End of prologue pseudo_dbg_prologue_end_none, - /// Update debug line + /// Update debug line with is_stmt register set + /// Uses `line_column` payload. + pseudo_dbg_line_stmt_line_column, + /// Update debug line with is_stmt register clear /// Uses `line_column` payload. 
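+    /// (Emit flips the DWARF is_stmt register via `DW.LNS.negate_stmt`, see
+    /// `Dwarf.WipNav.negateStmt`, whenever consecutive entries disagree.)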
pseudo_dbg_line_line_column, /// Start of epilogue diff --git a/src/codegen/c.zig b/src/codegen/c.zig index 0410023588..56466b4395 100644 --- a/src/codegen/c.zig +++ b/src/codegen/c.zig @@ -3289,6 +3289,7 @@ fn genBodyInner(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail, .try_ptr_cold => try airTryPtr(f, inst), .dbg_stmt => try airDbgStmt(f, inst), + .dbg_empty_stmt => try airDbgEmptyStmt(f, inst), .dbg_var_ptr, .dbg_var_val, .dbg_arg_inline => try airDbgVar(f, inst), .float_from_int, @@ -4601,6 +4602,11 @@ fn airDbgStmt(f: *Function, inst: Air.Inst.Index) !CValue { return .none; } +fn airDbgEmptyStmt(f: *Function, _: Air.Inst.Index) !CValue { + try f.object.writer().writeAll("(void)0;\n"); + return .none; +} + fn airDbgInlineBlock(f: *Function, inst: Air.Inst.Index) !CValue { const pt = f.object.dg.pt; const zcu = pt.zcu; diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index fb20d4d622..d0b12350c0 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -5391,6 +5391,7 @@ pub const FuncGen = struct { .inferred_alloc, .inferred_alloc_comptime => unreachable, .dbg_stmt => try self.airDbgStmt(inst), + .dbg_empty_stmt => try self.airDbgEmptyStmt(inst), .dbg_var_ptr => try self.airDbgVarPtr(inst), .dbg_var_val => try self.airDbgVarVal(inst, false), .dbg_arg_inline => try self.airDbgVarVal(inst, true), @@ -7433,6 +7434,12 @@ pub const FuncGen = struct { return .none; } + fn airDbgEmptyStmt(self: *FuncGen, inst: Air.Inst.Index) !Builder.Value { + _ = self; + _ = inst; + return .none; + } + fn airDbgInlineBlock(self: *FuncGen, inst: Air.Inst.Index) !Builder.Value { const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; const extra = self.air.extraData(Air.DbgInlineBlock, ty_pl.payload); diff --git a/src/dev.zig b/src/dev.zig index 0d365c3e19..d623a708e7 100644 --- a/src/dev.zig +++ b/src/dev.zig @@ -81,6 +81,7 @@ pub const Env = enum { => true, .cc_command, .translate_c_command, + .fmt_command, .jit_command, .fetch_command, .init_command, @@ -168,6 +169,7 @@ pub const Feature = enum { clang_command, cc_command, translate_c_command, + fmt_command, jit_command, fetch_command, init_command, diff --git a/src/link/Dwarf.zig b/src/link/Dwarf.zig index 6653a8309a..afdc5d1d48 100644 --- a/src/link/Dwarf.zig +++ b/src/link/Dwarf.zig @@ -1474,6 +1474,11 @@ pub const WipNav = struct { try uleb128(dlw, column + 1); } + pub fn negateStmt(wip_nav: *WipNav) error{OutOfMemory}!void { + const dlw = wip_nav.debug_line.writer(wip_nav.dwarf.gpa); + try dlw.writeByte(DW.LNS.negate_stmt); + } + pub fn setPrologueEnd(wip_nav: *WipNav) error{OutOfMemory}!void { const dlw = wip_nav.debug_line.writer(wip_nav.dwarf.gpa); try dlw.writeByte(DW.LNS.set_prologue_end); diff --git a/src/link/Elf.zig b/src/link/Elf.zig index 8f39d3412f..a068ac6cdc 100644 --- a/src/link/Elf.zig +++ b/src/link/Elf.zig @@ -113,8 +113,6 @@ thunks: std.ArrayListUnmanaged(Thunk) = .empty, merge_sections: std.ArrayListUnmanaged(Merge.Section) = .empty, comment_merge_section_index: ?Merge.Section.Index = null, -first_eflags: ?elf.Word = null, - /// `--verbose-link` output. /// Initialized on creation, appended to as inputs are added, printed during `flush`. 
dump_argv_list: std.ArrayListUnmanaged([]const u8), @@ -791,7 +789,7 @@ pub fn loadInput(self: *Elf, input: link.Input) !void { .res => unreachable, .dso_exact => @panic("TODO"), .object => |obj| try parseObject(self, obj), - .archive => |obj| try parseArchive(gpa, diags, &self.file_handles, &self.files, &self.first_eflags, target, debug_fmt_strip, default_sym_version, &self.objects, obj, is_static_lib), + .archive => |obj| try parseArchive(gpa, diags, &self.file_handles, &self.files, target, debug_fmt_strip, default_sym_version, &self.objects, obj, is_static_lib), .dso => |dso| try parseDso(gpa, diags, dso, &self.shared_objects, &self.files, target), } } @@ -1124,7 +1122,6 @@ fn parseObject(self: *Elf, obj: link.Input.Object) !void { const gpa = self.base.comp.gpa; const diags = &self.base.comp.link_diags; - const first_eflags = &self.first_eflags; const target = self.base.comp.root_mod.resolved_target.result; const debug_fmt_strip = self.base.comp.config.debug_format == .strip; const default_sym_version = self.default_sym_version; @@ -1145,7 +1142,7 @@ fn parseObject(self: *Elf, obj: link.Input.Object) !void { try self.objects.append(gpa, index); const object = self.file(index).?.object; - try object.parseCommon(gpa, diags, obj.path, handle, target, first_eflags); + try object.parseCommon(gpa, diags, obj.path, handle, target); if (!self.base.isStaticLib()) { try object.parse(gpa, diags, obj.path, handle, target, debug_fmt_strip, default_sym_version); } @@ -1156,7 +1153,6 @@ fn parseArchive( diags: *Diags, file_handles: *std.ArrayListUnmanaged(File.Handle), files: *std.MultiArrayList(File.Entry), - first_eflags: *?elf.Word, target: std.Target, debug_fmt_strip: bool, default_sym_version: elf.Versym, @@ -1179,7 +1175,7 @@ fn parseArchive( const object = &files.items(.data)[index].object; object.index = index; object.alive = init_alive; - try object.parseCommon(gpa, diags, obj.path, obj.file, target, first_eflags); + try object.parseCommon(gpa, diags, obj.path, obj.file, target); if (!is_static_lib) try object.parse(gpa, diags, obj.path, obj.file, target, debug_fmt_strip, default_sym_version); try objects.append(gpa, index); diff --git a/src/link/Elf/Object.zig b/src/link/Elf/Object.zig index 688e51f3f1..65a62ff1a6 100644 --- a/src/link/Elf/Object.zig +++ b/src/link/Elf/Object.zig @@ -99,7 +99,6 @@ pub fn parseCommon( path: Path, handle: fs.File, target: std.Target, - first_eflags: *?elf.Word, ) !void { const offset = if (self.archive) |ar| ar.offset else 0; const file_size = (try handle.stat()).size; @@ -114,7 +113,7 @@ pub fn parseCommon( @tagName(self.header.?.e_machine), }); } - try validateEFlags(diags, path, target, self.header.?.e_flags, first_eflags); + try validateEFlags(diags, path, target, self.header.?.e_flags); if (self.header.?.e_shnum == 0) return; @@ -180,39 +179,81 @@ pub fn parseCommon( } } -fn validateEFlags( +pub fn validateEFlags( diags: *Diags, path: Path, target: std.Target, e_flags: elf.Word, - first_eflags: *?elf.Word, -) error{LinkFailure}!void { - if (first_eflags.*) |*self_eflags| { - switch (target.cpu.arch) { - .riscv64 => { - if (e_flags != self_eflags.*) { - const riscv_eflags: riscv.RiscvEflags = @bitCast(e_flags); - const self_riscv_eflags: *riscv.RiscvEflags = @ptrCast(self_eflags); +) !void { + switch (target.cpu.arch) { + .riscv64 => { + const features = target.cpu.features; + const flags: riscv.Eflags = @bitCast(e_flags); + var any_errors: bool = false; - self_riscv_eflags.rvc = self_riscv_eflags.rvc or riscv_eflags.rvc; - self_riscv_eflags.tso = 
self_riscv_eflags.tso or riscv_eflags.tso; + // For an input object to target an ABI that the target CPU doesn't have enabled + // is invalid, and will throw an error. - var any_errors: bool = false; - if (self_riscv_eflags.fabi != riscv_eflags.fabi) { - any_errors = true; - diags.addParseError(path, "cannot link object files with different float-point ABIs", .{}); - } - if (self_riscv_eflags.rve != riscv_eflags.rve) { - any_errors = true; - diags.addParseError(path, "cannot link object files with different RVEs", .{}); - } - if (any_errors) return error.LinkFailure; - } - }, - else => {}, - } - } else { - first_eflags.* = e_flags; + // Invalid when + // 1. The input uses C and we do not. + if (flags.rvc and !std.Target.riscv.featureSetHas(features, .c)) { + any_errors = true; + diags.addParseError( + path, + "cannot link object file targeting the C feature without having the C feature enabled", + .{}, + ); + } + + // Invalid when + // 1. We use E and the input does not. + // 2. The input uses E and we do not. + if (std.Target.riscv.featureSetHas(features, .e) != flags.rve) { + any_errors = true; + diags.addParseError( + path, + "{s}", + .{ + if (flags.rve) + "cannot link object file targeting the E feature without having the E feature enabled" + else + "cannot link object file not targeting the E feature while having the E feature enabled", + }, + ); + } + + // Invalid when + // 1. We use total store order and the input does not. + // 2. The input uses total store order and we do not. + if (flags.tso != std.Target.riscv.featureSetHas(features, .ztso)) { + any_errors = true; + diags.addParseError( + path, + "cannot link object file targeting the TSO memory model without having the ztso feature enabled", + .{}, + ); + } + + const fabi: riscv.Eflags.FloatAbi = + if (std.Target.riscv.featureSetHas(features, .d)) + .double + else if (std.Target.riscv.featureSetHas(features, .f)) + .single + else + .soft; + + if (flags.fabi != fabi) { + any_errors = true; + diags.addParseError( + path, + "cannot link object file targeting a different floating-point ABI. targeting {s}, found {s}", + .{ @tagName(fabi), @tagName(flags.fabi) }, + ); + } + + if (any_errors) return error.LinkFailure; + }, + else => {}, } } diff --git a/src/link/Elf/ZigObject.zig b/src/link/Elf/ZigObject.zig index 1e29ab8bf6..effe12539c 100644 --- a/src/link/Elf/ZigObject.zig +++ b/src/link/Elf/ZigObject.zig @@ -1496,7 +1496,7 @@ pub fn updateFunc( }); defer gpa.free(name); const osec = if (self.text_index) |sect_sym_index| - self.symbol(sect_sym_index).output_section_index + self.symbol(sect_sym_index).outputShndx(elf_file).? 
else osec: { const osec = try elf_file.addSection(.{ .name = try elf_file.insertShString(".text"), diff --git a/src/link/riscv.zig b/src/link/riscv.zig index 7c0282ef3b..106fd1a817 100644 --- a/src/link/riscv.zig +++ b/src/link/riscv.zig @@ -70,18 +70,20 @@ fn bitSlice( return @truncate((value >> low) & (1 << (high - low + 1)) - 1); } -pub const RiscvEflags = packed struct(u32) { +pub const Eflags = packed struct(u32) { rvc: bool, - fabi: enum(u2) { + fabi: FloatAbi, + rve: bool, + tso: bool, + _reserved: u19 = 0, + _unused: u8 = 0, + + pub const FloatAbi = enum(u2) { soft = 0b00, single = 0b01, double = 0b10, quad = 0b11, - }, - rve: bool, - tso: bool, - _reserved: u19, - _unused: u8, + }; }; const mem = std.mem; diff --git a/src/main.zig b/src/main.zig index 291820cb75..13e24d8c25 100644 --- a/src/main.zig +++ b/src/main.zig @@ -309,6 +309,7 @@ fn mainArgs(gpa: Allocator, arena: Allocator, args: []const []const u8) !void { .server = use_server, }); } else if (mem.eql(u8, cmd, "fmt")) { + dev.check(.fmt_command); return @import("fmt.zig").run(gpa, arena, cmd_args); } else if (mem.eql(u8, cmd, "objcopy")) { return jitCmd(gpa, arena, cmd_args, .{ diff --git a/src/musl.zig b/src/musl.zig index d1b2fd2e2d..ace72c0b07 100644 --- a/src/musl.zig +++ b/src/musl.zig @@ -138,17 +138,6 @@ pub fn buildCrtFile(comp: *Compilation, in_crt_file: CrtFile, prog_node: std.Pro try addSrcFile(arena, &source_table, src_file); } - const time32_compat_arch_list = [_][]const u8{ - "arm", - "i386", - "m68k", - "microblaze", - "mips", - "mipsn32", - "or1k", - "powerpc", - "sh", - }; for (time32_compat_arch_list) |time32_compat_arch| { if (mem.eql(u8, arch_name, time32_compat_arch)) { for (compat_time32_files) |compat_time32_file| { @@ -239,13 +228,29 @@ pub fn buildCrtFile(comp: *Compilation, in_crt_file: CrtFile, prog_node: std.Pro }); const target = comp.root_mod.resolved_target.result; - const arch_define = try std.fmt.allocPrint(arena, "-DARCH_{s}", .{ - @tagName(target.cpu.arch), - }); + const arch_name = std.zig.target.muslArchName(target.cpu.arch, target.abi); + const time32 = for (time32_compat_arch_list) |time32_compat_arch| { + if (mem.eql(u8, arch_name, time32_compat_arch)) break true; + } else false; + const arch_define = try std.fmt.allocPrint(arena, "-DARCH_{s}", .{arch_name}); + const family_define = switch (target.cpu.arch) { + .arm, .armeb, .thumb, .thumbeb => "-DFAMILY_arm", + .aarch64, .aarch64_be => "-DFAMILY_aarch64", + .loongarch64 => "-DFAMILY_loongarch", + .m68k => "-DFAMILY_m68k", + .mips, .mipsel, .mips64, .mips64el => "-DFAMILY_mips", + .powerpc, .powerpc64, .powerpc64le => "-DFAMILY_powerpc", + .riscv32, .riscv64 => "-DFAMILY_riscv", + .s390x => "-DFAMILY_s390x", + .x86, .x86_64 => "-DFAMILY_x86", + else => unreachable, + }; const cc_argv: []const []const u8 = if (target.ptrBitWidth() == 64) - &.{ "-DPTR64", arch_define } + &.{ "-DPTR64", arch_define, family_define } + else if (time32) + &.{ "-DTIME32", arch_define, family_define } else - &.{arch_define}; + &.{ arch_define, family_define }; const root_mod = try Module.create(arena, .{ .global_cache_directory = comp.global_cache_directory, @@ -347,6 +352,18 @@ pub fn needsCrt0(output_mode: std.builtin.OutputMode, link_mode: std.builtin.Lin }; } +const time32_compat_arch_list = [_][]const u8{ + "arm", + "i386", + "m68k", + "microblaze", + "mips", + "mipsn32", + "or1k", + "powerpc", + "sh", +}; + fn isArchName(name: []const u8) bool { const musl_arch_names = [_][]const u8{ "aarch64", diff --git a/src/print_air.zig b/src/print_air.zig index 
6a3f31a27c..280d05edfa 100644 --- a/src/print_air.zig +++ b/src/print_air.zig @@ -202,6 +202,7 @@ const Writer = struct { .trap, .breakpoint, + .dbg_empty_stmt, .unreach, .ret_addr, .frame_addr, diff --git a/src/print_zir.zig b/src/print_zir.zig index b8f4432e72..808ead0e79 100644 --- a/src/print_zir.zig +++ b/src/print_zir.zig @@ -621,6 +621,8 @@ const Writer = struct { .field_parent_ptr => try self.writeFieldParentPtr(stream, extended), .builtin_value => try self.writeBuiltinValue(stream, extended), .inplace_arith_result_ty => try self.writeInplaceArithResultTy(stream, extended), + + .dbg_empty_stmt => try stream.writeAll("))"), } } diff --git a/test/cases/compile_errors/align_zero.zig b/test/cases/compile_errors/align_zero.zig new file mode 100644 index 0000000000..a63523b853 --- /dev/null +++ b/test/cases/compile_errors/align_zero.zig @@ -0,0 +1,52 @@ +pub var global_var: i32 align(0) = undefined; + +pub export fn a() void { + _ = &global_var; +} + +pub extern var extern_var: i32 align(0); + +pub export fn b() void { + _ = &extern_var; +} + +pub export fn c() align(0) void {} + +pub export fn d() void { + _ = *align(0) fn () i32; +} + +pub export fn e() void { + var local_var: i32 align(0) = undefined; + _ = &local_var; +} + +pub export fn f() void { + _ = *align(0) i32; +} + +pub export fn g() void { + _ = []align(0) i32; +} + +pub export fn h() void { + _ = struct { field: i32 align(0) }; +} + +pub export fn i() void { + _ = union { field: i32 align(0) }; +} + +// error +// backend=stage2 +// target=native +// +// :1:31: error: alignment must be >= 1 +// :7:38: error: alignment must be >= 1 +// :13:25: error: alignment must be >= 1 +// :16:16: error: alignment must be >= 1 +// :20:30: error: alignment must be >= 1 +// :25:16: error: alignment must be >= 1 +// :29:17: error: alignment must be >= 1 +// :33:35: error: alignment must be >= 1 +// :37:34: error: alignment must be >= 1 diff --git a/test/cases/compile_errors/function_alignment_on_unsupported_target.zig b/test/cases/compile_errors/function_alignment_on_unsupported_target.zig index 7b033e0f60..e3ea1dd068 100644 --- a/test/cases/compile_errors/function_alignment_on_unsupported_target.zig +++ b/test/cases/compile_errors/function_alignment_on_unsupported_target.zig @@ -1,4 +1,4 @@ -export fn entry() align(0) void {} +export fn entry() align(64) void {} // error // backend=stage2 diff --git a/test/src/Debugger.zig b/test/src/Debugger.zig index 91eaa385ba..1e2289eb11 100644 --- a/test/src/Debugger.zig +++ b/test/src/Debugger.zig @@ -808,6 +808,424 @@ pub fn addTestsForTarget(db: *Debugger, target: Target) void { \\1 breakpoints deleted; 0 breakpoint locations disabled. 
}, ); + db.addLldbTest( + "step_single_stmt_loops", + target, + &.{ + .{ + .path = "step_single_stmt_loops.zig", + .source = + \\pub fn main() void { + \\ var x: u32 = 0; + \\ for (0..3) |_| { + \\ x +%= 1; + \\ } + \\ { + \\ var i: u32 = 0; + \\ while (i < 3) : (i +%= 1) { + \\ x +%= 1; + \\ } + \\ } + \\ { + \\ var i: u32 = 0; + \\ while (i < 3) { + \\ i +%= 1; + \\ } + \\ } + \\ inline for (0..3) |_| { + \\ x +%= 1; + \\ } + \\ { + \\ comptime var i: u32 = 0; + \\ inline while (i < 3) : (i +%= 1) { + \\ x +%= 1; + \\ } + \\ } + \\ { + \\ comptime var i: u32 = 0; + \\ inline while (i < 3) { + \\ i +%= 1; + \\ } + \\ } + \\ x +%= 1; + \\} + \\ + , + }, + }, + \\breakpoint set --name step_single_stmt_loops.main + \\process launch + \\thread step-in + \\#00 + \\frame variable x + \\thread step-in + \\#01 + \\frame variable x + \\thread step-in + \\#02 + \\frame variable x + \\thread step-in + \\#03 + \\frame variable x + \\thread step-in + \\#04 + \\frame variable x + \\thread step-in + \\#05 + \\frame variable x + \\thread step-in + \\#06 + \\frame variable x + \\thread step-in + \\#07 + \\frame variable x + \\thread step-in + \\#08 + \\frame variable x + \\thread step-in + \\#09 + \\frame variable x + \\thread step-in + \\#10 + \\frame variable x + \\thread step-in + \\#11 + \\frame variable x + \\thread step-in + \\#12 + \\frame variable x + \\thread step-in + \\#13 + \\frame variable x + \\thread step-in + \\#14 + \\frame variable x + \\thread step-in + \\#15 + \\frame variable x + \\thread step-in + \\#16 + \\frame variable x + \\thread step-in + \\#17 + \\frame variable x + \\thread step-in + \\#18 + \\frame variable x + \\thread step-in + \\#19 + \\frame variable x + \\thread step-in + \\#20 + \\frame variable x + \\thread step-in + \\#21 + \\frame variable x + \\thread step-in + \\#22 + \\frame variable x + \\thread step-in + \\#23 + \\frame variable x + \\thread step-in + \\#24 + \\frame variable x + \\thread step-in + \\#25 + \\frame variable x + \\thread step-in + \\#26 + \\frame variable x + \\thread step-in + \\#27 + \\frame variable x + \\thread step-in + \\#28 + \\frame variable x + \\thread step-in + \\#29 + \\frame variable x + \\thread step-in + \\#30 + \\frame variable x + \\thread step-in + \\#31 + \\frame variable x + \\thread step-in + \\#32 + \\frame variable x + \\thread step-in + \\#33 + \\frame variable x + \\thread step-in + \\#34 + \\frame variable x + \\thread step-in + \\#35 + \\frame variable x + \\thread step-in + \\#36 + \\frame variable x + \\thread step-in + \\#37 + \\frame variable x + \\thread step-in + \\#38 + \\frame variable x + \\thread step-in + \\#39 + \\frame variable x + \\thread step-in + \\#40 + \\frame variable x + \\thread step-in + \\#41 + \\frame variable x + \\thread step-in + \\#42 + \\frame variable x + \\thread step-in + \\#43 + \\frame variable x + \\thread step-in + \\#44 + \\frame variable x + \\thread step-in + \\#45 + \\frame variable x + \\ + , + &.{ + \\(lldb) #00 + \\(lldb) frame variable x + \\(u32) x = 0 + \\(lldb) thread step-in + , + \\(lldb) #01 + \\(lldb) frame variable x + \\(u32) x = 0 + \\(lldb) thread step-in + , + \\(lldb) #02 + \\(lldb) frame variable x + \\(u32) x = 1 + \\(lldb) thread step-in + , + \\(lldb) #03 + \\(lldb) frame variable x + \\(u32) x = 1 + \\(lldb) thread step-in + , + \\(lldb) #04 + \\(lldb) frame variable x + \\(u32) x = 1 + \\(lldb) thread step-in + , + \\(lldb) #05 + \\(lldb) frame variable x + \\(u32) x = 2 + \\(lldb) thread step-in + , + \\(lldb) #06 + \\(lldb) frame variable x + \\(u32) x = 
2 + \\(lldb) thread step-in + , + \\(lldb) #07 + \\(lldb) frame variable x + \\(u32) x = 2 + \\(lldb) thread step-in + , + \\(lldb) #08 + \\(lldb) frame variable x + \\(u32) x = 3 + \\(lldb) thread step-in + , + \\(lldb) #09 + \\(lldb) frame variable x + \\(u32) x = 3 + \\(lldb) thread step-in + , + \\(lldb) #10 + \\(lldb) frame variable x + \\(u32) x = 3 + \\(lldb) thread step-in + , + \\(lldb) #11 + \\(lldb) frame variable x + \\(u32) x = 3 + \\(lldb) thread step-in + , + \\(lldb) #12 + \\(lldb) frame variable x + \\(u32) x = 3 + \\(lldb) thread step-in + , + \\(lldb) #13 + \\(lldb) frame variable x + \\(u32) x = 4 + \\(lldb) thread step-in + , + \\(lldb) #14 + \\(lldb) frame variable x + \\(u32) x = 4 + \\(lldb) thread step-in + , + \\(lldb) #15 + \\(lldb) frame variable x + \\(u32) x = 4 + \\(lldb) thread step-in + , + \\(lldb) #16 + \\(lldb) frame variable x + \\(u32) x = 5 + \\(lldb) thread step-in + , + \\(lldb) #17 + \\(lldb) frame variable x + \\(u32) x = 5 + \\(lldb) thread step-in + , + \\(lldb) #18 + \\(lldb) frame variable x + \\(u32) x = 5 + \\(lldb) thread step-in + , + \\(lldb) #19 + \\(lldb) frame variable x + \\(u32) x = 6 + \\(lldb) thread step-in + , + \\(lldb) #20 + \\(lldb) frame variable x + \\(u32) x = 6 + \\(lldb) thread step-in + , + \\(lldb) #21 + \\(lldb) frame variable x + \\(u32) x = 6 + \\(lldb) thread step-in + , + \\(lldb) #22 + \\(lldb) frame variable x + \\(u32) x = 6 + \\(lldb) thread step-in + , + \\(lldb) #23 + \\(lldb) frame variable x + \\(u32) x = 6 + \\(lldb) thread step-in + , + \\(lldb) #24 + \\(lldb) frame variable x + \\(u32) x = 6 + \\(lldb) thread step-in + , + \\(lldb) #25 + \\(lldb) frame variable x + \\(u32) x = 6 + \\(lldb) thread step-in + , + \\(lldb) #26 + \\(lldb) frame variable x + \\(u32) x = 6 + \\(lldb) thread step-in + , + \\(lldb) #27 + \\(lldb) frame variable x + \\(u32) x = 6 + \\(lldb) thread step-in + , + \\(lldb) #28 + \\(lldb) frame variable x + \\(u32) x = 6 + \\(lldb) thread step-in + , + \\(lldb) #29 + \\(lldb) frame variable x + \\(u32) x = 6 + \\(lldb) thread step-in + , + \\(lldb) #30 + \\(lldb) frame variable x + \\(u32) x = 6 + \\(lldb) thread step-in + , + \\(lldb) #31 + \\(lldb) frame variable x + \\(u32) x = 6 + \\(lldb) thread step-in + , + \\(lldb) #32 + \\(lldb) frame variable x + \\(u32) x = 6 + \\(lldb) thread step-in + , + \\(lldb) #33 + \\(lldb) frame variable x + \\(u32) x = 7 + \\(lldb) thread step-in + , + \\(lldb) #34 + \\(lldb) frame variable x + \\(u32) x = 7 + \\(lldb) thread step-in + , + \\(lldb) #35 + \\(lldb) frame variable x + \\(u32) x = 8 + \\(lldb) thread step-in + , + \\(lldb) #36 + \\(lldb) frame variable x + \\(u32) x = 8 + \\(lldb) thread step-in + , + \\(lldb) #37 + \\(lldb) frame variable x + \\(u32) x = 9 + \\(lldb) thread step-in + , + \\(lldb) #38 + \\(lldb) frame variable x + \\(u32) x = 9 + \\(lldb) thread step-in + , + \\(lldb) #39 + \\(lldb) frame variable x + \\(u32) x = 10 + \\(lldb) thread step-in + , + \\(lldb) #40 + \\(lldb) frame variable x + \\(u32) x = 10 + \\(lldb) thread step-in + , + \\(lldb) #41 + \\(lldb) frame variable x + \\(u32) x = 11 + \\(lldb) thread step-in + , + \\(lldb) #42 + \\(lldb) frame variable x + \\(u32) x = 11 + \\(lldb) thread step-in + , + \\(lldb) #43 + \\(lldb) frame variable x + \\(u32) x = 12 + \\(lldb) thread step-in + , + \\(lldb) #44 + \\(lldb) frame variable x + \\(u32) x = 12 + \\(lldb) thread step-in + , + \\(lldb) #45 + \\(lldb) frame variable x + \\(u32) x = 12 + }, + ); db.addLldbTest( "inline_call", target, @@ -1533,17 +1951,17 @@ 
pub fn addTestsForTarget(db: *Debugger, target: Target) void { &.{ \\(lldb) frame variable --show-types -- list0 list0.len list0.capacity list0[0] list0[1] list0[2] list0.0 list0.1 list0.2 \\(std.multi_array_list.MultiArrayList(struct { u32, u8, u16 })) list0 = len=3 capacity=8 { - \\ (std.struct { u32, u8, u16 }) [0] = { + \\ (struct { u32, u8, u16 }) [0] = { \\ (u32) .@"0" = 1 \\ (u8) .@"1" = 2 \\ (u16) .@"2" = 3 \\ } - \\ (std.struct { u32, u8, u16 }) [1] = { + \\ (struct { u32, u8, u16 }) [1] = { \\ (u32) .@"0" = 4 \\ (u8) .@"1" = 5 \\ (u16) .@"2" = 6 \\ } - \\ (std.struct { u32, u8, u16 }) [2] = { + \\ (struct { u32, u8, u16 }) [2] = { \\ (u32) .@"0" = 7 \\ (u8) .@"1" = 8 \\ (u16) .@"2" = 9 @@ -1551,17 +1969,17 @@ pub fn addTestsForTarget(db: *Debugger, target: Target) void { \\} \\(usize) list0.len = 3 \\(usize) list0.capacity = 8 - \\(std.struct { u32, u8, u16 }) list0[0] = { + \\(struct { u32, u8, u16 }) list0[0] = { \\ (u32) .@"0" = 1 \\ (u8) .@"1" = 2 \\ (u16) .@"2" = 3 \\} - \\(std.struct { u32, u8, u16 }) list0[1] = { + \\(struct { u32, u8, u16 }) list0[1] = { \\ (u32) .@"0" = 4 \\ (u8) .@"1" = 5 \\ (u16) .@"2" = 6 \\} - \\(std.struct { u32, u8, u16 }) list0[2] = { + \\(struct { u32, u8, u16 }) list0[2] = { \\ (u32) .@"0" = 7 \\ (u8) .@"1" = 8 \\ (u16) .@"2" = 9 @@ -1583,17 +2001,17 @@ pub fn addTestsForTarget(db: *Debugger, target: Target) void { \\} \\(lldb) frame variable --show-types -- slice0 slice0.len slice0.capacity slice0[0] slice0[1] slice0[2] slice0.0 slice0.1 slice0.2 \\(std.multi_array_list.MultiArrayList(struct { u32, u8, u16 }).Slice) slice0 = len=3 capacity=8 { - \\ (std.struct { u32, u8, u16 }) [0] = { + \\ (struct { u32, u8, u16 }) [0] = { \\ (u32) .@"0" = 1 \\ (u8) .@"1" = 2 \\ (u16) .@"2" = 3 \\ } - \\ (std.struct { u32, u8, u16 }) [1] = { + \\ (struct { u32, u8, u16 }) [1] = { \\ (u32) .@"0" = 4 \\ (u8) .@"1" = 5 \\ (u16) .@"2" = 6 \\ } - \\ (std.struct { u32, u8, u16 }) [2] = { + \\ (struct { u32, u8, u16 }) [2] = { \\ (u32) .@"0" = 7 \\ (u8) .@"1" = 8 \\ (u16) .@"2" = 9 @@ -1601,17 +2019,17 @@ pub fn addTestsForTarget(db: *Debugger, target: Target) void { \\} \\(usize) slice0.len = 3 \\(usize) slice0.capacity = 8 - \\(std.struct { u32, u8, u16 }) slice0[0] = { + \\(struct { u32, u8, u16 }) slice0[0] = { \\ (u32) .@"0" = 1 \\ (u8) .@"1" = 2 \\ (u16) .@"2" = 3 \\} - \\(std.struct { u32, u8, u16 }) slice0[1] = { + \\(struct { u32, u8, u16 }) slice0[1] = { \\ (u32) .@"0" = 4 \\ (u8) .@"1" = 5 \\ (u16) .@"2" = 6 \\} - \\(std.struct { u32, u8, u16 }) slice0[2] = { + \\(struct { u32, u8, u16 }) slice0[2] = { \\ (u32) .@"0" = 7 \\ (u8) .@"1" = 8 \\ (u16) .@"2" = 9 diff --git a/tools/gen_stubs.zig b/tools/gen_stubs.zig index 23cfd57d55..c5663fa825 100644 --- a/tools/gen_stubs.zig +++ b/tools/gen_stubs.zig @@ -2,23 +2,51 @@ //! ./gen_stubs /path/to/musl/build-all >libc.S //! //! The directory 'build-all' is expected to contain these subdirectories: -//! arm x86 mips mips64 powerpc powerpc64 riscv32 riscv64 x86_64 loongarch64 +//! +//! * aarch64 +//! * arm +//! * i386 +//! * loongarch64 +//! * mips +//! * mips64 +//! * mipsn32 +//! * powerpc +//! * powerpc64 +//! * riscv32 +//! * riscv64 +//! * s390x +//! * x32 (currently broken) +//! * x86_64 //! //! ...each with 'lib/libc.so' inside of them. //! //! When building the resulting libc.S file, these defines are required: -//! * `-DPTR64`: when the architecture is 64-bit +//! * `-DTIME32`: When the target's primary time ABI is 32-bit +//! * `-DPTR64`: When the target has 64-bit pointers //! 
* One of the following, corresponding to the CPU architecture: -//! - `-DARCH_riscv32` -//! - `-DARCH_riscv64` +//! - `-DARCH_aarch64` +//! - `-DARCH_arm` +//! - `-DARCH_i386` +//! - `-DARCH_loongarch64` //! - `-DARCH_mips` //! - `-DARCH_mips64` -//! - `-DARCH_i386` -//! - `-DARCH_x86_64` +//! - `-DARCH_mipsn32` //! - `-DARCH_powerpc` //! - `-DARCH_powerpc64` -//! - `-DARCH_aarch64` -//! - `-DARCH_loongarch64` +//! - `-DARCH_riscv32` +//! - `-DARCH_riscv64` +//! - `-DARCH_s390x` +//! - `-DARCH_x32` +//! - `-DARCH_x86_64` +//! * One of the following, corresponding to the CPU architecture family: +//! - `-DFAMILY_aarch64` +//! - `-DFAMILY_arm` +//! - `-DFAMILY_loongarch` +//! - `-DFAMILY_mips` +//! - `-DFAMILY_powerpc` +//! - `-DFAMILY_riscv` +//! - `-DFAMILY_s390x` +//! - `-DFAMILY_x86` // TODO: pick the best index to put them into instead of at the end // - e.g. find a common previous symbol and put it after that one @@ -29,24 +57,85 @@ const builtin = std.builtin; const mem = std.mem; const log = std.log; const elf = std.elf; -const native_endian = @import("builtin").target.cpu.arch.endian(); +const native_endian = @import("builtin").cpu.arch.endian(); -const inputs = .{ - .riscv32, - .riscv64, - .loongarch64, - .mips, - .mips64, - .x86, - .x86_64, - .powerpc, - .powerpc64, - .aarch64, +const Arch = enum { + aarch64, + arm, + i386, + loongarch64, + mips, + mips64, + mipsn32, + powerpc, + powerpc64, + riscv32, + riscv64, + s390x, + x86_64, + + pub fn ptrSize(arch: Arch) u16 { + return switch (arch) { + .arm, + .i386, + .mips, + .mipsn32, + .powerpc, + .riscv32, + => 4, + .aarch64, + .loongarch64, + .mips64, + .powerpc64, + .riscv64, + .s390x, + .x86_64, + => 8, + }; + } + + pub fn isTime32(arch: Arch) bool { + return switch (arch) { + // This list will never grow; newer 32-bit ports will be time64 (e.g. riscv32). + .arm, + .i386, + .mips, + .mipsn32, + .powerpc, + => true, + else => false, + }; + } + + pub fn family(arch: Arch) Family { + return switch (arch) { + .aarch64 => .aarch64, + .arm => .arm, + .i386, .x86_64 => .x86, + .loongarch64 => .loongarch, + .mips, .mips64, .mipsn32 => .mips, + .powerpc, .powerpc64 => .powerpc, + .riscv32, .riscv64 => .riscv, + .s390x => .s390x, + }; + } }; -const arches: [inputs.len]std.Target.Cpu.Arch = blk: { - var result: [inputs.len]std.Target.Cpu.Arch = undefined; - for (inputs) |arch| { +const Family = enum { + aarch64, + arm, + loongarch, + mips, + powerpc, + riscv, + s390x, + x86, +}; + +const arches: [@typeInfo(Arch).@"enum".fields.len]Arch = blk: { + var result: [@typeInfo(Arch).@"enum".fields.len]Arch = undefined; + for (@typeInfo(Arch).@"enum".fields) |field| { + const arch: Arch = @enumFromInt(field.value); result[archIndex(arch)] = arch; } break :blk result; @@ -60,6 +149,31 @@ const MultiSym = struct { ty: u4, visib: elf.STV, + fn isSingleArch(ms: MultiSym) ?Arch { + var result: ?Arch = null; + inline for (@typeInfo(Arch).@"enum".fields) |field| { + const arch: Arch = @enumFromInt(field.value); + if (ms.present[archIndex(arch)]) { + if (result != null) return null; + result = arch; + } + } + return result; + } + + fn isFamily(ms: MultiSym) ?Family { + var result: ?Family = null; + inline for (@typeInfo(Arch).@"enum".fields) |field| { + const arch: Arch = @enumFromInt(field.value); + if (ms.present[archIndex(arch)]) { + const family = arch.family(); + if (result) |r| if (family != r) return null; + result = family; + } + } + return result; + } + fn allPresent(ms: MultiSym) bool { for (arches, 0..) 
|_, i| { if (!ms.present[i]) { @@ -69,17 +183,14 @@ const MultiSym = struct { return true; } - fn is32Only(ms: MultiSym) bool { - return ms.present[archIndex(.riscv32)] == true and - ms.present[archIndex(.riscv64)] == false and - ms.present[archIndex(.mips)] == true and - ms.present[archIndex(.mips64)] == false and - ms.present[archIndex(.x86)] == true and - ms.present[archIndex(.x86_64)] == false and - ms.present[archIndex(.powerpc)] == true and - ms.present[archIndex(.powerpc64)] == false and - ms.present[archIndex(.aarch64)] == false and - ms.present[archIndex(.loongarch64)] == false; + fn isTime32Only(ms: MultiSym) bool { + inline for (@typeInfo(Arch).@"enum".fields) |field| { + const arch: Arch = @enumFromInt(field.value); + if (ms.present[archIndex(arch)] != arch.isTime32()) { + return false; + } + } + return true; } fn commonSize(ms: MultiSym) ?u64 { @@ -112,48 +223,11 @@ const MultiSym = struct { return binding.?; } - fn isPtrSize(ms: MultiSym) bool { - const map = .{ - .{ .riscv32, 4 }, - .{ .riscv64, 8 }, - .{ .mips, 4 }, - .{ .mips64, 8 }, - .{ .x86, 4 }, - .{ .x86_64, 8 }, - .{ .powerpc, 4 }, - .{ .powerpc64, 8 }, - .{ .aarch64, 8 }, - .{ .loongarch64, 8 }, - }; - inline for (map) |item| { - const arch = item[0]; - const size = item[1]; + fn isPtrSize(ms: MultiSym, mult: u16) bool { + inline for (@typeInfo(Arch).@"enum".fields) |field| { + const arch: Arch = @enumFromInt(field.value); const arch_index = archIndex(arch); - if (ms.present[arch_index] and ms.size[arch_index] != size) { - return false; - } - } - return true; - } - - fn isPtr2Size(ms: MultiSym) bool { - const map = .{ - .{ .riscv32, 8 }, - .{ .riscv64, 16 }, - .{ .mips, 8 }, - .{ .mips64, 16 }, - .{ .x86, 8 }, - .{ .x86_64, 16 }, - .{ .powerpc, 8 }, - .{ .powerpc64, 16 }, - .{ .aarch64, 16 }, - .{ .loongarch64, 16 }, - }; - inline for (map) |item| { - const arch = item[0]; - const size = item[1]; - const arch_index = archIndex(arch); - if (ms.present[arch_index] and ms.size[arch_index] != size) { + if (ms.present[arch_index] and ms.size[arch_index] != arch.ptrSize() * mult) { return false; } } @@ -161,22 +235,26 @@ const MultiSym = struct { } fn isWeak64(ms: MultiSym) bool { - const map = .{ - .{ .riscv32, 1 }, - .{ .riscv64, 2 }, - .{ .mips, 1 }, - .{ .mips64, 2 }, - .{ .x86, 1 }, - .{ .x86_64, 2 }, - .{ .powerpc, 1 }, - .{ .powerpc64, 2 }, - .{ .aarch64, 2 }, - .{ .loongarch64, 2 }, - }; - inline for (map) |item| { - const arch = item[0]; - const binding = item[1]; + inline for (@typeInfo(Arch).@"enum".fields) |field| { + const arch: Arch = @enumFromInt(field.value); const arch_index = archIndex(arch); + const binding: u4 = switch (arch.ptrSize()) { + 4 => std.elf.STB_GLOBAL, + 8 => std.elf.STB_WEAK, + else => unreachable, + }; + if (ms.present[arch_index] and ms.binding[arch_index] != binding) { + return false; + } + } + return true; + } + + fn isWeakTime64(ms: MultiSym) bool { + inline for (@typeInfo(Arch).@"enum".fields) |field| { + const arch: Arch = @enumFromInt(field.value); + const arch_index = archIndex(arch); + const binding: u4 = if (arch.isTime32()) std.elf.STB_GLOBAL else std.elf.STB_WEAK; if (ms.present[arch_index] and ms.binding[arch_index] != binding) { return false; } @@ -189,10 +267,9 @@ const Parse = struct { arena: mem.Allocator, sym_table: *std.StringArrayHashMap(MultiSym), sections: *std.StringArrayHashMap(void), - blacklist: std.StringArrayHashMap(void), elf_bytes: []align(@alignOf(elf.Elf64_Ehdr)) u8, header: elf.Header, - arch: std.Target.Cpu.Arch, + arch: Arch, }; pub fn main() !void { @@ 
-207,16 +284,10 @@ pub fn main() !void { var sym_table = std.StringArrayHashMap(MultiSym).init(arena); var sections = std.StringArrayHashMap(void).init(arena); - var blacklist = std.StringArrayHashMap(void).init(arena); - - try blacklist.ensureUnusedCapacity(blacklisted_symbols.len); - for (blacklisted_symbols) |name| { - blacklist.putAssumeCapacityNoClobber(name, {}); - } for (arches) |arch| { const libc_so_path = try std.fmt.allocPrint(arena, "{s}/lib/libc.so", .{ - archMuslName(arch), + @tagName(arch), }); // Read the ELF header. @@ -238,7 +309,6 @@ pub fn main() !void { .arena = arena, .sym_table = &sym_table, .sections = §ions, - .blacklist = blacklist, .elf_bytes = elf_bytes, .header = header, .arch = arch, @@ -268,6 +338,13 @@ pub fn main() !void { \\#define PTR2_SIZE_BYTES 8 \\#endif \\ + \\#ifdef TIME32 + \\#define WEAKTIME64 .globl + \\#else + \\#define WEAKTIME64 .weak + \\#endif + \\ + \\ ); // Sort the symbols for deterministic output and cleaner vcs diffs. @@ -301,7 +378,7 @@ pub fn main() !void { sym_table.sort(SymTableSort{ .sym_table = &sym_table, .sections = §ions }); var prev_section: u16 = std.math.maxInt(u16); - var prev_pp_state: enum { none, ptr32, special } = .none; + var prev_pp_state: union(enum) { all, single: Arch, multi, family: Family, time32 } = .all; for (sym_table.values(), 0..) |multi_sym, sym_index| { const name = sym_table.keys()[sym_index]; @@ -313,32 +390,66 @@ pub fn main() !void { if (multi_sym.allPresent()) { switch (prev_pp_state) { - .none => {}, - .ptr32, .special => { + .all => {}, + .single, .multi, .family, .time32 => { try stdout.writeAll("#endif\n"); - prev_pp_state = .none; + prev_pp_state = .all; }, } - } else if (multi_sym.is32Only()) { + } else if (multi_sym.isSingleArch()) |arch| { switch (prev_pp_state) { - .none => { - try stdout.writeAll("#ifdef PTR32\n"); - prev_pp_state = .ptr32; + .all => { + try stdout.print("#ifdef ARCH_{s}\n", .{@tagName(arch)}); + prev_pp_state = .{ .single = arch }; }, - .special => { - try stdout.writeAll("#endif\n#ifdef PTR32\n"); - prev_pp_state = .ptr32; + .multi, .family, .time32 => { + try stdout.print("#endif\n#ifdef ARCH_{s}\n", .{@tagName(arch)}); + prev_pp_state = .{ .single = arch }; }, - .ptr32 => {}, + .single => |prev_arch| { + if (arch != prev_arch) { + try stdout.print("#endif\n#ifdef ARCH_{s}\n", .{@tagName(arch)}); + prev_pp_state = .{ .single = arch }; + } + }, + } + } else if (multi_sym.isFamily()) |family| { + switch (prev_pp_state) { + .all => { + try stdout.print("#ifdef FAMILY_{s}\n", .{@tagName(family)}); + prev_pp_state = .{ .family = family }; + }, + .single, .multi, .time32 => { + try stdout.print("#endif\n#ifdef FAMILY_{s}\n", .{@tagName(family)}); + prev_pp_state = .{ .family = family }; + }, + .family => |prev_family| { + if (family != prev_family) { + try stdout.print("#endif\n#ifdef FAMILY_{s}\n", .{@tagName(family)}); + prev_pp_state = .{ .family = family }; + } + }, + } + } else if (multi_sym.isTime32Only()) { + switch (prev_pp_state) { + .all => { + try stdout.writeAll("#ifdef TIME32\n"); + prev_pp_state = .time32; + }, + .single, .multi, .family => { + try stdout.writeAll("#endif\n#ifdef TIME32\n"); + prev_pp_state = .time32; + }, + .time32 => {}, } } else { switch (prev_pp_state) { - .none => {}, - .special, .ptr32 => { + .all => {}, + .single, .multi, .family, .time32 => { try stdout.writeAll("#endif\n"); }, } - prev_pp_state = .special; + prev_pp_state = .multi; var first = true; try stdout.writeAll("#if "); @@ -366,6 +477,8 @@ pub fn main() !void { } } else if 
(multi_sym.isWeak64()) { try stdout.print("WEAK64 {s}\n", .{name}); + } else if (multi_sym.isWeakTime64()) { + try stdout.print("WEAKTIME64 {s}\n", .{name}); } else { for (arches, 0..) |arch, i| { log.info("symbol '{s}' binding on {s}: {d}", .{ @@ -384,9 +497,9 @@ pub fn main() !void { try stdout.print(".type {s}, %object;\n", .{name}); if (multi_sym.commonSize()) |size| { try stdout.print(".size {s}, {d}\n", .{ name, size }); - } else if (multi_sym.isPtrSize()) { + } else if (multi_sym.isPtrSize(1)) { try stdout.print(".size {s}, PTR_SIZE_BYTES\n", .{name}); - } else if (multi_sym.isPtr2Size()) { + } else if (multi_sym.isPtrSize(2)) { try stdout.print(".size {s}, PTR2_SIZE_BYTES\n", .{name}); } else { for (arches, 0..) |arch, i| { @@ -410,8 +523,8 @@ pub fn main() !void { } switch (prev_pp_state) { - .none => {}, - .ptr32, .special => try stdout.writeAll("#endif\n"), + .all => {}, + .single, .multi, .family, .time32 => try stdout.writeAll("#endif\n"), } } @@ -487,12 +600,17 @@ fn parseElf(parse: Parse, comptime is_64: bool, comptime endian: builtin.Endian) const visib = @as(elf.STV, @enumFromInt(@as(u2, @truncate(sym.st_other)))); const size = s(sym.st_size); - if (parse.blacklist.contains(name)) continue; - if (size == 0) { log.warn("{s}: symbol '{s}' has size 0", .{ @tagName(parse.arch), name }); } + if (sym.st_shndx == elf.SHN_UNDEF) { + log.debug("{s}: skipping '{s}' due to it being undefined", .{ + @tagName(parse.arch), name, + }); + continue; + } + switch (binding) { elf.STB_GLOBAL, elf.STB_WEAK => {}, else => { @@ -590,40 +708,8 @@ fn parseElf(parse: Parse, comptime is_64: bool, comptime endian: builtin.Endian) } } -fn archIndex(arch: std.Target.Cpu.Arch) u8 { - return switch (arch) { - // zig fmt: off - .riscv64 => 0, - .mips => 1, - .mips64 => 2, - .x86 => 3, - .x86_64 => 4, - .powerpc => 5, - .powerpc64 => 6, - .aarch64 => 7, - .riscv32 => 8, - .loongarch64 => 9, - else => unreachable, - // zig fmt: on - }; -} - -fn archMuslName(arch: std.Target.Cpu.Arch) []const u8 { - return switch (arch) { - // zig fmt: off - .riscv64 => "riscv64", - .mips => "mips", - .mips64 => "mips64", - .x86 => "i386", - .x86_64 => "x86_64", - .powerpc => "powerpc", - .powerpc64 => "powerpc64", - .aarch64 => "aarch64", - .riscv32 => "riscv32", - .loongarch64 => "loongarch64", - else => unreachable, - // zig fmt: on - }; +fn archIndex(arch: Arch) u8 { + return @intFromEnum(arch); } fn archSetName(arch_set: [arches.len]bool) []const u8 { @@ -639,529 +725,3 @@ fn fatal(comptime format: []const u8, args: anytype) noreturn { log.err(format, args); std.process.exit(1); } - -const blacklisted_symbols = [_][]const u8{ - "__absvdi2", - "__absvsi2", - "__absvti2", - "__adddf3", - "__addkf3", - "__addodi4", - "__addosi4", - "__addoti4", - "__addsf3", - "__addtf3", - "__addxf3", - "__ashldi3", - "__ashlsi3", - "__ashlti3", - "__ashrdi3", - "__ashrsi3", - "__ashrti3", - "__atomic_compare_exchange", - "__atomic_compare_exchange_1", - "__atomic_compare_exchange_2", - "__atomic_compare_exchange_4", - "__atomic_compare_exchange_8", - "__atomic_exchange", - "__atomic_exchange_1", - "__atomic_exchange_2", - "__atomic_exchange_4", - "__atomic_exchange_8", - "__atomic_fetch_add_1", - "__atomic_fetch_add_2", - "__atomic_fetch_add_4", - "__atomic_fetch_add_8", - "__atomic_fetch_and_1", - "__atomic_fetch_and_2", - "__atomic_fetch_and_4", - "__atomic_fetch_and_8", - "__atomic_fetch_nand_1", - "__atomic_fetch_nand_2", - "__atomic_fetch_nand_4", - "__atomic_fetch_nand_8", - "__atomic_fetch_or_1", - "__atomic_fetch_or_2", - 
"__atomic_fetch_or_4", - "__atomic_fetch_or_8", - "__atomic_fetch_sub_1", - "__atomic_fetch_sub_2", - "__atomic_fetch_sub_4", - "__atomic_fetch_sub_8", - "__atomic_fetch_xor_1", - "__atomic_fetch_xor_2", - "__atomic_fetch_xor_4", - "__atomic_fetch_xor_8", - "__atomic_load", - "__atomic_load_1", - "__atomic_load_2", - "__atomic_load_4", - "__atomic_load_8", - "__atomic_store", - "__atomic_store_1", - "__atomic_store_2", - "__atomic_store_4", - "__atomic_store_8", - "__bswapdi2", - "__bswapsi2", - "__bswapti2", - "__ceilh", - "__ceilx", - "__clear_cache", - "__clzdi2", - "__chk_fail", - "__clzsi2", - "__clzti2", - "__cmpdf2", - "__cmpdi2", - "__cmpsf2", - "__cmpsi2", - "__cmptf2", - "__cmpti2", - "__cosh", - "__cosx", - "__ctzdi2", - "__ctzsi2", - "__ctzti2", - "__divdf3", - "__divdi3", - "__divkf3", - "__divmoddi4", - "__divmodsi4", - "__divmodti4", - "__divsf3", - "__divsi3", - "__divtf3", - "__divti3", - "__divxf3", - "__dlstart", - "__eqdf2", - "__eqkf2", - "__eqsf2", - "__eqtf2", - "__eqxf2", - "__exp2h", - "__exp2x", - "__exph", - "__expx", - "__extenddfkf2", - "__extenddftf2", - "__extenddfxf2", - "__extendhfsf2", - "__extendhftf2", - "__extendhfxf2", - "__extendsfdf2", - "__extendsfkf2", - "__extendsftf2", - "__extendsfxf2", - "__extendxftf2", - "__fabsh", - "__fabsx", - "__ffsdi2", - "__ffssi2", - "__ffsti2", - "__fixdfdi", - "__fixdfsi", - "__fixdfti", - "__fixkfdi", - "__fixkfsi", - "__fixkfti", - "__fixsfdi", - "__fixsfsi", - "__fixsfti", - "__fixtfdi", - "__fixtfsi", - "__fixtfti", - "__fixunsdfdi", - "__fixunsdfsi", - "__fixunsdfti", - "__fixunskfdi", - "__fixunskfsi", - "__fixunskfti", - "__fixunssfdi", - "__fixunssfsi", - "__fixunssfti", - "__fixunstfdi", - "__fixunstfsi", - "__fixunstfti", - "__fixunsxfdi", - "__fixunsxfsi", - "__fixunsxfti", - "__fixxfdi", - "__fixxfsi", - "__fixxfti", - "__floatdidf", - "__floatdikf", - "__floatdisf", - "__floatditf", - "__floatdixf", - "__floatsidf", - "__floatsikf", - "__floatsisf", - "__floatsitf", - "__floatsixf", - "__floattidf", - "__floattikf", - "__floattisf", - "__floattitf", - "__floattixf", - "__floatundidf", - "__floatundikf", - "__floatundisf", - "__floatunditf", - "__floatundixf", - "__floatunsidf", - "__floatunsikf", - "__floatunsisf", - "__floatunsitf", - "__floatunsixf", - "__floatuntidf", - "__floatuntikf", - "__floatuntisf", - "__floatuntitf", - "__floatuntixf", - "__floorh", - "__floorx", - "__fmah", - "__fmax", - "__fmaxh", - "__fmaxx", - "__fminh", - "__fminx", - "__fmodh", - "__fmodx", - "__gedf2", - "__gekf2", - "__gesf2", - "__getf2", - "__gexf2", - "__gnu_f2h_ieee", - "__gnu_h2f_ieee", - "__gtdf2", - "__gtkf2", - "__gtsf2", - "__gttf2", - "__gtxf2", - "__ledf2", - "__lekf2", - "__lesf2", - "__letf2", - "__lexf2", - "__log10h", - "__log10x", - "__log2h", - "__log2x", - "__logh", - "__logx", - "__lshrdi3", - "__lshrsi3", - "__lshrti3", - "__ltdf2", - "__ltkf2", - "__ltsf2", - "__lttf2", - "__ltxf2", - "__memcpy_chk", - "__memmove_chk", - "__memset", - "__memset_chk", - "__moddi3", - "__modsi3", - "__modti3", - "__muldc3", - "__muldf3", - "__muldi3", - "__mulkc3", - "__mulkf3", - "__mulodi4", - "__mulosi4", - "__muloti4", - "__mulsc3", - "__mulsf3", - "__mulsi3", - "__multc3", - "__multf3", - "__multi3", - "__mulxc3", - "__mulxf3", - "__nedf2", - "__negdf2", - "__negdi2", - "__negsf2", - "__negsi2", - "__negti2", - "__negvdi2", - "__negvsi2", - "__negvti2", - "__nekf2", - "__nesf2", - "__netf2", - "__nexf2", - "__paritydi2", - "__paritysi2", - "__parityti2", - "__popcountdi2", - "__popcountsi2", - "__popcountti2", - 
"__powidf2", - "__powihf2", - "__powikf2", - "__powisf2", - "__powitf2", - "__powixf2", - "__roundh", - "__roundx", - "__sincosh", - "__sincosx", - "__sinh", - "__sinx", - "__sqrth", - "__sqrtx", - "__strcat_chk", - "__strcpy_chk", - "__strncat_chk", - "__strncpy_chk", - "__subdf3", - "__subkf3", - "__subodi4", - "__subosi4", - "__suboti4", - "__subsf3", - "__subtf3", - "__subxf3", - "__tanh", - "__tanx", - "__truncdfhf2", - "__truncdfsf2", - "__trunch", - "__trunckfdf2", - "__trunckfsf2", - "__truncsfhf2", - "__trunctfdf2", - "__trunctfhf2", - "__trunctfsf2", - "__trunctfxf2", - "__truncx", - "__truncxfdf2", - "__truncxfhf2", - "__truncxfsf2", - "__ucmpdi2", - "__ucmpsi2", - "__ucmpti2", - "__udivdi3", - "__udivei4", - "__udivmoddi4", - "__udivmodsi4", - "__udivmodti4", - "__udivsi3", - "__udivti3", - "__umoddi3", - "__umodei4", - "__umodsi3", - "__umodti3", - "__unorddf2", - "__unordkf2", - "__unordsf2", - "__unordtf2", - "__zig_probe_stack", - "ceilf128", - "ceilq", - "cosf128", - "cosq", - "exp2f128", - "exp2q", - "expf128", - "expq", - "fabsf128", - "fabsq", - "fabsq.2", - "fabsq.3", - "floorf128", - "floorq", - "fmaf128", - "fmaq", - "fmaxf128", - "fmaxq", - "fmaxq.2", - "fmaxq.3", - "fminf128", - "fminq", - "fmodf128", - "fmodq", - "log10f128", - "log10q", - "log2f128", - "log2q", - "logf128", - "logq", - "roundf128", - "roundq", - "sincosf128", - "sincosq", - "sinf128", - "sinq", - "sqrtf128", - "sqrtq", - "tanf128", - "tanq", - "truncf128", - "truncq", - "__aarch64_cas16_acq", - "__aarch64_cas16_acq_rel", - "__aarch64_cas16_rel", - "__aarch64_cas16_relax", - "__aarch64_cas1_acq", - "__aarch64_cas1_acq_rel", - "__aarch64_cas1_rel", - "__aarch64_cas1_relax", - "__aarch64_cas2_acq", - "__aarch64_cas2_acq_rel", - "__aarch64_cas2_rel", - "__aarch64_cas2_relax", - "__aarch64_cas4_acq", - "__aarch64_cas4_acq_rel", - "__aarch64_cas4_rel", - "__aarch64_cas4_relax", - "__aarch64_cas8_acq", - "__aarch64_cas8_acq_rel", - "__aarch64_cas8_rel", - "__aarch64_cas8_relax", - "__aarch64_ldadd1_acq", - "__aarch64_ldadd1_acq_rel", - "__aarch64_ldadd1_rel", - "__aarch64_ldadd1_relax", - "__aarch64_ldadd2_acq", - "__aarch64_ldadd2_acq_rel", - "__aarch64_ldadd2_rel", - "__aarch64_ldadd2_relax", - "__aarch64_ldadd4_acq", - "__aarch64_ldadd4_acq_rel", - "__aarch64_ldadd4_rel", - "__aarch64_ldadd4_relax", - "__aarch64_ldadd8_acq", - "__aarch64_ldadd8_acq_rel", - "__aarch64_ldadd8_rel", - "__aarch64_ldadd8_relax", - "__aarch64_ldclr1_acq", - "__aarch64_ldclr1_acq_rel", - "__aarch64_ldclr1_rel", - "__aarch64_ldclr1_relax", - "__aarch64_ldclr2_acq", - "__aarch64_ldclr2_acq_rel", - "__aarch64_ldclr2_rel", - "__aarch64_ldclr2_relax", - "__aarch64_ldclr4_acq", - "__aarch64_ldclr4_acq_rel", - "__aarch64_ldclr4_rel", - "__aarch64_ldclr4_relax", - "__aarch64_ldclr8_acq", - "__aarch64_ldclr8_acq_rel", - "__aarch64_ldclr8_rel", - "__aarch64_ldclr8_relax", - "__aarch64_ldeor1_acq", - "__aarch64_ldeor1_acq_rel", - "__aarch64_ldeor1_rel", - "__aarch64_ldeor1_relax", - "__aarch64_ldeor2_acq", - "__aarch64_ldeor2_acq_rel", - "__aarch64_ldeor2_rel", - "__aarch64_ldeor2_relax", - "__aarch64_ldeor4_acq", - "__aarch64_ldeor4_acq_rel", - "__aarch64_ldeor4_rel", - "__aarch64_ldeor4_relax", - "__aarch64_ldeor8_acq", - "__aarch64_ldeor8_acq_rel", - "__aarch64_ldeor8_rel", - "__aarch64_ldeor8_relax", - "__aarch64_ldset1_acq", - "__aarch64_ldset1_acq_rel", - "__aarch64_ldset1_rel", - "__aarch64_ldset1_relax", - "__aarch64_ldset2_acq", - "__aarch64_ldset2_acq_rel", - "__aarch64_ldset2_rel", - "__aarch64_ldset2_relax", - 
"__aarch64_ldset4_acq", - "__aarch64_ldset4_acq_rel", - "__aarch64_ldset4_rel", - "__aarch64_ldset4_relax", - "__aarch64_ldset8_acq", - "__aarch64_ldset8_acq_rel", - "__aarch64_ldset8_rel", - "__aarch64_ldset8_relax", - "__aarch64_swp1_acq", - "__aarch64_swp1_acq_rel", - "__aarch64_swp1_rel", - "__aarch64_swp1_relax", - "__aarch64_swp2_acq", - "__aarch64_swp2_acq_rel", - "__aarch64_swp2_rel", - "__aarch64_swp2_relax", - "__aarch64_swp4_acq", - "__aarch64_swp4_acq_rel", - "__aarch64_swp4_rel", - "__aarch64_swp4_relax", - "__aarch64_swp8_acq", - "__aarch64_swp8_acq_rel", - "__aarch64_swp8_rel", - "__aarch64_swp8_relax", - "__addhf3", - "__atomic_compare_exchange_16", - "__atomic_exchange_16", - "__atomic_fetch_add_16", - "__atomic_fetch_and_16", - "__atomic_fetch_nand_16", - "__atomic_fetch_or_16", - "__atomic_fetch_sub_16", - "__atomic_fetch_umax_1", - "__atomic_fetch_umax_16", - "__atomic_fetch_umax_2", - "__atomic_fetch_umax_4", - "__atomic_fetch_umax_8", - "__atomic_fetch_umin_1", - "__atomic_fetch_umin_16", - "__atomic_fetch_umin_2", - "__atomic_fetch_umin_4", - "__atomic_fetch_umin_8", - "__atomic_fetch_xor_16", - "__atomic_load_16", - "__atomic_store_16", - "__cmphf2", - "__cmpxf2", - "__divdc3", - "__divhc3", - "__divhf3", - "__divkc3", - "__divsc3", - "__divtc3", - "__divxc3", - "__eqhf2", - "__extendhfdf2", - "__fixhfdi", - "__fixhfsi", - "__fixhfti", - "__fixunshfdi", - "__fixunshfsi", - "__fixunshfti", - "__floatdihf", - "__floatsihf", - "__floattihf", - "__floatundihf", - "__floatunsihf", - "__floatuntihf", - "__gehf2", - "__gthf2", - "__lehf2", - "__lthf2", - "__mulhc3", - "__mulhf3", - "__neghf2", - "__negkf2", - "__negtf2", - "__negxf2", - "__nehf2", - "__subhf3", - "__unordhf2", - "__unordxf2", -};