mirror of
https://github.com/ziglang/zig.git
synced 2024-11-27 23:52:31 +00:00
0556a2ba53
Finishes cleanups that I started in other commits in this branch. * Use common.linkage for all exports instead of redoing the logic in each file. * Remove pointless `@setRuntimeSafety` calls. * Avoid redundantly exporting multiple versions of functions. For example, if PPC wants `ceilf128` then don't also export `ceilq`; similarly if ARM wants `__aeabi_ddiv` then don't also export `__divdf3`. * Use `inline` for helper functions instead of making inline calls at callsites.
255 lines
7.1 KiB
Zig
255 lines
7.1 KiB
Zig
const std = @import("std");
|
|
const builtin = @import("builtin");
|
|
const is_test = builtin.is_test;
|
|
const common = @import("common.zig");
|
|
|
|
// Re-export the `panic` declaration of common.zig under this file's namespace.
pub const panic = common.panic;
|
|
|
|
// Export every bit-counting routine of this file under its own symbol name,
// all with the linkage selected by common.zig.
comptime {
    const linkage = common.linkage;

    // clz - count leading zeroes
    @export(__clzsi2, .{ .name = "__clzsi2", .linkage = linkage });
    @export(__clzdi2, .{ .name = "__clzdi2", .linkage = linkage });
    @export(__clzti2, .{ .name = "__clzti2", .linkage = linkage });

    // ctz - count trailing zeroes
    @export(__ctzsi2, .{ .name = "__ctzsi2", .linkage = linkage });
    @export(__ctzdi2, .{ .name = "__ctzdi2", .linkage = linkage });
    @export(__ctzti2, .{ .name = "__ctzti2", .linkage = linkage });

    // ffs - find first set
    @export(__ffssi2, .{ .name = "__ffssi2", .linkage = linkage });
    @export(__ffsdi2, .{ .name = "__ffsdi2", .linkage = linkage });
    @export(__ffsti2, .{ .name = "__ffsti2", .linkage = linkage });
}
|
|
|
|
// clz - count leading zeroes
|
|
// - clzXi2 for unoptimized little and big endian
|
|
// - __clzsi2_thumb1: assume a != 0
|
|
// - __clzsi2_arm32: assume a != 0
|
|
|
|
// ctz - count trailing zeroes
|
|
// - ctzXi2 for unoptimized little and big endian
|
|
|
|
// ffs - find first set
|
|
// * ffs = (a == 0) => 0, (a != 0) => ctz + 1
|
|
// * don't pay for `if (x == 0) return shift;` inside ctz
|
|
// - ffsXi2 for unoptimized little and big endian
|
|
|
|
/// Counts the leading zero bits of `a`, looking at its raw bit pattern.
///
/// `T` must be a 32, 64 or 128 bit integer type; any other width is rejected
/// at compile time. Returns `@bitSizeOf(T)` when `a` is zero.
inline fn clzXi2(comptime T: type, a: T) i32 {
    var x = switch (@bitSizeOf(T)) {
        32 => @bitCast(u32, a),
        64 => @bitCast(u64, a),
        128 => @bitCast(u128, a),
        // Fail the build instead of compiling a reachable `unreachable`
        // into the caller for an unsupported width.
        else => @compileError("clzXi2 only supports 32, 64 and 128 bit integers"),
    };
    var n: T = @bitSizeOf(T);
    // Count first bit set using binary search, from Hacker's Delight.
    // Each round halves the window: if the upper half is non-zero the answer
    // lies there, so drop `shift` from the count and continue with that half.
    comptime var shift: u8 = @bitSizeOf(T);
    // Stop once `shift` has reached 1; a further round with shift == 0 would
    // only compare `x` with itself.
    inline while (shift > 1) {
        shift = shift >> 1;
        const y = x >> shift;
        if (y != 0) {
            n = n - shift;
            x = y;
        }
    }
    // At this point `x` is 0 or 1: subtract the last remaining set bit, if any.
    return @intCast(i32, n - @bitCast(T, x));
}
|
|
|
|
/// Count-leading-zeroes routine written entirely in Thumb assembly.
///
/// The function is naked: it declares no Zig parameters or return value.
/// The assembly reads its operand from r0 and leaves the result in r0 before
/// returning with `bx lr`; r1, r2 and r3 are used as scratch registers.
/// The notes at the top of this file list this routine as assuming a != 0.
fn __clzsi2_thumb1() callconv(.Naked) void {
    // Runtime safety is switched off for this scope, which contains the
    // trailing `unreachable`.
    @setRuntimeSafety(false);

    // Similar to the generic version with the last two rounds replaced by a LUT
    //
    // r1 starts at 32. For each of the shifts 16, 8 and 4: if r0 shifted right
    // by that amount is non-zero, r1 is reduced by the shift and r0 is replaced
    // by the shifted value; otherwise the round is skipped via `beq 1f`.
    // After these rounds r0 is used as a byte index into LUT and the loaded
    // entry is subtracted from r1 to form the result.
    //
    // NOTE(review): the table entries (0,1,2,2,3,3,3,3,4,...) are the bit
    // length of each index 0..15 (e.g. index 4 -> 3), not a count of set bits
    // as the assembly comment next to LUT words it.
    asm volatile (
        \\ movs r1, #32
        \\ lsrs r2, r0, #16
        \\ beq 1f
        \\ subs r1, #16
        \\ movs r0, r2
        \\ 1:
        \\ lsrs r2, r0, #8
        \\ beq 1f
        \\ subs r1, #8
        \\ movs r0, r2
        \\ 1:
        \\ lsrs r2, r0, #4
        \\ beq 1f
        \\ subs r1, #4
        \\ movs r0, r2
        \\ 1:
        \\ ldr r3, =LUT
        \\ ldrb r0, [r3, r0]
        \\ subs r0, r1, r0
        \\ bx lr
        \\ .p2align 2
        \\ // Number of bits set in the 0-15 range
        \\ LUT:
        \\ .byte 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4
    );

    // The assembly above returns through `bx lr`, so control does not
    // continue past it.
    unreachable;
}
|
|
|
|
/// Count-leading-zeroes routine written entirely in ARM assembly using
/// conditionally executed `movne` / `addeq` instructions instead of branches.
///
/// The function is naked: it declares no Zig parameters or return value.
/// The assembly reads its operand from r0 and leaves the result in r0 before
/// returning with `bx lr`; r1 and r2 are used as scratch registers.
/// As the assembly comments state, the operand is assumed to be non-zero.
fn __clzsi2_arm32() callconv(.Naked) void {
    // Runtime safety is switched off for this scope, which contains the
    // trailing `unreachable`.
    @setRuntimeSafety(false);

    // r1 accumulates the count, starting at 1. Each round (shifts 16, 8, 4, 2)
    // either adds the shift to r1 when the shifted value is zero, or replaces
    // r0 with the shifted value. The final `sub` folds the last round in by
    // subtracting (r0 >> 1) from r1, as explained by the table in the assembly.
    asm volatile (
        \\ // Assumption: n != 0
        \\ // r0: n
        \\ // r1: count of leading zeros in n + 1
        \\ // r2: scratch register for shifted r0
        \\ mov r1, #1
        \\
        \\ // Basic block:
        \\ // if ((r0 >> SHIFT) == 0)
        \\ // r1 += SHIFT;
        \\ // else
        \\ // r0 >>= SHIFT;
        \\ // for descending powers of two as SHIFT.
        \\ lsrs r2, r0, #16
        \\ movne r0, r2
        \\ addeq r1, #16
        \\
        \\ lsrs r2, r0, #8
        \\ movne r0, r2
        \\ addeq r1, #8
        \\
        \\ lsrs r2, r0, #4
        \\ movne r0, r2
        \\ addeq r1, #4
        \\
        \\ lsrs r2, r0, #2
        \\ movne r0, r2
        \\ addeq r1, #2
        \\
        \\ // The basic block invariants at this point are (r0 >> 2) == 0 and
        \\ // r0 != 0. This means 1 <= r0 <= 3 and 0 <= (r0 >> 1) <= 1.
        \\ //
        \\ // r0 | (r0 >> 1) == 0 | (r0 >> 1) == 1 | -(r0 >> 1) | 1 - (r0 >> 1)f
        \\ // ---+----------------+----------------+------------+--------------
        \\ // 1 | 1 | 0 | 0 | 1
        \\ // 2 | 0 | 1 | -1 | 0
        \\ // 3 | 0 | 1 | -1 | 0
        \\ //
        \\ // The r1's initial value of 1 compensates for the 1 here.
        \\ sub r0, r1, r0, lsr #1
        \\ bx lr
    );

    // The assembly above returns through `bx lr`, so control does not
    // continue past it.
    unreachable;
}
|
|
|
|
/// Portable 32-bit count-leading-zeroes with the C calling convention;
/// forwards to the generic `clzXi2` helper.
fn clzsi2_generic(a: i32) callconv(.C) i32 {
    const leading_zeroes = clzXi2(i32, a);
    return leading_zeroes;
}
|
|
|
|
/// Implementation chosen for `__clzsi2` on the current target: one of the two
/// hand-written assembly routines on ARM/Thumb targets where applicable,
/// otherwise the generic Zig version.
pub const __clzsi2 = switch (builtin.cpu.arch) {
    .arm, .armeb, .thumb, .thumbeb => impl: {
        const features = builtin.cpu.features;
        const is_thumb = builtin.cpu.arch.isThumb();
        const has_noarm = std.Target.arm.featureSetHas(features, .noarm);
        const has_thumb2 = std.Target.arm.featureSetHas(features, .thumb2);

        // Thumb (or no ARM mode at all) without Thumb2 means Thumb1.
        if ((is_thumb or has_noarm) and !has_thumb2) break :impl __clzsi2_thumb1;

        // From here on we're either targeting Thumb2 or ARM.
        if (!is_thumb) break :impl __clzsi2_arm32;

        // Use the generic implementation otherwise.
        break :impl clzsi2_generic;
    },
    else => clzsi2_generic,
};
|
|
|
|
/// 64-bit count-leading-zeroes with the C calling convention; forwards to
/// the generic `clzXi2` helper.
pub fn __clzdi2(a: i64) callconv(.C) i32 {
    const leading_zeroes = clzXi2(i64, a);
    return leading_zeroes;
}
|
|
|
|
/// 128-bit count-leading-zeroes with the C calling convention; forwards to
/// the generic `clzXi2` helper.
pub fn __clzti2(a: i128) callconv(.C) i32 {
    const leading_zeroes = clzXi2(i128, a);
    return leading_zeroes;
}
|
|
|
|
/// Counts the trailing zero bits of `a`, looking at its raw bit pattern.
///
/// `T` must be a 32, 64 or 128 bit integer type; any other width is rejected
/// at compile time. Returns `@bitSizeOf(T)` when `a` is zero.
inline fn ctzXi2(comptime T: type, a: T) i32 {
    var x = switch (@bitSizeOf(T)) {
        32 => @bitCast(u32, a),
        64 => @bitCast(u64, a),
        128 => @bitCast(u128, a),
        // Fail the build instead of compiling a reachable `unreachable`
        // into the caller for an unsupported width.
        else => @compileError("ctzXi2 only supports 32, 64 and 128 bit integers"),
    };
    // Starts at 1; the final subtraction of the lowest bit brings it back down.
    var n: T = 1;
    // Number of trailing zeroes as binary search, from Hacker's Delight
    var mask: @TypeOf(x) = std.math.maxInt(@TypeOf(x));
    comptime var shift = @bitSizeOf(T);
    if (x == 0) return shift;
    inline while (shift > 1) {
        shift = shift >> 1;
        // `mask` now covers the low `shift` bits of the current window.
        mask = mask >> shift;
        if ((x & mask) == 0) {
            // The low half is empty: count it and continue in the upper half.
            n = n + shift;
            x = x >> shift;
        }
    }
    return @intCast(i32, n - @bitCast(T, (x & 1)));
}
|
|
|
|
/// 32-bit count-trailing-zeroes with the C calling convention; forwards to
/// the generic `ctzXi2` helper.
pub fn __ctzsi2(a: i32) callconv(.C) i32 {
    const trailing_zeroes = ctzXi2(i32, a);
    return trailing_zeroes;
}
|
|
|
|
/// 64-bit count-trailing-zeroes with the C calling convention; forwards to
/// the generic `ctzXi2` helper.
pub fn __ctzdi2(a: i64) callconv(.C) i32 {
    const trailing_zeroes = ctzXi2(i64, a);
    return trailing_zeroes;
}
|
|
|
|
/// 128-bit count-trailing-zeroes with the C calling convention; forwards to
/// the generic `ctzXi2` helper.
pub fn __ctzti2(a: i128) callconv(.C) i32 {
    const trailing_zeroes = ctzXi2(i128, a);
    return trailing_zeroes;
}
|
|
|
|
/// Finds the first (least significant) set bit of `a`, looking at its raw
/// bit pattern.
///
/// `T` must be a 32, 64 or 128 bit integer type; any other width is rejected
/// at compile time. Returns 0 when `a` is zero, otherwise the one-based
/// position of the lowest set bit (count of trailing zeroes plus one).
inline fn ffsXi2(comptime T: type, a: T) i32 {
    var x = switch (@bitSizeOf(T)) {
        32 => @bitCast(u32, a),
        64 => @bitCast(u64, a),
        128 => @bitCast(u128, a),
        // Fail the build instead of compiling a reachable `unreachable`
        // into the caller for an unsupported width.
        else => @compileError("ffsXi2 only supports 32, 64 and 128 bit integers"),
    };
    var n: T = 1;
    // adapted from Number of trailing zeroes (see ctzXi2)
    var mask: @TypeOf(x) = std.math.maxInt(@TypeOf(x));
    comptime var shift = @bitSizeOf(T);
    // In contrast to ctz return 0
    if (x == 0) return 0;
    inline while (shift > 1) {
        shift = shift >> 1;
        // `mask` now covers the low `shift` bits of the current window.
        mask = mask >> shift;
        if ((x & mask) == 0) {
            // The low half is empty: count it and continue in the upper half.
            n = n + shift;
            x = x >> shift;
        }
    }
    // return ctz + 1
    return @intCast(i32, n - @bitCast(T, (x & 1))) + @as(i32, 1);
}
|
|
|
|
/// 32-bit find-first-set with the C calling convention; forwards to the
/// generic `ffsXi2` helper.
pub fn __ffssi2(a: i32) callconv(.C) i32 {
    const first_set = ffsXi2(i32, a);
    return first_set;
}
|
|
|
|
/// 64-bit find-first-set with the C calling convention; forwards to the
/// generic `ffsXi2` helper.
pub fn __ffsdi2(a: i64) callconv(.C) i32 {
    const first_set = ffsXi2(i64, a);
    return first_set;
}
|
|
|
|
/// 128-bit find-first-set with the C calling convention; forwards to the
/// generic `ffsXi2` helper.
pub fn __ffsti2(a: i128) callconv(.C) i32 {
    const first_set = ffsXi2(i128, a);
    return first_set;
}
|
|
|
|
// Reference the per-routine test files so that their tests are included
// whenever this file is tested.
test {
    // 32-bit operand variants
    _ = @import("clzsi2_test.zig");
    _ = @import("ctzsi2_test.zig");
    _ = @import("ffssi2_test.zig");

    // 64-bit operand variants
    _ = @import("clzdi2_test.zig");
    _ = @import("ctzdi2_test.zig");
    _ = @import("ffsdi2_test.zig");

    // 128-bit operand variants
    _ = @import("clzti2_test.zig");
    _ = @import("ctzti2_test.zig");
    _ = @import("ffsti2_test.zig");
}
|