mirror of
https://github.com/ziglang/zig.git
synced 2024-11-26 15:12:31 +00:00
Use builtin inference over @as where possible
This commit is contained in:
parent
77b96231a6
commit
d82b359010
@ -38,14 +38,14 @@ pub inline fn addf3(comptime T: type, a: T, b: T) T {
|
||||
bAbs -% @as(Z, 1) >= infRep - @as(Z, 1))
|
||||
{
|
||||
// NaN + anything = qNaN
|
||||
if (aAbs > infRep) return @as(T, @bitCast(@as(Z, @bitCast(a)) | quietBit));
|
||||
if (aAbs > infRep) return @bitCast(@as(Z, @bitCast(a)) | quietBit);
|
||||
// anything + NaN = qNaN
|
||||
if (bAbs > infRep) return @as(T, @bitCast(@as(Z, @bitCast(b)) | quietBit));
|
||||
if (bAbs > infRep) return @bitCast(@as(Z, @bitCast(b)) | quietBit);
|
||||
|
||||
if (aAbs == infRep) {
|
||||
// +/-infinity + -/+infinity = qNaN
|
||||
if ((@as(Z, @bitCast(a)) ^ @as(Z, @bitCast(b))) == signBit) {
|
||||
return @as(T, @bitCast(qnanRep));
|
||||
return @bitCast(qnanRep);
|
||||
}
|
||||
// +/-infinity + anything remaining = +/- infinity
|
||||
else {
|
||||
@ -60,7 +60,7 @@ pub inline fn addf3(comptime T: type, a: T, b: T) T {
|
||||
if (aAbs == 0) {
|
||||
// but we need to get the sign right for zero + zero
|
||||
if (bAbs == 0) {
|
||||
return @as(T, @bitCast(@as(Z, @bitCast(a)) & @as(Z, @bitCast(b))));
|
||||
return @bitCast(@as(Z, @bitCast(a)) & @as(Z, @bitCast(b)));
|
||||
} else {
|
||||
return b;
|
||||
}
|
||||
@ -78,8 +78,8 @@ pub inline fn addf3(comptime T: type, a: T, b: T) T {
|
||||
}
|
||||
|
||||
// Extract the exponent and significand from the (possibly swapped) a and b.
|
||||
var aExponent = @as(i32, @intCast((aRep >> significandBits) & maxExponent));
|
||||
var bExponent = @as(i32, @intCast((bRep >> significandBits) & maxExponent));
|
||||
var aExponent: i32 = @intCast((aRep >> significandBits) & maxExponent);
|
||||
var bExponent: i32 = @intCast((bRep >> significandBits) & maxExponent);
|
||||
var aSignificand = aRep & significandMask;
|
||||
var bSignificand = bRep & significandMask;
|
||||
|
||||
@ -101,7 +101,7 @@ pub inline fn addf3(comptime T: type, a: T, b: T) T {
|
||||
|
||||
// Shift the significand of b by the difference in exponents, with a sticky
|
||||
// bottom bit to get rounding correct.
|
||||
const @"align" = @as(u32, @intCast(aExponent - bExponent));
|
||||
const @"align": u32 = @intCast(aExponent - bExponent);
|
||||
if (@"align" != 0) {
|
||||
if (@"align" < typeWidth) {
|
||||
const sticky = if (bSignificand << @as(S, @intCast(typeWidth - @"align")) != 0) @as(Z, 1) else 0;
|
||||
@ -113,7 +113,7 @@ pub inline fn addf3(comptime T: type, a: T, b: T) T {
|
||||
if (subtraction) {
|
||||
aSignificand -= bSignificand;
|
||||
// If a == -b, return +zero.
|
||||
if (aSignificand == 0) return @as(T, @bitCast(@as(Z, 0)));
|
||||
if (aSignificand == 0) return @bitCast(@as(Z, 0));
|
||||
|
||||
// If partial cancellation occurred, we need to left-shift the result
|
||||
// and adjust the exponent:
|
||||
@ -135,13 +135,13 @@ pub inline fn addf3(comptime T: type, a: T, b: T) T {
|
||||
}
|
||||
|
||||
// If we have overflowed the type, return +/- infinity:
|
||||
if (aExponent >= maxExponent) return @as(T, @bitCast(infRep | resultSign));
|
||||
if (aExponent >= maxExponent) return @bitCast(infRep | resultSign);
|
||||
|
||||
if (aExponent <= 0) {
|
||||
// Result is denormal; the exponent and round/sticky bits are zero.
|
||||
// All we need to do is shift the significand and apply the correct sign.
|
||||
aSignificand >>= @as(S, @intCast(4 - aExponent));
|
||||
return @as(T, @bitCast(resultSign | aSignificand));
|
||||
return @bitCast(resultSign | aSignificand);
|
||||
}
|
||||
|
||||
// Low three bits are round, guard, and sticky.
|
||||
@ -164,7 +164,7 @@ pub inline fn addf3(comptime T: type, a: T, b: T) T {
|
||||
if ((result >> significandBits) != 0) result |= integerBit;
|
||||
}
|
||||
|
||||
return @as(T, @bitCast(result));
|
||||
return @bitCast(result);
|
||||
}
|
||||
|
||||
test {
|
||||
|
@ -5,7 +5,7 @@
|
||||
|
||||
const std = @import("std");
|
||||
const math = std.math;
|
||||
const qnan128 = @as(f128, @bitCast(@as(u128, 0x7fff800000000000) << 64));
|
||||
const qnan128: f128 = @bitCast(@as(u128, 0x7fff800000000000) << 64);
|
||||
|
||||
const __addtf3 = @import("addtf3.zig").__addtf3;
|
||||
const __addxf3 = @import("addxf3.zig").__addxf3;
|
||||
@ -14,9 +14,9 @@ const __subtf3 = @import("subtf3.zig").__subtf3;
|
||||
fn test__addtf3(a: f128, b: f128, expected_hi: u64, expected_lo: u64) !void {
|
||||
const x = __addtf3(a, b);
|
||||
|
||||
const rep = @as(u128, @bitCast(x));
|
||||
const hi = @as(u64, @intCast(rep >> 64));
|
||||
const lo = @as(u64, @truncate(rep));
|
||||
const rep: u128 = @bitCast(x);
|
||||
const hi: u64 = @intCast(rep >> 64);
|
||||
const lo: u64 = @truncate(rep);
|
||||
|
||||
if (hi == expected_hi and lo == expected_lo) {
|
||||
return;
|
||||
@ -53,9 +53,9 @@ test "addtf3" {
|
||||
fn test__subtf3(a: f128, b: f128, expected_hi: u64, expected_lo: u64) !void {
|
||||
const x = __subtf3(a, b);
|
||||
|
||||
const rep = @as(u128, @bitCast(x));
|
||||
const hi = @as(u64, @intCast(rep >> 64));
|
||||
const lo = @as(u64, @truncate(rep));
|
||||
const rep: u128 = @bitCast(x);
|
||||
const hi: u64 = @intCast(rep >> 64);
|
||||
const lo: u64 = @truncate(rep);
|
||||
|
||||
if (hi == expected_hi and lo == expected_lo) {
|
||||
return;
|
||||
@ -87,11 +87,11 @@ test "subtf3" {
|
||||
try test__subtf3(0x1.ee9d7c52354a6936ab8d7654321fp-1, 0x1.234567829a3bcdef5678ade36734p+5, 0xc0041b8af1915166, 0xa44a7bca780a166c);
|
||||
}
|
||||
|
||||
const qnan80 = @as(f80, @bitCast(@as(u80, @bitCast(math.nan(f80))) | (1 << (math.floatFractionalBits(f80) - 1))));
|
||||
const qnan80: f80 = @bitCast(@as(u80, @bitCast(math.nan(f80))) | (1 << (math.floatFractionalBits(f80) - 1)));
|
||||
|
||||
fn test__addxf3(a: f80, b: f80, expected: u80) !void {
|
||||
const x = __addxf3(a, b);
|
||||
const rep = @as(u80, @bitCast(x));
|
||||
const rep: u80 = @bitCast(x);
|
||||
|
||||
if (rep == expected)
|
||||
return;
|
||||
|
@ -192,6 +192,6 @@ pub fn __aeabi_ldivmod() callconv(.Naked) void {
|
||||
}
|
||||
|
||||
pub fn __aeabi_drsub(a: f64, b: f64) callconv(.AAPCS) f64 {
|
||||
const neg_a = @as(f64, @bitCast(@as(u64, @bitCast(a)) ^ (@as(u64, 1) << 63)));
|
||||
const neg_a: f64 = @bitCast(@as(u64, @bitCast(a)) ^ (@as(u64, 1) << 63));
|
||||
return b + neg_a;
|
||||
}
|
||||
|
@ -27,11 +27,11 @@ comptime {
|
||||
|
||||
pub fn __ceilh(x: f16) callconv(.C) f16 {
|
||||
// TODO: more efficient implementation
|
||||
return @as(f16, @floatCast(ceilf(x)));
|
||||
return @floatCast(ceilf(x));
|
||||
}
|
||||
|
||||
pub fn ceilf(x: f32) callconv(.C) f32 {
|
||||
var u = @as(u32, @bitCast(x));
|
||||
var u: u32 = @bitCast(x);
|
||||
var e = @as(i32, @intCast((u >> 23) & 0xFF)) - 0x7F;
|
||||
var m: u32 = undefined;
|
||||
|
||||
@ -52,7 +52,7 @@ pub fn ceilf(x: f32) callconv(.C) f32 {
|
||||
u += m;
|
||||
}
|
||||
u &= ~m;
|
||||
return @as(f32, @bitCast(u));
|
||||
return @bitCast(u);
|
||||
} else {
|
||||
math.doNotOptimizeAway(x + 0x1.0p120);
|
||||
if (u >> 31 != 0) {
|
||||
@ -66,7 +66,7 @@ pub fn ceilf(x: f32) callconv(.C) f32 {
|
||||
pub fn ceil(x: f64) callconv(.C) f64 {
|
||||
const f64_toint = 1.0 / math.floatEps(f64);
|
||||
|
||||
const u = @as(u64, @bitCast(x));
|
||||
const u: u64 = @bitCast(x);
|
||||
const e = (u >> 52) & 0x7FF;
|
||||
var y: f64 = undefined;
|
||||
|
||||
@ -96,13 +96,13 @@ pub fn ceil(x: f64) callconv(.C) f64 {
|
||||
|
||||
pub fn __ceilx(x: f80) callconv(.C) f80 {
|
||||
// TODO: more efficient implementation
|
||||
return @as(f80, @floatCast(ceilq(x)));
|
||||
return @floatCast(ceilq(x));
|
||||
}
|
||||
|
||||
pub fn ceilq(x: f128) callconv(.C) f128 {
|
||||
const f128_toint = 1.0 / math.floatEps(f128);
|
||||
|
||||
const u = @as(u128, @bitCast(x));
|
||||
const u: u128 = @bitCast(x);
|
||||
const e = (u >> 112) & 0x7FFF;
|
||||
var y: f128 = undefined;
|
||||
|
||||
|
@ -2,7 +2,7 @@ const clz = @import("count0bits.zig");
|
||||
const testing = @import("std").testing;
|
||||
|
||||
fn test__clzdi2(a: u64, expected: i64) !void {
|
||||
var x = @as(i64, @bitCast(a));
|
||||
var x: i64 = @bitCast(a);
|
||||
var result = clz.__clzdi2(x);
|
||||
try testing.expectEqual(expected, result);
|
||||
}
|
||||
|
@ -5,7 +5,7 @@ const testing = @import("std").testing;
|
||||
fn test__clzsi2(a: u32, expected: i32) !void {
|
||||
const nakedClzsi2 = clz.__clzsi2;
|
||||
const actualClzsi2 = @as(*const fn (a: i32) callconv(.C) i32, @ptrCast(&nakedClzsi2));
|
||||
const x = @as(i32, @bitCast(a));
|
||||
const x: i32 = @bitCast(a);
|
||||
const result = actualClzsi2(x);
|
||||
try testing.expectEqual(expected, result);
|
||||
}
|
||||
|
@ -2,7 +2,7 @@ const clz = @import("count0bits.zig");
|
||||
const testing = @import("std").testing;
|
||||
|
||||
fn test__clzti2(a: u128, expected: i64) !void {
|
||||
var x = @as(i128, @bitCast(a));
|
||||
var x: i128 = @bitCast(a);
|
||||
var result = clz.__clzti2(x);
|
||||
try testing.expectEqual(expected, result);
|
||||
}
|
||||
|
@ -35,7 +35,7 @@ pub fn cosf(x: f32) callconv(.C) f32 {
|
||||
const c3pio2: f64 = 3.0 * math.pi / 2.0; // 0x4012D97C, 0x7F3321D2
|
||||
const c4pio2: f64 = 4.0 * math.pi / 2.0; // 0x401921FB, 0x54442D18
|
||||
|
||||
var ix = @as(u32, @bitCast(x));
|
||||
var ix: u32 = @bitCast(x);
|
||||
const sign = ix >> 31 != 0;
|
||||
ix &= 0x7fffffff;
|
||||
|
||||
@ -116,12 +116,12 @@ pub fn cos(x: f64) callconv(.C) f64 {
|
||||
|
||||
pub fn __cosx(a: f80) callconv(.C) f80 {
|
||||
// TODO: more efficient implementation
|
||||
return @as(f80, @floatCast(cosq(a)));
|
||||
return @floatCast(cosq(a));
|
||||
}
|
||||
|
||||
pub fn cosq(a: f128) callconv(.C) f128 {
|
||||
// TODO: more correct implementation
|
||||
return cos(@as(f64, @floatCast(a)));
|
||||
return cos(@floatCast(a));
|
||||
}
|
||||
|
||||
pub fn cosl(x: c_longdouble) callconv(.C) c_longdouble {
|
||||
|
@ -49,7 +49,7 @@ inline fn clzXi2(comptime T: type, a: T) i32 {
|
||||
x = y;
|
||||
}
|
||||
}
|
||||
return @as(i32, @intCast(n - @as(T, @bitCast(x))));
|
||||
return @intCast(n - @as(T, @bitCast(x)));
|
||||
}
|
||||
|
||||
fn __clzsi2_thumb1() callconv(.Naked) void {
|
||||
@ -187,7 +187,7 @@ inline fn ctzXi2(comptime T: type, a: T) i32 {
|
||||
x = x >> shift;
|
||||
}
|
||||
}
|
||||
return @as(i32, @intCast(n - @as(T, @bitCast((x & 1)))));
|
||||
return @intCast(n - @as(T, @bitCast((x & 1))));
|
||||
}
|
||||
|
||||
pub fn __ctzsi2(a: i32) callconv(.C) i32 {
|
||||
@ -224,7 +224,7 @@ inline fn ffsXi2(comptime T: type, a: T) i32 {
|
||||
}
|
||||
}
|
||||
// return ctz + 1
|
||||
return @as(i32, @intCast(n - @as(T, @bitCast((x & 1))))) + @as(i32, 1);
|
||||
return @as(i32, @intCast(n - @as(T, @bitCast((x & 1))))) + 1;
|
||||
}
|
||||
|
||||
pub fn __ffssi2(a: i32) callconv(.C) i32 {
|
||||
|
@ -2,7 +2,7 @@ const ctz = @import("count0bits.zig");
|
||||
const testing = @import("std").testing;
|
||||
|
||||
fn test__ctzdi2(a: u64, expected: i32) !void {
|
||||
var x = @as(i64, @bitCast(a));
|
||||
var x: i64 = @bitCast(a);
|
||||
var result = ctz.__ctzdi2(x);
|
||||
try testing.expectEqual(expected, result);
|
||||
}
|
||||
|
@ -2,7 +2,7 @@ const ctz = @import("count0bits.zig");
|
||||
const testing = @import("std").testing;
|
||||
|
||||
fn test__ctzsi2(a: u32, expected: i32) !void {
|
||||
var x = @as(i32, @bitCast(a));
|
||||
var x: i32 = @bitCast(a);
|
||||
var result = ctz.__ctzsi2(x);
|
||||
try testing.expectEqual(expected, result);
|
||||
}
|
||||
|
@ -2,7 +2,7 @@ const ctz = @import("count0bits.zig");
|
||||
const testing = @import("std").testing;
|
||||
|
||||
fn test__ctzti2(a: u128, expected: i32) !void {
|
||||
var x = @as(i128, @bitCast(a));
|
||||
var x: i128 = @bitCast(a);
|
||||
var result = ctz.__ctzti2(x);
|
||||
try testing.expectEqual(expected, result);
|
||||
}
|
||||
|
@ -49,8 +49,8 @@ inline fn div(a: f64, b: f64) f64 {
|
||||
const qnanRep = exponentMask | quietBit;
|
||||
const infRep = @as(Z, @bitCast(std.math.inf(f64)));
|
||||
|
||||
const aExponent = @as(u32, @truncate((@as(Z, @bitCast(a)) >> significandBits) & maxExponent));
|
||||
const bExponent = @as(u32, @truncate((@as(Z, @bitCast(b)) >> significandBits) & maxExponent));
|
||||
const aExponent: u32 = @truncate((@as(Z, @bitCast(a)) >> significandBits) & maxExponent);
|
||||
const bExponent: u32 = @truncate((@as(Z, @bitCast(b)) >> significandBits) & maxExponent);
|
||||
const quotientSign: Z = (@as(Z, @bitCast(a)) ^ @as(Z, @bitCast(b))) & signBit;
|
||||
|
||||
var aSignificand: Z = @as(Z, @bitCast(a)) & significandMask;
|
||||
@ -63,36 +63,36 @@ inline fn div(a: f64, b: f64) f64 {
|
||||
const bAbs: Z = @as(Z, @bitCast(b)) & absMask;
|
||||
|
||||
// NaN / anything = qNaN
|
||||
if (aAbs > infRep) return @as(f64, @bitCast(@as(Z, @bitCast(a)) | quietBit));
|
||||
if (aAbs > infRep) return @bitCast(@as(Z, @bitCast(a)) | quietBit);
|
||||
// anything / NaN = qNaN
|
||||
if (bAbs > infRep) return @as(f64, @bitCast(@as(Z, @bitCast(b)) | quietBit));
|
||||
if (bAbs > infRep) return @bitCast(@as(Z, @bitCast(b)) | quietBit);
|
||||
|
||||
if (aAbs == infRep) {
|
||||
// infinity / infinity = NaN
|
||||
if (bAbs == infRep) {
|
||||
return @as(f64, @bitCast(qnanRep));
|
||||
return @bitCast(qnanRep);
|
||||
}
|
||||
// infinity / anything else = +/- infinity
|
||||
else {
|
||||
return @as(f64, @bitCast(aAbs | quotientSign));
|
||||
return @bitCast(aAbs | quotientSign);
|
||||
}
|
||||
}
|
||||
|
||||
// anything else / infinity = +/- 0
|
||||
if (bAbs == infRep) return @as(f64, @bitCast(quotientSign));
|
||||
if (bAbs == infRep) return @bitCast(quotientSign);
|
||||
|
||||
if (aAbs == 0) {
|
||||
// zero / zero = NaN
|
||||
if (bAbs == 0) {
|
||||
return @as(f64, @bitCast(qnanRep));
|
||||
return @bitCast(qnanRep);
|
||||
}
|
||||
// zero / anything else = +/- zero
|
||||
else {
|
||||
return @as(f64, @bitCast(quotientSign));
|
||||
return @bitCast(quotientSign);
|
||||
}
|
||||
}
|
||||
// anything else / zero = +/- infinity
|
||||
if (bAbs == 0) return @as(f64, @bitCast(infRep | quotientSign));
|
||||
if (bAbs == 0) return @bitCast(infRep | quotientSign);
|
||||
|
||||
// one or both of a or b is denormal, the other (if applicable) is a
|
||||
// normal number. Renormalize one or both of a and b, and set scale to
|
||||
@ -112,7 +112,7 @@ inline fn div(a: f64, b: f64) f64 {
|
||||
// [1, 2.0) and get a Q32 approximate reciprocal using a small minimax
|
||||
// polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This
|
||||
// is accurate to about 3.5 binary digits.
|
||||
const q31b: u32 = @as(u32, @truncate(bSignificand >> 21));
|
||||
const q31b: u32 = @truncate(bSignificand >> 21);
|
||||
var recip32 = @as(u32, 0x7504f333) -% q31b;
|
||||
|
||||
// Now refine the reciprocal estimate using a Newton-Raphson iteration:
|
||||
@ -123,12 +123,12 @@ inline fn div(a: f64, b: f64) f64 {
|
||||
// with each iteration, so after three iterations, we have about 28 binary
|
||||
// digits of accuracy.
|
||||
var correction32: u32 = undefined;
|
||||
correction32 = @as(u32, @truncate(~(@as(u64, recip32) *% q31b >> 32) +% 1));
|
||||
recip32 = @as(u32, @truncate(@as(u64, recip32) *% correction32 >> 31));
|
||||
correction32 = @as(u32, @truncate(~(@as(u64, recip32) *% q31b >> 32) +% 1));
|
||||
recip32 = @as(u32, @truncate(@as(u64, recip32) *% correction32 >> 31));
|
||||
correction32 = @as(u32, @truncate(~(@as(u64, recip32) *% q31b >> 32) +% 1));
|
||||
recip32 = @as(u32, @truncate(@as(u64, recip32) *% correction32 >> 31));
|
||||
correction32 = @truncate(~(@as(u64, recip32) *% q31b >> 32) +% 1);
|
||||
recip32 = @truncate(@as(u64, recip32) *% correction32 >> 31);
|
||||
correction32 = @truncate(~(@as(u64, recip32) *% q31b >> 32) +% 1);
|
||||
recip32 = @truncate(@as(u64, recip32) *% correction32 >> 31);
|
||||
correction32 = @truncate(~(@as(u64, recip32) *% q31b >> 32) +% 1);
|
||||
recip32 = @truncate(@as(u64, recip32) *% correction32 >> 31);
|
||||
|
||||
// recip32 might have overflowed to exactly zero in the preceding
|
||||
// computation if the high word of b is exactly 1.0. This would sabotage
|
||||
@ -138,12 +138,12 @@ inline fn div(a: f64, b: f64) f64 {
|
||||
|
||||
// We need to perform one more iteration to get us to 56 binary digits;
|
||||
// The last iteration needs to happen with extra precision.
|
||||
const q63blo: u32 = @as(u32, @truncate(bSignificand << 11));
|
||||
const q63blo: u32 = @truncate(bSignificand << 11);
|
||||
var correction: u64 = undefined;
|
||||
var reciprocal: u64 = undefined;
|
||||
correction = ~(@as(u64, recip32) *% q31b +% (@as(u64, recip32) *% q63blo >> 32)) +% 1;
|
||||
const cHi = @as(u32, @truncate(correction >> 32));
|
||||
const cLo = @as(u32, @truncate(correction));
|
||||
const cHi: u32 = @truncate(correction >> 32);
|
||||
const cLo: u32 = @truncate(correction);
|
||||
reciprocal = @as(u64, recip32) *% cHi +% (@as(u64, recip32) *% cLo >> 32);
|
||||
|
||||
// We already adjusted the 32-bit estimate, now we need to adjust the final
|
||||
@ -195,7 +195,7 @@ inline fn div(a: f64, b: f64) f64 {
|
||||
|
||||
if (writtenExponent >= maxExponent) {
|
||||
// If we have overflowed the exponent, return infinity.
|
||||
return @as(f64, @bitCast(infRep | quotientSign));
|
||||
return @bitCast(infRep | quotientSign);
|
||||
} else if (writtenExponent < 1) {
|
||||
if (writtenExponent == 0) {
|
||||
// Check whether the rounded result is normal.
|
||||
@ -206,12 +206,12 @@ inline fn div(a: f64, b: f64) f64 {
|
||||
absResult += round;
|
||||
if ((absResult & ~significandMask) != 0) {
|
||||
// The rounded result is normal; return it.
|
||||
return @as(f64, @bitCast(absResult | quotientSign));
|
||||
return @bitCast(absResult | quotientSign);
|
||||
}
|
||||
}
|
||||
// Flush denormals to zero. In the future, it would be nice to add
|
||||
// code to round them correctly.
|
||||
return @as(f64, @bitCast(quotientSign));
|
||||
return @bitCast(quotientSign);
|
||||
} else {
|
||||
const round = @intFromBool((residual << 1) > bSignificand);
|
||||
// Clear the implicit bit
|
||||
@ -221,7 +221,7 @@ inline fn div(a: f64, b: f64) f64 {
|
||||
// Round
|
||||
absResult +%= round;
|
||||
// Insert the sign and return
|
||||
return @as(f64, @bitCast(absResult | quotientSign));
|
||||
return @bitCast(absResult | quotientSign);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -6,7 +6,7 @@ const __divdf3 = @import("divdf3.zig").__divdf3;
|
||||
const testing = @import("std").testing;
|
||||
|
||||
fn compareResultD(result: f64, expected: u64) bool {
|
||||
const rep = @as(u64, @bitCast(result));
|
||||
const rep: u64 = @bitCast(result);
|
||||
|
||||
if (rep == expected) {
|
||||
return true;
|
||||
|
@ -7,5 +7,5 @@ comptime {
|
||||
|
||||
pub fn __divhf3(a: f16, b: f16) callconv(.C) f16 {
|
||||
// TODO: more efficient implementation
|
||||
return @as(f16, @floatCast(divsf3.__divsf3(a, b)));
|
||||
return @floatCast(divsf3.__divsf3(a, b));
|
||||
}
|
||||
|
@ -44,10 +44,10 @@ inline fn div(a: f32, b: f32) f32 {
|
||||
const absMask = signBit - 1;
|
||||
const exponentMask = absMask ^ significandMask;
|
||||
const qnanRep = exponentMask | quietBit;
|
||||
const infRep = @as(Z, @bitCast(std.math.inf(f32)));
|
||||
const infRep: Z = @bitCast(std.math.inf(f32));
|
||||
|
||||
const aExponent = @as(u32, @truncate((@as(Z, @bitCast(a)) >> significandBits) & maxExponent));
|
||||
const bExponent = @as(u32, @truncate((@as(Z, @bitCast(b)) >> significandBits) & maxExponent));
|
||||
const aExponent: u32 = @truncate((@as(Z, @bitCast(a)) >> significandBits) & maxExponent);
|
||||
const bExponent: u32 = @truncate((@as(Z, @bitCast(b)) >> significandBits) & maxExponent);
|
||||
const quotientSign: Z = (@as(Z, @bitCast(a)) ^ @as(Z, @bitCast(b))) & signBit;
|
||||
|
||||
var aSignificand: Z = @as(Z, @bitCast(a)) & significandMask;
|
||||
@ -60,36 +60,36 @@ inline fn div(a: f32, b: f32) f32 {
|
||||
const bAbs: Z = @as(Z, @bitCast(b)) & absMask;
|
||||
|
||||
// NaN / anything = qNaN
|
||||
if (aAbs > infRep) return @as(f32, @bitCast(@as(Z, @bitCast(a)) | quietBit));
|
||||
if (aAbs > infRep) return @bitCast(@as(Z, @bitCast(a)) | quietBit);
|
||||
// anything / NaN = qNaN
|
||||
if (bAbs > infRep) return @as(f32, @bitCast(@as(Z, @bitCast(b)) | quietBit));
|
||||
if (bAbs > infRep) return @bitCast(@as(Z, @bitCast(b)) | quietBit);
|
||||
|
||||
if (aAbs == infRep) {
|
||||
// infinity / infinity = NaN
|
||||
if (bAbs == infRep) {
|
||||
return @as(f32, @bitCast(qnanRep));
|
||||
return @bitCast(qnanRep);
|
||||
}
|
||||
// infinity / anything else = +/- infinity
|
||||
else {
|
||||
return @as(f32, @bitCast(aAbs | quotientSign));
|
||||
return @bitCast(aAbs | quotientSign);
|
||||
}
|
||||
}
|
||||
|
||||
// anything else / infinity = +/- 0
|
||||
if (bAbs == infRep) return @as(f32, @bitCast(quotientSign));
|
||||
if (bAbs == infRep) return @bitCast(quotientSign);
|
||||
|
||||
if (aAbs == 0) {
|
||||
// zero / zero = NaN
|
||||
if (bAbs == 0) {
|
||||
return @as(f32, @bitCast(qnanRep));
|
||||
return @bitCast(qnanRep);
|
||||
}
|
||||
// zero / anything else = +/- zero
|
||||
else {
|
||||
return @as(f32, @bitCast(quotientSign));
|
||||
return @bitCast(quotientSign);
|
||||
}
|
||||
}
|
||||
// anything else / zero = +/- infinity
|
||||
if (bAbs == 0) return @as(f32, @bitCast(infRep | quotientSign));
|
||||
if (bAbs == 0) return @bitCast(infRep | quotientSign);
|
||||
|
||||
// one or both of a or b is denormal, the other (if applicable) is a
|
||||
// normal number. Renormalize one or both of a and b, and set scale to
|
||||
@ -120,12 +120,12 @@ inline fn div(a: f32, b: f32) f32 {
|
||||
// with each iteration, so after three iterations, we have about 28 binary
|
||||
// digits of accuracy.
|
||||
var correction: u32 = undefined;
|
||||
correction = @as(u32, @truncate(~(@as(u64, reciprocal) *% q31b >> 32) +% 1));
|
||||
reciprocal = @as(u32, @truncate(@as(u64, reciprocal) *% correction >> 31));
|
||||
correction = @as(u32, @truncate(~(@as(u64, reciprocal) *% q31b >> 32) +% 1));
|
||||
reciprocal = @as(u32, @truncate(@as(u64, reciprocal) *% correction >> 31));
|
||||
correction = @as(u32, @truncate(~(@as(u64, reciprocal) *% q31b >> 32) +% 1));
|
||||
reciprocal = @as(u32, @truncate(@as(u64, reciprocal) *% correction >> 31));
|
||||
correction = @truncate(~(@as(u64, reciprocal) *% q31b >> 32) +% 1);
|
||||
reciprocal = @truncate(@as(u64, reciprocal) *% correction >> 31);
|
||||
correction = @truncate(~(@as(u64, reciprocal) *% q31b >> 32) +% 1);
|
||||
reciprocal = @truncate(@as(u64, reciprocal) *% correction >> 31);
|
||||
correction = @truncate(~(@as(u64, reciprocal) *% q31b >> 32) +% 1);
|
||||
reciprocal = @truncate(@as(u64, reciprocal) *% correction >> 31);
|
||||
|
||||
// Exhaustive testing shows that the error in reciprocal after three steps
|
||||
// is in the interval [-0x1.f58108p-31, 0x1.d0e48cp-29], in line with our
|
||||
@ -147,7 +147,7 @@ inline fn div(a: f32, b: f32) f32 {
|
||||
// is the error in the reciprocal of b scaled by the maximum
|
||||
// possible value of a. As a consequence of this error bound,
|
||||
// either q or nextafter(q) is the correctly rounded
|
||||
var quotient: Z = @as(u32, @truncate(@as(u64, reciprocal) *% (aSignificand << 1) >> 32));
|
||||
var quotient: Z = @truncate(@as(u64, reciprocal) *% (aSignificand << 1) >> 32);
|
||||
|
||||
// Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0).
|
||||
// In either case, we are going to compute a residual of the form
|
||||
@ -175,7 +175,7 @@ inline fn div(a: f32, b: f32) f32 {
|
||||
|
||||
if (writtenExponent >= maxExponent) {
|
||||
// If we have overflowed the exponent, return infinity.
|
||||
return @as(f32, @bitCast(infRep | quotientSign));
|
||||
return @bitCast(infRep | quotientSign);
|
||||
} else if (writtenExponent < 1) {
|
||||
if (writtenExponent == 0) {
|
||||
// Check whether the rounded result is normal.
|
||||
@ -186,12 +186,12 @@ inline fn div(a: f32, b: f32) f32 {
|
||||
absResult += round;
|
||||
if ((absResult & ~significandMask) > 0) {
|
||||
// The rounded result is normal; return it.
|
||||
return @as(f32, @bitCast(absResult | quotientSign));
|
||||
return @bitCast(absResult | quotientSign);
|
||||
}
|
||||
}
|
||||
// Flush denormals to zero. In the future, it would be nice to add
|
||||
// code to round them correctly.
|
||||
return @as(f32, @bitCast(quotientSign));
|
||||
return @bitCast(quotientSign);
|
||||
} else {
|
||||
const round = @intFromBool((residual << 1) > bSignificand);
|
||||
// Clear the implicit bit
|
||||
@ -201,7 +201,7 @@ inline fn div(a: f32, b: f32) f32 {
|
||||
// Round
|
||||
absResult +%= round;
|
||||
// Insert the sign and return
|
||||
return @as(f32, @bitCast(absResult | quotientSign));
|
||||
return @bitCast(absResult | quotientSign);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -6,7 +6,7 @@ const __divsf3 = @import("divsf3.zig").__divsf3;
|
||||
const testing = @import("std").testing;
|
||||
|
||||
fn compareResultF(result: f32, expected: u32) bool {
|
||||
const rep = @as(u32, @bitCast(result));
|
||||
const rep: u32 = @bitCast(result);
|
||||
|
||||
if (rep == expected) {
|
||||
return true;
|
||||
|
@ -41,10 +41,10 @@ inline fn div(a: f128, b: f128) f128 {
|
||||
const absMask = signBit - 1;
|
||||
const exponentMask = absMask ^ significandMask;
|
||||
const qnanRep = exponentMask | quietBit;
|
||||
const infRep = @as(Z, @bitCast(std.math.inf(f128)));
|
||||
const infRep: Z = @bitCast(std.math.inf(f128));
|
||||
|
||||
const aExponent = @as(u32, @truncate((@as(Z, @bitCast(a)) >> significandBits) & maxExponent));
|
||||
const bExponent = @as(u32, @truncate((@as(Z, @bitCast(b)) >> significandBits) & maxExponent));
|
||||
const aExponent: u32 = @truncate((@as(Z, @bitCast(a)) >> significandBits) & maxExponent);
|
||||
const bExponent: u32 = @truncate((@as(Z, @bitCast(b)) >> significandBits) & maxExponent);
|
||||
const quotientSign: Z = (@as(Z, @bitCast(a)) ^ @as(Z, @bitCast(b))) & signBit;
|
||||
|
||||
var aSignificand: Z = @as(Z, @bitCast(a)) & significandMask;
|
||||
@ -57,36 +57,36 @@ inline fn div(a: f128, b: f128) f128 {
|
||||
const bAbs: Z = @as(Z, @bitCast(b)) & absMask;
|
||||
|
||||
// NaN / anything = qNaN
|
||||
if (aAbs > infRep) return @as(f128, @bitCast(@as(Z, @bitCast(a)) | quietBit));
|
||||
if (aAbs > infRep) return @bitCast(@as(Z, @bitCast(a)) | quietBit);
|
||||
// anything / NaN = qNaN
|
||||
if (bAbs > infRep) return @as(f128, @bitCast(@as(Z, @bitCast(b)) | quietBit));
|
||||
if (bAbs > infRep) return @bitCast(@as(Z, @bitCast(b)) | quietBit);
|
||||
|
||||
if (aAbs == infRep) {
|
||||
// infinity / infinity = NaN
|
||||
if (bAbs == infRep) {
|
||||
return @as(f128, @bitCast(qnanRep));
|
||||
return @bitCast(qnanRep);
|
||||
}
|
||||
// infinity / anything else = +/- infinity
|
||||
else {
|
||||
return @as(f128, @bitCast(aAbs | quotientSign));
|
||||
return @bitCast(aAbs | quotientSign);
|
||||
}
|
||||
}
|
||||
|
||||
// anything else / infinity = +/- 0
|
||||
if (bAbs == infRep) return @as(f128, @bitCast(quotientSign));
|
||||
if (bAbs == infRep) return @bitCast(quotientSign);
|
||||
|
||||
if (aAbs == 0) {
|
||||
// zero / zero = NaN
|
||||
if (bAbs == 0) {
|
||||
return @as(f128, @bitCast(qnanRep));
|
||||
return @bitCast(qnanRep);
|
||||
}
|
||||
// zero / anything else = +/- zero
|
||||
else {
|
||||
return @as(f128, @bitCast(quotientSign));
|
||||
return @bitCast(quotientSign);
|
||||
}
|
||||
}
|
||||
// anything else / zero = +/- infinity
|
||||
if (bAbs == 0) return @as(f128, @bitCast(infRep | quotientSign));
|
||||
if (bAbs == 0) return @bitCast(infRep | quotientSign);
|
||||
|
||||
// one or both of a or b is denormal, the other (if applicable) is a
|
||||
// normal number. Renormalize one or both of a and b, and set scale to
|
||||
@ -106,7 +106,7 @@ inline fn div(a: f128, b: f128) f128 {
|
||||
// [1, 2.0) and get a Q64 approximate reciprocal using a small minimax
|
||||
// polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This
|
||||
// is accurate to about 3.5 binary digits.
|
||||
const q63b = @as(u64, @truncate(bSignificand >> 49));
|
||||
const q63b: u64 = @truncate(bSignificand >> 49);
|
||||
var recip64 = @as(u64, 0x7504f333F9DE6484) -% q63b;
|
||||
// 0x7504f333F9DE6484 / 2^64 + 1 = 3/4 + 1/sqrt(2)
|
||||
|
||||
@ -117,16 +117,16 @@ inline fn div(a: f128, b: f128) f128 {
|
||||
// This doubles the number of correct binary digits in the approximation
|
||||
// with each iteration.
|
||||
var correction64: u64 = undefined;
|
||||
correction64 = @as(u64, @truncate(~(@as(u128, recip64) *% q63b >> 64) +% 1));
|
||||
recip64 = @as(u64, @truncate(@as(u128, recip64) *% correction64 >> 63));
|
||||
correction64 = @as(u64, @truncate(~(@as(u128, recip64) *% q63b >> 64) +% 1));
|
||||
recip64 = @as(u64, @truncate(@as(u128, recip64) *% correction64 >> 63));
|
||||
correction64 = @as(u64, @truncate(~(@as(u128, recip64) *% q63b >> 64) +% 1));
|
||||
recip64 = @as(u64, @truncate(@as(u128, recip64) *% correction64 >> 63));
|
||||
correction64 = @as(u64, @truncate(~(@as(u128, recip64) *% q63b >> 64) +% 1));
|
||||
recip64 = @as(u64, @truncate(@as(u128, recip64) *% correction64 >> 63));
|
||||
correction64 = @as(u64, @truncate(~(@as(u128, recip64) *% q63b >> 64) +% 1));
|
||||
recip64 = @as(u64, @truncate(@as(u128, recip64) *% correction64 >> 63));
|
||||
correction64 = @truncate(~(@as(u128, recip64) *% q63b >> 64) +% 1);
|
||||
recip64 = @truncate(@as(u128, recip64) *% correction64 >> 63);
|
||||
correction64 = @truncate(~(@as(u128, recip64) *% q63b >> 64) +% 1);
|
||||
recip64 = @truncate(@as(u128, recip64) *% correction64 >> 63);
|
||||
correction64 = @truncate(~(@as(u128, recip64) *% q63b >> 64) +% 1);
|
||||
recip64 = @truncate(@as(u128, recip64) *% correction64 >> 63);
|
||||
correction64 = @truncate(~(@as(u128, recip64) *% q63b >> 64) +% 1);
|
||||
recip64 = @truncate(@as(u128, recip64) *% correction64 >> 63);
|
||||
correction64 = @truncate(~(@as(u128, recip64) *% q63b >> 64) +% 1);
|
||||
recip64 = @truncate(@as(u128, recip64) *% correction64 >> 63);
|
||||
|
||||
// The reciprocal may have overflowed to zero if the upper half of b is
|
||||
// exactly 1.0. This would sabotage the full-width final stage of the
|
||||
@ -135,7 +135,7 @@ inline fn div(a: f128, b: f128) f128 {
|
||||
|
||||
// We need to perform one more iteration to get us to 112 binary digits;
|
||||
// The last iteration needs to happen with extra precision.
|
||||
const q127blo: u64 = @as(u64, @truncate(bSignificand << 15));
|
||||
const q127blo: u64 = @truncate(bSignificand << 15);
|
||||
var correction: u128 = undefined;
|
||||
var reciprocal: u128 = undefined;
|
||||
|
||||
@ -151,8 +151,8 @@ inline fn div(a: f128, b: f128) f128 {
|
||||
|
||||
correction = -%(r64q63 + (r64q127 >> 64));
|
||||
|
||||
const cHi = @as(u64, @truncate(correction >> 64));
|
||||
const cLo = @as(u64, @truncate(correction));
|
||||
const cHi: u64 = @truncate(correction >> 64);
|
||||
const cLo: u64 = @truncate(correction);
|
||||
|
||||
wideMultiply(u128, recip64, cHi, &dummy, &r64cH);
|
||||
wideMultiply(u128, recip64, cLo, &dummy, &r64cL);
|
||||
@ -210,7 +210,7 @@ inline fn div(a: f128, b: f128) f128 {
|
||||
|
||||
if (writtenExponent >= maxExponent) {
|
||||
// If we have overflowed the exponent, return infinity.
|
||||
return @as(f128, @bitCast(infRep | quotientSign));
|
||||
return @bitCast(infRep | quotientSign);
|
||||
} else if (writtenExponent < 1) {
|
||||
if (writtenExponent == 0) {
|
||||
// Check whether the rounded result is normal.
|
||||
@ -221,12 +221,12 @@ inline fn div(a: f128, b: f128) f128 {
|
||||
absResult += round;
|
||||
if ((absResult & ~significandMask) > 0) {
|
||||
// The rounded result is normal; return it.
|
||||
return @as(f128, @bitCast(absResult | quotientSign));
|
||||
return @bitCast(absResult | quotientSign);
|
||||
}
|
||||
}
|
||||
// Flush denormals to zero. In the future, it would be nice to add
|
||||
// code to round them correctly.
|
||||
return @as(f128, @bitCast(quotientSign));
|
||||
return @bitCast(quotientSign);
|
||||
} else {
|
||||
const round = @intFromBool((residual << 1) >= bSignificand);
|
||||
// Clear the implicit bit
|
||||
@ -236,7 +236,7 @@ inline fn div(a: f128, b: f128) f128 {
|
||||
// Round
|
||||
absResult +%= round;
|
||||
// Insert the sign and return
|
||||
return @as(f128, @bitCast(absResult | quotientSign));
|
||||
return @bitCast(absResult | quotientSign);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,9 +5,9 @@ const testing = std.testing;
|
||||
const __divtf3 = @import("divtf3.zig").__divtf3;
|
||||
|
||||
fn compareResultLD(result: f128, expectedHi: u64, expectedLo: u64) bool {
|
||||
const rep = @as(u128, @bitCast(result));
|
||||
const hi = @as(u64, @truncate(rep >> 64));
|
||||
const lo = @as(u64, @truncate(rep));
|
||||
const rep: u128 = @bitCast(result);
|
||||
const hi: u64 = @truncate(rep >> 64);
|
||||
const lo: u64 = @truncate(rep);
|
||||
|
||||
if (hi == expectedHi and lo == expectedLo) {
|
||||
return true;
|
||||
|
@ -30,10 +30,10 @@ pub fn __divxf3(a: f80, b: f80) callconv(.C) f80 {
|
||||
|
||||
const absMask = signBit - 1;
|
||||
const qnanRep = @as(Z, @bitCast(std.math.nan(T))) | quietBit;
|
||||
const infRep = @as(Z, @bitCast(std.math.inf(T)));
|
||||
const infRep: Z = @bitCast(std.math.inf(T));
|
||||
|
||||
const aExponent = @as(u32, @truncate((@as(Z, @bitCast(a)) >> significandBits) & maxExponent));
|
||||
const bExponent = @as(u32, @truncate((@as(Z, @bitCast(b)) >> significandBits) & maxExponent));
|
||||
const aExponent: u32 = @truncate((@as(Z, @bitCast(a)) >> significandBits) & maxExponent);
|
||||
const bExponent: u32 = @truncate((@as(Z, @bitCast(b)) >> significandBits) & maxExponent);
|
||||
const quotientSign: Z = (@as(Z, @bitCast(a)) ^ @as(Z, @bitCast(b))) & signBit;
|
||||
|
||||
var aSignificand: Z = @as(Z, @bitCast(a)) & significandMask;
|
||||
@ -46,36 +46,36 @@ pub fn __divxf3(a: f80, b: f80) callconv(.C) f80 {
|
||||
const bAbs: Z = @as(Z, @bitCast(b)) & absMask;
|
||||
|
||||
// NaN / anything = qNaN
|
||||
if (aAbs > infRep) return @as(T, @bitCast(@as(Z, @bitCast(a)) | quietBit));
|
||||
if (aAbs > infRep) return @bitCast(@as(Z, @bitCast(a)) | quietBit);
|
||||
// anything / NaN = qNaN
|
||||
if (bAbs > infRep) return @as(T, @bitCast(@as(Z, @bitCast(b)) | quietBit));
|
||||
if (bAbs > infRep) return @bitCast(@as(Z, @bitCast(b)) | quietBit);
|
||||
|
||||
if (aAbs == infRep) {
|
||||
// infinity / infinity = NaN
|
||||
if (bAbs == infRep) {
|
||||
return @as(T, @bitCast(qnanRep));
|
||||
return @bitCast(qnanRep);
|
||||
}
|
||||
// infinity / anything else = +/- infinity
|
||||
else {
|
||||
return @as(T, @bitCast(aAbs | quotientSign));
|
||||
return @bitCast(aAbs | quotientSign);
|
||||
}
|
||||
}
|
||||
|
||||
// anything else / infinity = +/- 0
|
||||
if (bAbs == infRep) return @as(T, @bitCast(quotientSign));
|
||||
if (bAbs == infRep) return @bitCast(quotientSign);
|
||||
|
||||
if (aAbs == 0) {
|
||||
// zero / zero = NaN
|
||||
if (bAbs == 0) {
|
||||
return @as(T, @bitCast(qnanRep));
|
||||
return @bitCast(qnanRep);
|
||||
}
|
||||
// zero / anything else = +/- zero
|
||||
else {
|
||||
return @as(T, @bitCast(quotientSign));
|
||||
return @bitCast(quotientSign);
|
||||
}
|
||||
}
|
||||
// anything else / zero = +/- infinity
|
||||
if (bAbs == 0) return @as(T, @bitCast(infRep | quotientSign));
|
||||
if (bAbs == 0) return @bitCast(infRep | quotientSign);
|
||||
|
||||
// one or both of a or b is denormal, the other (if applicable) is a
|
||||
// normal number. Renormalize one or both of a and b, and set scale to
|
||||
@ -89,7 +89,7 @@ pub fn __divxf3(a: f80, b: f80) callconv(.C) f80 {
|
||||
// [1, 2.0) and get a Q64 approximate reciprocal using a small minimax
|
||||
// polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This
|
||||
// is accurate to about 3.5 binary digits.
|
||||
const q63b = @as(u64, @intCast(bSignificand));
|
||||
const q63b: u64 = @intCast(bSignificand);
|
||||
var recip64 = @as(u64, 0x7504f333F9DE6484) -% q63b;
|
||||
// 0x7504f333F9DE6484 / 2^64 + 1 = 3/4 + 1/sqrt(2)
|
||||
|
||||
@ -100,16 +100,16 @@ pub fn __divxf3(a: f80, b: f80) callconv(.C) f80 {
|
||||
// This doubles the number of correct binary digits in the approximation
|
||||
// with each iteration.
|
||||
var correction64: u64 = undefined;
|
||||
correction64 = @as(u64, @truncate(~(@as(u128, recip64) *% q63b >> 64) +% 1));
|
||||
recip64 = @as(u64, @truncate(@as(u128, recip64) *% correction64 >> 63));
|
||||
correction64 = @as(u64, @truncate(~(@as(u128, recip64) *% q63b >> 64) +% 1));
|
||||
recip64 = @as(u64, @truncate(@as(u128, recip64) *% correction64 >> 63));
|
||||
correction64 = @as(u64, @truncate(~(@as(u128, recip64) *% q63b >> 64) +% 1));
|
||||
recip64 = @as(u64, @truncate(@as(u128, recip64) *% correction64 >> 63));
|
||||
correction64 = @as(u64, @truncate(~(@as(u128, recip64) *% q63b >> 64) +% 1));
|
||||
recip64 = @as(u64, @truncate(@as(u128, recip64) *% correction64 >> 63));
|
||||
correction64 = @as(u64, @truncate(~(@as(u128, recip64) *% q63b >> 64) +% 1));
|
||||
recip64 = @as(u64, @truncate(@as(u128, recip64) *% correction64 >> 63));
|
||||
correction64 = @truncate(~(@as(u128, recip64) *% q63b >> 64) +% 1);
|
||||
recip64 = @truncate(@as(u128, recip64) *% correction64 >> 63);
|
||||
correction64 = @truncate(~(@as(u128, recip64) *% q63b >> 64) +% 1);
|
||||
recip64 = @truncate(@as(u128, recip64) *% correction64 >> 63);
|
||||
correction64 = @truncate(~(@as(u128, recip64) *% q63b >> 64) +% 1);
|
||||
recip64 = @truncate(@as(u128, recip64) *% correction64 >> 63);
|
||||
correction64 = @truncate(~(@as(u128, recip64) *% q63b >> 64) +% 1);
|
||||
recip64 = @truncate(@as(u128, recip64) *% correction64 >> 63);
|
||||
correction64 = @truncate(~(@as(u128, recip64) *% q63b >> 64) +% 1);
|
||||
recip64 = @truncate(@as(u128, recip64) *% correction64 >> 63);
|
||||
|
||||
// The reciprocal may have overflowed to zero if the upper half of b is
|
||||
// exactly 1.0. This would sabotage the full-width final stage of the
|
||||
@ -128,8 +128,8 @@ pub fn __divxf3(a: f80, b: f80) callconv(.C) f80 {
|
||||
|
||||
correction = -%correction;
|
||||
|
||||
const cHi = @as(u64, @truncate(correction >> 64));
|
||||
const cLo = @as(u64, @truncate(correction));
|
||||
const cHi: u64 = @truncate(correction >> 64);
|
||||
const cLo: u64 = @truncate(correction);
|
||||
|
||||
var r64cH: u128 = undefined;
|
||||
var r64cL: u128 = undefined;
|
||||
|
@ -5,7 +5,7 @@ const testing = std.testing;
|
||||
const __divxf3 = @import("divxf3.zig").__divxf3;
|
||||
|
||||
fn compareResult(result: f80, expected: u80) bool {
|
||||
const rep = @as(u80, @bitCast(result));
|
||||
const rep: u80 = @bitCast(result);
|
||||
|
||||
if (rep == expected) return true;
|
||||
// test other possible NaN representations (signal NaN)
|
||||
@ -25,9 +25,9 @@ fn test__divxf3(a: f80, b: f80) !void {
|
||||
const x = __divxf3(a, b);
|
||||
|
||||
// Next float (assuming normal, non-zero result)
|
||||
const x_plus_eps = @as(f80, @bitCast((@as(u80, @bitCast(x)) + 1) | integerBit));
|
||||
const x_plus_eps: f80 = @bitCast((@as(u80, @bitCast(x)) + 1) | integerBit);
|
||||
// Prev float (assuming normal, non-zero result)
|
||||
const x_minus_eps = @as(f80, @bitCast((@as(u80, @bitCast(x)) - 1) | integerBit));
|
||||
const x_minus_eps: f80 = @bitCast((@as(u80, @bitCast(x)) - 1) | integerBit);
|
||||
|
||||
// Make sure result is more accurate than the adjacent floats
|
||||
const err_x = @fabs(@mulAdd(f80, x, b, -a));
|
||||
|
@ -125,7 +125,7 @@ const ObjectArray = struct {
|
||||
if (self.slots[index] == null) {
|
||||
// initialize the slot
|
||||
const size = control.size;
|
||||
const alignment = @as(u29, @truncate(control.alignment));
|
||||
const alignment: u29 = @truncate(control.alignment);
|
||||
|
||||
var data = simple_allocator.advancedAlloc(alignment, size);
|
||||
errdefer simple_allocator.free(data);
|
||||
|
@ -39,8 +39,8 @@ pub fn expf(x_: f32) callconv(.C) f32 {
|
||||
const P2 = -2.7667332906e-3;
|
||||
|
||||
var x = x_;
|
||||
var hx = @as(u32, @bitCast(x));
|
||||
const sign = @as(i32, @intCast(hx >> 31));
|
||||
var hx: u32 = @bitCast(x);
|
||||
const sign: i32 = @intCast(hx >> 31);
|
||||
hx &= 0x7FFFFFFF;
|
||||
|
||||
if (math.isNan(x)) {
|
||||
@ -74,12 +74,12 @@ pub fn expf(x_: f32) callconv(.C) f32 {
|
||||
if (hx > 0x3EB17218) {
|
||||
// |x| > 1.5 * ln2
|
||||
if (hx > 0x3F851592) {
|
||||
k = @as(i32, @intFromFloat(invln2 * x + half[@as(usize, @intCast(sign))]));
|
||||
k = @intFromFloat(invln2 * x + half[@as(usize, @intCast(sign))]);
|
||||
} else {
|
||||
k = 1 - sign - sign;
|
||||
}
|
||||
|
||||
const fk = @as(f32, @floatFromInt(k));
|
||||
const fk: f32 = @floatFromInt(k);
|
||||
hi = x - fk * ln2hi;
|
||||
lo = fk * ln2lo;
|
||||
x = hi - lo;
|
||||
@ -117,9 +117,9 @@ pub fn exp(x_: f64) callconv(.C) f64 {
|
||||
const P5: f64 = 4.13813679705723846039e-08;
|
||||
|
||||
var x = x_;
|
||||
var ux = @as(u64, @bitCast(x));
|
||||
var ux: u64 = @bitCast(x);
|
||||
var hx = ux >> 32;
|
||||
const sign = @as(i32, @intCast(hx >> 31));
|
||||
const sign: i32 = @intCast(hx >> 31);
|
||||
hx &= 0x7FFFFFFF;
|
||||
|
||||
if (math.isNan(x)) {
|
||||
@ -157,12 +157,12 @@ pub fn exp(x_: f64) callconv(.C) f64 {
|
||||
if (hx > 0x3FD62E42) {
|
||||
// |x| >= 1.5 * ln2
|
||||
if (hx > 0x3FF0A2B2) {
|
||||
k = @as(i32, @intFromFloat(invln2 * x + half[@as(usize, @intCast(sign))]));
|
||||
k = @intFromFloat(invln2 * x + half[@as(usize, @intCast(sign))]);
|
||||
} else {
|
||||
k = 1 - sign - sign;
|
||||
}
|
||||
|
||||
const dk = @as(f64, @floatFromInt(k));
|
||||
const dk: f64 = @floatFromInt(k);
|
||||
hi = x - dk * ln2hi;
|
||||
lo = dk * ln2lo;
|
||||
x = hi - lo;
|
||||
@ -191,12 +191,12 @@ pub fn exp(x_: f64) callconv(.C) f64 {
|
||||
|
||||
pub fn __expx(a: f80) callconv(.C) f80 {
|
||||
// TODO: more efficient implementation
|
||||
return @as(f80, @floatCast(expq(a)));
|
||||
return @floatCast(expq(a));
|
||||
}
|
||||
|
||||
pub fn expq(a: f128) callconv(.C) f128 {
|
||||
// TODO: more correct implementation
|
||||
return exp(@as(f64, @floatCast(a)));
|
||||
return exp(@floatCast(a));
|
||||
}
|
||||
|
||||
pub fn expl(x: c_longdouble) callconv(.C) c_longdouble {
|
||||
|
@ -31,14 +31,14 @@ pub fn __exp2h(x: f16) callconv(.C) f16 {
|
||||
}
|
||||
|
||||
pub fn exp2f(x: f32) callconv(.C) f32 {
|
||||
const tblsiz = @as(u32, @intCast(exp2ft.len));
|
||||
const tblsiz: u32 = @intCast(exp2ft.len);
|
||||
const redux: f32 = 0x1.8p23 / @as(f32, @floatFromInt(tblsiz));
|
||||
const P1: f32 = 0x1.62e430p-1;
|
||||
const P2: f32 = 0x1.ebfbe0p-3;
|
||||
const P3: f32 = 0x1.c6b348p-5;
|
||||
const P4: f32 = 0x1.3b2c9cp-7;
|
||||
|
||||
var u = @as(u32, @bitCast(x));
|
||||
var u: u32 = @bitCast(x);
|
||||
const ix = u & 0x7FFFFFFF;
|
||||
|
||||
// |x| > 126
|
||||
@ -72,11 +72,11 @@ pub fn exp2f(x: f32) callconv(.C) f32 {
|
||||
// intended result but should confirm how GCC/Clang handle this to ensure.
|
||||
|
||||
var uf = x + redux;
|
||||
var i_0 = @as(u32, @bitCast(uf));
|
||||
var i_0: u32 = @bitCast(uf);
|
||||
i_0 +%= tblsiz / 2;
|
||||
|
||||
const k = i_0 / tblsiz;
|
||||
const uk = @as(f64, @bitCast(@as(u64, 0x3FF + k) << 52));
|
||||
const uk: f64 = @bitCast(@as(u64, 0x3FF + k) << 52);
|
||||
i_0 &= tblsiz - 1;
|
||||
uf -= redux;
|
||||
|
||||
@ -84,11 +84,11 @@ pub fn exp2f(x: f32) callconv(.C) f32 {
|
||||
var r: f64 = exp2ft[@as(usize, @intCast(i_0))];
|
||||
const t: f64 = r * z;
|
||||
r = r + t * (P1 + z * P2) + t * (z * z) * (P3 + z * P4);
|
||||
return @as(f32, @floatCast(r * uk));
|
||||
return @floatCast(r * uk);
|
||||
}
|
||||
|
||||
pub fn exp2(x: f64) callconv(.C) f64 {
|
||||
const tblsiz: u32 = @as(u32, @intCast(exp2dt.len / 2));
|
||||
const tblsiz: u32 = @intCast(exp2dt.len / 2);
|
||||
const redux: f64 = 0x1.8p52 / @as(f64, @floatFromInt(tblsiz));
|
||||
const P1: f64 = 0x1.62e42fefa39efp-1;
|
||||
const P2: f64 = 0x1.ebfbdff82c575p-3;
|
||||
@ -96,7 +96,7 @@ pub fn exp2(x: f64) callconv(.C) f64 {
|
||||
const P4: f64 = 0x1.3b2ab88f70400p-7;
|
||||
const P5: f64 = 0x1.5d88003875c74p-10;
|
||||
|
||||
const ux = @as(u64, @bitCast(x));
|
||||
const ux: u64 = @bitCast(x);
|
||||
const ix = @as(u32, @intCast(ux >> 32)) & 0x7FFFFFFF;
|
||||
|
||||
// TODO: This should be handled beneath.
|
||||
@ -139,7 +139,7 @@ pub fn exp2(x: f64) callconv(.C) f64 {
|
||||
// reduce x
|
||||
var uf: f64 = x + redux;
|
||||
// NOTE: musl performs an implicit 64-bit to 32-bit u32 truncation here
|
||||
var i_0: u32 = @as(u32, @truncate(@as(u64, @bitCast(uf))));
|
||||
var i_0: u32 = @truncate(@as(u64, @bitCast(uf)));
|
||||
i_0 +%= tblsiz / 2;
|
||||
|
||||
const k: u32 = i_0 / tblsiz * tblsiz;
|
||||
@ -158,12 +158,12 @@ pub fn exp2(x: f64) callconv(.C) f64 {
|
||||
|
||||
pub fn __exp2x(x: f80) callconv(.C) f80 {
|
||||
// TODO: more efficient implementation
|
||||
return @as(f80, @floatCast(exp2q(x)));
|
||||
return @floatCast(exp2q(x));
|
||||
}
|
||||
|
||||
pub fn exp2q(x: f128) callconv(.C) f128 {
|
||||
// TODO: more correct implementation
|
||||
return exp2(@as(f64, @floatCast(x)));
|
||||
return exp2(@floatCast(x));
|
||||
}
|
||||
|
||||
pub fn exp2l(x: c_longdouble) callconv(.C) c_longdouble {
|
||||
|
@ -33,7 +33,7 @@ pub inline fn extendf(
|
||||
const dstMinNormal: dst_rep_t = @as(dst_rep_t, 1) << dstSigBits;
|
||||
|
||||
// Break a into a sign and representation of the absolute value
|
||||
const aRep: src_rep_t = @as(src_rep_t, @bitCast(a));
|
||||
const aRep: src_rep_t = @bitCast(a);
|
||||
const aAbs: src_rep_t = aRep & srcAbsMask;
|
||||
const sign: src_rep_t = aRep & srcSignMask;
|
||||
var absResult: dst_rep_t = undefined;
|
||||
@ -104,7 +104,7 @@ pub inline fn extend_f80(comptime src_t: type, a: std.meta.Int(.unsigned, @typeI
|
||||
// a is a normal number.
|
||||
// Extend to the destination type by shifting the significand and
|
||||
// exponent into the proper position and rebiasing the exponent.
|
||||
dst.exp = @as(u16, @intCast(a_abs >> src_sig_bits));
|
||||
dst.exp = @intCast(a_abs >> src_sig_bits);
|
||||
dst.exp += dst_exp_bias - src_exp_bias;
|
||||
dst.fraction = @as(u64, a_abs) << (dst_sig_bits - src_sig_bits);
|
||||
dst.fraction |= dst_int_bit; // bit 64 is always set for normal numbers
|
||||
@ -126,7 +126,7 @@ pub inline fn extend_f80(comptime src_t: type, a: std.meta.Int(.unsigned, @typeI
|
||||
|
||||
dst.fraction = @as(u64, a_abs) << @as(u6, @intCast(dst_sig_bits - src_sig_bits + scale));
|
||||
dst.fraction |= dst_int_bit; // bit 64 is always set for normal numbers
|
||||
dst.exp = @as(u16, @truncate(a_abs >> @as(SrcShift, @intCast(src_sig_bits - scale))));
|
||||
dst.exp = @truncate(a_abs >> @as(SrcShift, @intCast(src_sig_bits - scale)));
|
||||
dst.exp ^= 1;
|
||||
dst.exp |= dst_exp_bias - src_exp_bias - scale + 1;
|
||||
} else {
|
||||
|
@ -11,7 +11,7 @@ const F16T = @import("./common.zig").F16T;
|
||||
fn test__extenddfxf2(a: f64, expected: u80) !void {
|
||||
const x = __extenddfxf2(a);
|
||||
|
||||
const rep = @as(u80, @bitCast(x));
|
||||
const rep: u80 = @bitCast(x);
|
||||
if (rep == expected)
|
||||
return;
|
||||
|
||||
@ -25,9 +25,9 @@ fn test__extenddfxf2(a: f64, expected: u80) !void {
|
||||
fn test__extenddftf2(a: f64, expected_hi: u64, expected_lo: u64) !void {
|
||||
const x = __extenddftf2(a);
|
||||
|
||||
const rep = @as(u128, @bitCast(x));
|
||||
const hi = @as(u64, @intCast(rep >> 64));
|
||||
const lo = @as(u64, @truncate(rep));
|
||||
const rep: u128 = @bitCast(x);
|
||||
const hi: u64 = @intCast(rep >> 64);
|
||||
const lo: u64 = @truncate(rep);
|
||||
|
||||
if (hi == expected_hi and lo == expected_lo)
|
||||
return;
|
||||
@ -46,7 +46,7 @@ fn test__extenddftf2(a: f64, expected_hi: u64, expected_lo: u64) !void {
|
||||
|
||||
fn test__extendhfsf2(a: u16, expected: u32) !void {
|
||||
const x = __extendhfsf2(@as(F16T(f32), @bitCast(a)));
|
||||
const rep = @as(u32, @bitCast(x));
|
||||
const rep: u32 = @bitCast(x);
|
||||
|
||||
if (rep == expected) {
|
||||
if (rep & 0x7fffffff > 0x7f800000) {
|
||||
@ -63,9 +63,9 @@ fn test__extendhfsf2(a: u16, expected: u32) !void {
|
||||
fn test__extendsftf2(a: f32, expected_hi: u64, expected_lo: u64) !void {
|
||||
const x = __extendsftf2(a);
|
||||
|
||||
const rep = @as(u128, @bitCast(x));
|
||||
const hi = @as(u64, @intCast(rep >> 64));
|
||||
const lo = @as(u64, @truncate(rep));
|
||||
const rep: u128 = @bitCast(x);
|
||||
const hi: u64 = @intCast(rep >> 64);
|
||||
const lo: u64 = @truncate(rep);
|
||||
|
||||
if (hi == expected_hi and lo == expected_lo)
|
||||
return;
|
||||
@ -184,35 +184,35 @@ test "extendsftf2" {
|
||||
}
|
||||
|
||||
fn makeQNaN64() f64 {
|
||||
return @as(f64, @bitCast(@as(u64, 0x7ff8000000000000)));
|
||||
return @bitCast(@as(u64, 0x7ff8000000000000));
|
||||
}
|
||||
|
||||
fn makeInf64() f64 {
|
||||
return @as(f64, @bitCast(@as(u64, 0x7ff0000000000000)));
|
||||
return @bitCast(@as(u64, 0x7ff0000000000000));
|
||||
}
|
||||
|
||||
fn makeNaN64(rand: u64) f64 {
|
||||
return @as(f64, @bitCast(0x7ff0000000000000 | (rand & 0xfffffffffffff)));
|
||||
return @bitCast(0x7ff0000000000000 | (rand & 0xfffffffffffff));
|
||||
}
|
||||
|
||||
fn makeQNaN32() f32 {
|
||||
return @as(f32, @bitCast(@as(u32, 0x7fc00000)));
|
||||
return @bitCast(@as(u32, 0x7fc00000));
|
||||
}
|
||||
|
||||
fn makeNaN32(rand: u32) f32 {
|
||||
return @as(f32, @bitCast(0x7f800000 | (rand & 0x7fffff)));
|
||||
return @bitCast(0x7f800000 | (rand & 0x7fffff));
|
||||
}
|
||||
|
||||
fn makeInf32() f32 {
|
||||
return @as(f32, @bitCast(@as(u32, 0x7f800000)));
|
||||
return @bitCast(@as(u32, 0x7f800000));
|
||||
}
|
||||
|
||||
fn test__extendhftf2(a: u16, expected_hi: u64, expected_lo: u64) !void {
|
||||
const x = __extendhftf2(@as(F16T(f128), @bitCast(a)));
|
||||
|
||||
const rep = @as(u128, @bitCast(x));
|
||||
const hi = @as(u64, @intCast(rep >> 64));
|
||||
const lo = @as(u64, @truncate(rep));
|
||||
const rep: u128 = @bitCast(x);
|
||||
const hi: u64 = @intCast(rep >> 64);
|
||||
const lo: u64 = @truncate(rep);
|
||||
|
||||
if (hi == expected_hi and lo == expected_lo)
|
||||
return;
|
||||
|
@ -520,9 +520,9 @@ test "floatsitf" {
|
||||
fn test__floatunsitf(a: u32, expected_hi: u64, expected_lo: u64) !void {
|
||||
const x = __floatunsitf(a);
|
||||
|
||||
const x_repr = @as(u128, @bitCast(x));
|
||||
const x_hi = @as(u64, @intCast(x_repr >> 64));
|
||||
const x_lo = @as(u64, @truncate(x_repr));
|
||||
const x_repr: u128 = @bitCast(x);
|
||||
const x_hi: u64 = @intCast(x_repr >> 64);
|
||||
const x_lo: u64 = @truncate(x_repr);
|
||||
|
||||
if (x_hi == expected_hi and x_lo == expected_lo) {
|
||||
return;
|
||||
@ -552,9 +552,9 @@ fn test__floatditf(a: i64, expected: f128) !void {
|
||||
fn test__floatunditf(a: u64, expected_hi: u64, expected_lo: u64) !void {
|
||||
const x = __floatunditf(a);
|
||||
|
||||
const x_repr = @as(u128, @bitCast(x));
|
||||
const x_hi = @as(u64, @intCast(x_repr >> 64));
|
||||
const x_lo = @as(u64, @truncate(x_repr));
|
||||
const x_repr: u128 = @bitCast(x);
|
||||
const x_hi: u64 = @intCast(x_repr >> 64);
|
||||
const x_lo: u64 = @truncate(x_repr);
|
||||
|
||||
if (x_hi == expected_hi and x_lo == expected_lo) {
|
||||
return;
|
||||
|
@ -26,7 +26,7 @@ comptime {
|
||||
}
|
||||
|
||||
pub fn __floorh(x: f16) callconv(.C) f16 {
|
||||
var u = @as(u16, @bitCast(x));
|
||||
var u: u16 = @bitCast(x);
|
||||
const e = @as(i16, @intCast((u >> 10) & 31)) - 15;
|
||||
var m: u16 = undefined;
|
||||
|
||||
@ -132,7 +132,7 @@ pub fn __floorx(x: f80) callconv(.C) f80 {
|
||||
pub fn floorq(x: f128) callconv(.C) f128 {
|
||||
const f128_toint = 1.0 / math.floatEps(f128);
|
||||
|
||||
const u = @as(u128, @bitCast(x));
|
||||
const u: u128 = @bitCast(x);
|
||||
const e = (u >> 112) & 0x7FFF;
|
||||
var y: f128 = undefined;
|
||||
|
||||
|
@ -22,7 +22,7 @@ comptime {
|
||||
|
||||
pub fn __fmodh(x: f16, y: f16) callconv(.C) f16 {
|
||||
// TODO: more efficient implementation
|
||||
return @as(f16, @floatCast(fmodf(x, y)));
|
||||
return @floatCast(fmodf(x, y));
|
||||
}
|
||||
|
||||
pub fn fmodf(x: f32, y: f32) callconv(.C) f32 {
|
||||
@ -46,12 +46,12 @@ pub fn __fmodx(a: f80, b: f80) callconv(.C) f80 {
|
||||
const signBit = (@as(Z, 1) << (significandBits + exponentBits));
|
||||
const maxExponent = ((1 << exponentBits) - 1);
|
||||
|
||||
var aRep = @as(Z, @bitCast(a));
|
||||
var bRep = @as(Z, @bitCast(b));
|
||||
var aRep: Z = @bitCast(a);
|
||||
var bRep: Z = @bitCast(b);
|
||||
|
||||
const signA = aRep & signBit;
|
||||
var expA = @as(i32, @intCast((@as(Z, @bitCast(a)) >> significandBits) & maxExponent));
|
||||
var expB = @as(i32, @intCast((@as(Z, @bitCast(b)) >> significandBits) & maxExponent));
|
||||
var expA: i32 = @intCast((@as(Z, @bitCast(a)) >> significandBits) & maxExponent);
|
||||
var expB: i32 = @intCast((@as(Z, @bitCast(b)) >> significandBits) & maxExponent);
|
||||
|
||||
// There are 3 cases where the answer is undefined, check for:
|
||||
// - fmodx(val, 0)
|
||||
@ -123,11 +123,11 @@ pub fn __fmodx(a: f80, b: f80) callconv(.C) f80 {
|
||||
|
||||
// Combine the exponent with the sign and significand, normalize if happened to be denormalized
|
||||
if (expA < -fractionalBits) {
|
||||
return @as(T, @bitCast(signA));
|
||||
return @bitCast(signA);
|
||||
} else if (expA <= 0) {
|
||||
return @as(T, @bitCast((lowA >> @as(math.Log2Int(u64), @intCast(1 - expA))) | signA));
|
||||
return @bitCast((lowA >> @as(math.Log2Int(u64), @intCast(1 - expA))) | signA);
|
||||
} else {
|
||||
return @as(T, @bitCast(lowA | (@as(Z, @as(u16, @intCast(expA))) << significandBits) | signA));
|
||||
return @bitCast(lowA | (@as(Z, @as(u16, @intCast(expA))) << significandBits) | signA);
|
||||
}
|
||||
}
|
||||
|
||||
@ -155,8 +155,8 @@ pub fn fmodq(a: f128, b: f128) callconv(.C) f128 {
|
||||
};
|
||||
|
||||
const signA = aPtr_u16[exp_and_sign_index] & 0x8000;
|
||||
var expA = @as(i32, @intCast((aPtr_u16[exp_and_sign_index] & 0x7fff)));
|
||||
var expB = @as(i32, @intCast((bPtr_u16[exp_and_sign_index] & 0x7fff)));
|
||||
var expA: i32 = @intCast((aPtr_u16[exp_and_sign_index] & 0x7fff));
|
||||
var expB: i32 = @intCast((bPtr_u16[exp_and_sign_index] & 0x7fff));
|
||||
|
||||
// There are 3 cases where the answer is undefined, check for:
|
||||
// - fmodq(val, 0)
|
||||
@ -270,10 +270,10 @@ inline fn generic_fmod(comptime T: type, x: T, y: T) T {
|
||||
const exp_bits = if (T == f32) 9 else 12;
|
||||
const bits_minus_1 = bits - 1;
|
||||
const mask = if (T == f32) 0xff else 0x7ff;
|
||||
var ux = @as(uint, @bitCast(x));
|
||||
var uy = @as(uint, @bitCast(y));
|
||||
var ex = @as(i32, @intCast((ux >> digits) & mask));
|
||||
var ey = @as(i32, @intCast((uy >> digits) & mask));
|
||||
var ux: uint = @bitCast(x);
|
||||
var uy: uint = @bitCast(y);
|
||||
var ex: i32 = @intCast((ux >> digits) & mask);
|
||||
var ey: i32 = @intCast((uy >> digits) & mask);
|
||||
const sx = if (T == f32) @as(u32, @intCast(ux & 0x80000000)) else @as(i32, @intCast(ux >> bits_minus_1));
|
||||
var i: uint = undefined;
|
||||
|
||||
@ -343,7 +343,7 @@ inline fn generic_fmod(comptime T: type, x: T, y: T) T {
|
||||
} else {
|
||||
ux |= @as(uint, @intCast(sx)) << bits_minus_1;
|
||||
}
|
||||
return @as(T, @bitCast(ux));
|
||||
return @bitCast(ux);
|
||||
}
|
||||
|
||||
test "fmodf" {
|
||||
|
@ -27,7 +27,7 @@ comptime {
|
||||
|
||||
pub fn __logh(a: f16) callconv(.C) f16 {
|
||||
// TODO: more efficient implementation
|
||||
return @as(f16, @floatCast(logf(a)));
|
||||
return @floatCast(logf(a));
|
||||
}
|
||||
|
||||
pub fn logf(x_: f32) callconv(.C) f32 {
|
||||
@ -39,7 +39,7 @@ pub fn logf(x_: f32) callconv(.C) f32 {
|
||||
const Lg4: f32 = 0xf89e26.0p-26;
|
||||
|
||||
var x = x_;
|
||||
var ix = @as(u32, @bitCast(x));
|
||||
var ix: u32 = @bitCast(x);
|
||||
var k: i32 = 0;
|
||||
|
||||
// x < 2^(-126)
|
||||
@ -56,7 +56,7 @@ pub fn logf(x_: f32) callconv(.C) f32 {
|
||||
// subnormal, scale x
|
||||
k -= 25;
|
||||
x *= 0x1.0p25;
|
||||
ix = @as(u32, @bitCast(x));
|
||||
ix = @bitCast(x);
|
||||
} else if (ix >= 0x7F800000) {
|
||||
return x;
|
||||
} else if (ix == 0x3F800000) {
|
||||
@ -67,7 +67,7 @@ pub fn logf(x_: f32) callconv(.C) f32 {
|
||||
ix += 0x3F800000 - 0x3F3504F3;
|
||||
k += @as(i32, @intCast(ix >> 23)) - 0x7F;
|
||||
ix = (ix & 0x007FFFFF) + 0x3F3504F3;
|
||||
x = @as(f32, @bitCast(ix));
|
||||
x = @bitCast(ix);
|
||||
|
||||
const f = x - 1.0;
|
||||
const s = f / (2.0 + f);
|
||||
@ -77,7 +77,7 @@ pub fn logf(x_: f32) callconv(.C) f32 {
|
||||
const t2 = z * (Lg1 + w * Lg3);
|
||||
const R = t2 + t1;
|
||||
const hfsq = 0.5 * f * f;
|
||||
const dk = @as(f32, @floatFromInt(k));
|
||||
const dk: f32 = @floatFromInt(k);
|
||||
|
||||
return s * (hfsq + R) + dk * ln2_lo - hfsq + f + dk * ln2_hi;
|
||||
}
|
||||
@ -94,8 +94,8 @@ pub fn log(x_: f64) callconv(.C) f64 {
|
||||
const Lg7: f64 = 1.479819860511658591e-01;
|
||||
|
||||
var x = x_;
|
||||
var ix = @as(u64, @bitCast(x));
|
||||
var hx = @as(u32, @intCast(ix >> 32));
|
||||
var ix: u64 = @bitCast(x);
|
||||
var hx: u32 = @intCast(ix >> 32);
|
||||
var k: i32 = 0;
|
||||
|
||||
if (hx < 0x00100000 or hx >> 31 != 0) {
|
||||
@ -111,7 +111,7 @@ pub fn log(x_: f64) callconv(.C) f64 {
|
||||
// subnormal, scale x
|
||||
k -= 54;
|
||||
x *= 0x1.0p54;
|
||||
hx = @as(u32, @intCast(@as(u64, @bitCast(ix)) >> 32));
|
||||
hx = @intCast(@as(u64, @bitCast(ix)) >> 32);
|
||||
} else if (hx >= 0x7FF00000) {
|
||||
return x;
|
||||
} else if (hx == 0x3FF00000 and ix << 32 == 0) {
|
||||
@ -123,7 +123,7 @@ pub fn log(x_: f64) callconv(.C) f64 {
|
||||
k += @as(i32, @intCast(hx >> 20)) - 0x3FF;
|
||||
hx = (hx & 0x000FFFFF) + 0x3FE6A09E;
|
||||
ix = (@as(u64, hx) << 32) | (ix & 0xFFFFFFFF);
|
||||
x = @as(f64, @bitCast(ix));
|
||||
x = @bitCast(ix);
|
||||
|
||||
const f = x - 1.0;
|
||||
const hfsq = 0.5 * f * f;
|
||||
@ -133,19 +133,19 @@ pub fn log(x_: f64) callconv(.C) f64 {
|
||||
const t1 = w * (Lg2 + w * (Lg4 + w * Lg6));
|
||||
const t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));
|
||||
const R = t2 + t1;
|
||||
const dk = @as(f64, @floatFromInt(k));
|
||||
const dk: f64 = @floatFromInt(k);
|
||||
|
||||
return s * (hfsq + R) + dk * ln2_lo - hfsq + f + dk * ln2_hi;
|
||||
}
|
||||
|
||||
pub fn __logx(a: f80) callconv(.C) f80 {
|
||||
// TODO: more efficient implementation
|
||||
return @as(f80, @floatCast(logq(a)));
|
||||
return @floatCast(logq(a));
|
||||
}
|
||||
|
||||
pub fn logq(a: f128) callconv(.C) f128 {
|
||||
// TODO: more correct implementation
|
||||
return log(@as(f64, @floatCast(a)));
|
||||
return log(@floatCast(a));
|
||||
}
|
||||
|
||||
pub fn logl(x: c_longdouble) callconv(.C) c_longdouble {
|
||||
|
@ -82,11 +82,11 @@ pub fn log10f(x_: f32) callconv(.C) f32 {
|
||||
const hfsq = 0.5 * f * f;
|
||||
|
||||
var hi = f - hfsq;
|
||||
u = @as(u32, @bitCast(hi));
|
||||
u = @bitCast(hi);
|
||||
u &= 0xFFFFF000;
|
||||
hi = @as(f32, @bitCast(u));
|
||||
hi = @bitCast(u);
|
||||
const lo = f - hi - hfsq + s * (hfsq + R);
|
||||
const dk = @as(f32, @floatFromInt(k));
|
||||
const dk: f32 = @floatFromInt(k);
|
||||
|
||||
return dk * log10_2lo + (lo + hi) * ivln10lo + lo * ivln10hi + hi * ivln10hi + dk * log10_2hi;
|
||||
}
|
||||
@ -105,8 +105,8 @@ pub fn log10(x_: f64) callconv(.C) f64 {
|
||||
const Lg7: f64 = 1.479819860511658591e-01;
|
||||
|
||||
var x = x_;
|
||||
var ix = @as(u64, @bitCast(x));
|
||||
var hx = @as(u32, @intCast(ix >> 32));
|
||||
var ix: u64 = @bitCast(x);
|
||||
var hx: u32 = @intCast(ix >> 32);
|
||||
var k: i32 = 0;
|
||||
|
||||
if (hx < 0x00100000 or hx >> 31 != 0) {
|
||||
@ -122,7 +122,7 @@ pub fn log10(x_: f64) callconv(.C) f64 {
|
||||
// subnormal, scale x
|
||||
k -= 54;
|
||||
x *= 0x1.0p54;
|
||||
hx = @as(u32, @intCast(@as(u64, @bitCast(x)) >> 32));
|
||||
hx = @intCast(@as(u64, @bitCast(x)) >> 32);
|
||||
} else if (hx >= 0x7FF00000) {
|
||||
return x;
|
||||
} else if (hx == 0x3FF00000 and ix << 32 == 0) {
|
||||
@ -134,7 +134,7 @@ pub fn log10(x_: f64) callconv(.C) f64 {
|
||||
k += @as(i32, @intCast(hx >> 20)) - 0x3FF;
|
||||
hx = (hx & 0x000FFFFF) + 0x3FE6A09E;
|
||||
ix = (@as(u64, hx) << 32) | (ix & 0xFFFFFFFF);
|
||||
x = @as(f64, @bitCast(ix));
|
||||
x = @bitCast(ix);
|
||||
|
||||
const f = x - 1.0;
|
||||
const hfsq = 0.5 * f * f;
|
||||
@ -147,14 +147,14 @@ pub fn log10(x_: f64) callconv(.C) f64 {
|
||||
|
||||
// hi + lo = f - hfsq + s * (hfsq + R) ~ log(1 + f)
|
||||
var hi = f - hfsq;
|
||||
var hii = @as(u64, @bitCast(hi));
|
||||
var hii: u64 = @bitCast(hi);
|
||||
hii &= @as(u64, maxInt(u64)) << 32;
|
||||
hi = @as(f64, @bitCast(hii));
|
||||
hi = @bitCast(hii);
|
||||
const lo = f - hi - hfsq + s * (hfsq + R);
|
||||
|
||||
// val_hi + val_lo ~ log10(1 + f) + k * log10(2)
|
||||
var val_hi = hi * ivln10hi;
|
||||
const dk = @as(f64, @floatFromInt(k));
|
||||
const dk: f64 = @floatFromInt(k);
|
||||
const y = dk * log10_2hi;
|
||||
var val_lo = dk * log10_2lo + (lo + hi) * ivln10lo + lo * ivln10hi;
|
||||
|
||||
|
@ -28,7 +28,7 @@ comptime {
|
||||
|
||||
pub fn __log2h(a: f16) callconv(.C) f16 {
|
||||
// TODO: more efficient implementation
|
||||
return @as(f16, @floatCast(log2f(a)));
|
||||
return @floatCast(log2f(a));
|
||||
}
|
||||
|
||||
pub fn log2f(x_: f32) callconv(.C) f32 {
|
||||
@ -40,7 +40,7 @@ pub fn log2f(x_: f32) callconv(.C) f32 {
|
||||
const Lg4: f32 = 0xf89e26.0p-26;
|
||||
|
||||
var x = x_;
|
||||
var u = @as(u32, @bitCast(x));
|
||||
var u: u32 = @bitCast(x);
|
||||
var ix = u;
|
||||
var k: i32 = 0;
|
||||
|
||||
@ -57,7 +57,7 @@ pub fn log2f(x_: f32) callconv(.C) f32 {
|
||||
|
||||
k -= 25;
|
||||
x *= 0x1.0p25;
|
||||
ix = @as(u32, @bitCast(x));
|
||||
ix = @bitCast(x);
|
||||
} else if (ix >= 0x7F800000) {
|
||||
return x;
|
||||
} else if (ix == 0x3F800000) {
|
||||
@ -68,7 +68,7 @@ pub fn log2f(x_: f32) callconv(.C) f32 {
|
||||
ix += 0x3F800000 - 0x3F3504F3;
|
||||
k += @as(i32, @intCast(ix >> 23)) - 0x7F;
|
||||
ix = (ix & 0x007FFFFF) + 0x3F3504F3;
|
||||
x = @as(f32, @bitCast(ix));
|
||||
x = @bitCast(ix);
|
||||
|
||||
const f = x - 1.0;
|
||||
const s = f / (2.0 + f);
|
||||
@ -80,9 +80,9 @@ pub fn log2f(x_: f32) callconv(.C) f32 {
|
||||
const hfsq = 0.5 * f * f;
|
||||
|
||||
var hi = f - hfsq;
|
||||
u = @as(u32, @bitCast(hi));
|
||||
u = @bitCast(hi);
|
||||
u &= 0xFFFFF000;
|
||||
hi = @as(f32, @bitCast(u));
|
||||
hi = @bitCast(u);
|
||||
const lo = f - hi - hfsq + s * (hfsq + R);
|
||||
return (lo + hi) * ivln2lo + lo * ivln2hi + hi * ivln2hi + @as(f32, @floatFromInt(k));
|
||||
}
|
||||
@ -99,8 +99,8 @@ pub fn log2(x_: f64) callconv(.C) f64 {
|
||||
const Lg7: f64 = 1.479819860511658591e-01;
|
||||
|
||||
var x = x_;
|
||||
var ix = @as(u64, @bitCast(x));
|
||||
var hx = @as(u32, @intCast(ix >> 32));
|
||||
var ix: u64 = @bitCast(x);
|
||||
var hx: u32 = @intCast(ix >> 32);
|
||||
var k: i32 = 0;
|
||||
|
||||
if (hx < 0x00100000 or hx >> 31 != 0) {
|
||||
@ -116,7 +116,7 @@ pub fn log2(x_: f64) callconv(.C) f64 {
|
||||
// subnormal, scale x
|
||||
k -= 54;
|
||||
x *= 0x1.0p54;
|
||||
hx = @as(u32, @intCast(@as(u64, @bitCast(x)) >> 32));
|
||||
hx = @intCast(@as(u64, @bitCast(x)) >> 32);
|
||||
} else if (hx >= 0x7FF00000) {
|
||||
return x;
|
||||
} else if (hx == 0x3FF00000 and ix << 32 == 0) {
|
||||
@ -128,7 +128,7 @@ pub fn log2(x_: f64) callconv(.C) f64 {
|
||||
k += @as(i32, @intCast(hx >> 20)) - 0x3FF;
|
||||
hx = (hx & 0x000FFFFF) + 0x3FE6A09E;
|
||||
ix = (@as(u64, hx) << 32) | (ix & 0xFFFFFFFF);
|
||||
x = @as(f64, @bitCast(ix));
|
||||
x = @bitCast(ix);
|
||||
|
||||
const f = x - 1.0;
|
||||
const hfsq = 0.5 * f * f;
|
||||
@ -143,14 +143,14 @@ pub fn log2(x_: f64) callconv(.C) f64 {
|
||||
var hi = f - hfsq;
|
||||
var hii = @as(u64, @bitCast(hi));
|
||||
hii &= @as(u64, maxInt(u64)) << 32;
|
||||
hi = @as(f64, @bitCast(hii));
|
||||
hi = @bitCast(hii);
|
||||
const lo = f - hi - hfsq + s * (hfsq + R);
|
||||
|
||||
var val_hi = hi * ivln2hi;
|
||||
var val_lo = (lo + hi) * ivln2lo + lo * ivln2hi;
|
||||
|
||||
// spadd(val_hi, val_lo, y)
|
||||
const y = @as(f64, @floatFromInt(k));
|
||||
const y: f64 = @floatFromInt(k);
|
||||
const ww = y + val_hi;
|
||||
val_lo += (y - ww) + val_hi;
|
||||
val_hi = ww;
|
||||
@ -160,12 +160,12 @@ pub fn log2(x_: f64) callconv(.C) f64 {
|
||||
|
||||
pub fn __log2x(a: f80) callconv(.C) f80 {
|
||||
// TODO: more efficient implementation
|
||||
return @as(f80, @floatCast(log2q(a)));
|
||||
return @floatCast(log2q(a));
|
||||
}
|
||||
|
||||
pub fn log2q(a: f128) callconv(.C) f128 {
|
||||
// TODO: more correct implementation
|
||||
return log2(@as(f64, @floatCast(a)));
|
||||
return log2(@floatCast(a));
|
||||
}
|
||||
|
||||
pub fn log2l(x: c_longdouble) callconv(.C) c_longdouble {
|
||||
|
@ -24,7 +24,7 @@ pub fn __modti3(a: i128, b: i128) callconv(.C) i128 {
|
||||
const v2u64 = @Vector(2, u64);
|
||||
|
||||
fn __modti3_windows_x86_64(a: v2u64, b: v2u64) callconv(.C) v2u64 {
|
||||
return @as(v2u64, @bitCast(mod(@as(i128, @bitCast(a)), @as(i128, @bitCast(b)))));
|
||||
return @bitCast(mod(@as(i128, @bitCast(a)), @as(i128, @bitCast(b))));
|
||||
}
|
||||
|
||||
inline fn mod(a: i128, b: i128) i128 {
|
||||
|
@ -21,8 +21,8 @@ comptime {
|
||||
}
|
||||
|
||||
pub fn __mulsi3(a: i32, b: i32) callconv(.C) i32 {
|
||||
var ua = @as(u32, @bitCast(a));
|
||||
var ub = @as(u32, @bitCast(b));
|
||||
var ua: u32 = @bitCast(a);
|
||||
var ub: u32 = @bitCast(b);
|
||||
var r: u32 = 0;
|
||||
|
||||
while (ua > 0) {
|
||||
@ -31,7 +31,7 @@ pub fn __mulsi3(a: i32, b: i32) callconv(.C) i32 {
|
||||
ub <<= 1;
|
||||
}
|
||||
|
||||
return @as(i32, @bitCast(r));
|
||||
return @bitCast(r);
|
||||
}
|
||||
|
||||
pub fn __muldi3(a: i64, b: i64) callconv(.C) i64 {
|
||||
@ -93,7 +93,7 @@ pub fn __multi3(a: i128, b: i128) callconv(.C) i128 {
|
||||
const v2u64 = @Vector(2, u64);
|
||||
|
||||
fn __multi3_windows_x86_64(a: v2u64, b: v2u64) callconv(.C) v2u64 {
|
||||
return @as(v2u64, @bitCast(mulX(i128, @as(i128, @bitCast(a)), @as(i128, @bitCast(b)))));
|
||||
return @bitCast(mulX(i128, @as(i128, @bitCast(a)), @as(i128, @bitCast(b))));
|
||||
}
|
||||
|
||||
test {
|
||||
|
@ -54,27 +54,27 @@ pub inline fn mulf3(comptime T: type, a: T, b: T) T {
|
||||
if (aAbs == infRep) {
|
||||
// infinity * non-zero = +/- infinity
|
||||
if (bAbs != 0) {
|
||||
return @as(T, @bitCast(aAbs | productSign));
|
||||
return @bitCast(aAbs | productSign);
|
||||
} else {
|
||||
// infinity * zero = NaN
|
||||
return @as(T, @bitCast(qnanRep));
|
||||
return @bitCast(qnanRep);
|
||||
}
|
||||
}
|
||||
|
||||
if (bAbs == infRep) {
|
||||
// non-zero * infinity = +/- infinity
|
||||
if (aAbs != 0) {
|
||||
return @as(T, @bitCast(bAbs | productSign));
|
||||
return @bitCast(bAbs | productSign);
|
||||
} else {
|
||||
// zero * infinity = NaN
|
||||
return @as(T, @bitCast(qnanRep));
|
||||
return @bitCast(qnanRep);
|
||||
}
|
||||
}
|
||||
|
||||
// zero * anything = +/- zero
|
||||
if (aAbs == 0) return @as(T, @bitCast(productSign));
|
||||
if (aAbs == 0) return @bitCast(productSign);
|
||||
// anything * zero = +/- zero
|
||||
if (bAbs == 0) return @as(T, @bitCast(productSign));
|
||||
if (bAbs == 0) return @bitCast(productSign);
|
||||
|
||||
// one or both of a or b is denormal, the other (if applicable) is a
|
||||
// normal number. Renormalize one or both of a and b, and set scale to
|
||||
|
@ -4,8 +4,8 @@
|
||||
|
||||
const std = @import("std");
|
||||
const math = std.math;
|
||||
const qnan128 = @as(f128, @bitCast(@as(u128, 0x7fff800000000000) << 64));
|
||||
const inf128 = @as(f128, @bitCast(@as(u128, 0x7fff000000000000) << 64));
|
||||
const qnan128: f128 = @bitCast(@as(u128, 0x7fff800000000000) << 64);
|
||||
const inf128: f128 = @bitCast(@as(u128, 0x7fff000000000000) << 64);
|
||||
|
||||
const __multf3 = @import("multf3.zig").__multf3;
|
||||
const __mulxf3 = @import("mulxf3.zig").__mulxf3;
|
||||
@ -16,9 +16,9 @@ const __mulsf3 = @import("mulsf3.zig").__mulsf3;
|
||||
// use two 64-bit integers instead of one 128-bit integer
|
||||
// because 128-bit integer constant can't be assigned directly
|
||||
fn compareResultLD(result: f128, expectedHi: u64, expectedLo: u64) bool {
|
||||
const rep = @as(u128, @bitCast(result));
|
||||
const hi = @as(u64, @intCast(rep >> 64));
|
||||
const lo = @as(u64, @truncate(rep));
|
||||
const rep: u128 = @bitCast(result);
|
||||
const hi: u64 = @intCast(rep >> 64);
|
||||
const lo: u64 = @truncate(rep);
|
||||
|
||||
if (hi == expectedHi and lo == expectedLo) {
|
||||
return true;
|
||||
@ -45,8 +45,7 @@ fn test__multf3(a: f128, b: f128, expected_hi: u64, expected_lo: u64) !void {
|
||||
|
||||
fn makeNaN128(rand: u64) f128 {
|
||||
const int_result = @as(u128, 0x7fff000000000000 | (rand & 0xffffffffffff)) << 64;
|
||||
const float_result = @as(f128, @bitCast(int_result));
|
||||
return float_result;
|
||||
return @bitCast(int_result);
|
||||
}
|
||||
test "multf3" {
|
||||
// qNaN * any = qNaN
|
||||
@ -108,11 +107,11 @@ test "multf3" {
|
||||
try test__multf3(2.0, math.floatTrueMin(f128), 0x0000_0000_0000_0000, 0x0000_0000_0000_0002);
|
||||
}
|
||||
|
||||
const qnan80 = @as(f80, @bitCast(@as(u80, @bitCast(math.nan(f80))) | (1 << (math.floatFractionalBits(f80) - 1))));
|
||||
const qnan80: f80 = @bitCast(@as(u80, @bitCast(math.nan(f80))) | (1 << (math.floatFractionalBits(f80) - 1)));
|
||||
|
||||
fn test__mulxf3(a: f80, b: f80, expected: u80) !void {
|
||||
const x = __mulxf3(a, b);
|
||||
const rep = @as(u80, @bitCast(x));
|
||||
const rep: u80 = @bitCast(x);
|
||||
|
||||
if (rep == expected)
|
||||
return;
|
||||
|
@ -3,7 +3,7 @@ const parity = @import("parity.zig");
|
||||
const testing = std.testing;
|
||||
|
||||
fn parityti2Naive(a: i128) i32 {
|
||||
var x = @as(u128, @bitCast(a));
|
||||
var x: u128 = @bitCast(a);
|
||||
var has_parity: bool = false;
|
||||
while (x > 0) {
|
||||
has_parity = !has_parity;
|
||||
|
@ -57,17 +57,17 @@ fn medium(ix: u32, x: f64, y: *[2]f64) i32 {
|
||||
w = @"fn" * pio2_1t;
|
||||
}
|
||||
y[0] = r - w;
|
||||
ui = @as(u64, @bitCast(y[0]));
|
||||
ey = @as(i32, @intCast((ui >> 52) & 0x7ff));
|
||||
ex = @as(i32, @intCast(ix >> 20));
|
||||
ui = @bitCast(y[0]);
|
||||
ey = @intCast((ui >> 52) & 0x7ff);
|
||||
ex = @intCast(ix >> 20);
|
||||
if (ex - ey > 16) { // 2nd round, good to 118 bits
|
||||
t = r;
|
||||
w = @"fn" * pio2_2;
|
||||
r = t - w;
|
||||
w = @"fn" * pio2_2t - ((t - r) - w);
|
||||
y[0] = r - w;
|
||||
ui = @as(u64, @bitCast(y[0]));
|
||||
ey = @as(i32, @intCast((ui >> 52) & 0x7ff));
|
||||
ui = @bitCast(y[0]);
|
||||
ey = @intCast((ui >> 52) & 0x7ff);
|
||||
if (ex - ey > 49) { // 3rd round, good to 151 bits, covers all cases
|
||||
t = r;
|
||||
w = @"fn" * pio2_3;
|
||||
@ -95,9 +95,9 @@ pub fn rem_pio2(x: f64, y: *[2]f64) i32 {
|
||||
var i: i32 = undefined;
|
||||
var ui: u64 = undefined;
|
||||
|
||||
ui = @as(u64, @bitCast(x));
|
||||
ui = @bitCast(x);
|
||||
sign = ui >> 63 != 0;
|
||||
ix = @as(u32, @truncate((ui >> 32) & 0x7fffffff));
|
||||
ix = @truncate((ui >> 32) & 0x7fffffff);
|
||||
if (ix <= 0x400f6a7a) { // |x| ~<= 5pi/4
|
||||
if ((ix & 0xfffff) == 0x921fb) { // |x| ~= pi/2 or 2pi/2
|
||||
return medium(ix, x, y);
|
||||
@ -171,7 +171,7 @@ pub fn rem_pio2(x: f64, y: *[2]f64) i32 {
|
||||
return 0;
|
||||
}
|
||||
// set z = scalbn(|x|,-ilogb(x)+23)
|
||||
ui = @as(u64, @bitCast(x));
|
||||
ui = @bitCast(x);
|
||||
ui &= std.math.maxInt(u64) >> 12;
|
||||
ui |= @as(u64, 0x3ff + 23) << 52;
|
||||
z = @as(f64, @bitCast(ui));
|
||||
|
@ -322,7 +322,7 @@ pub fn rem_pio2_large(x: []f64, y: []f64, e0: i32, nx: i32, prec: usize) i32 {
|
||||
i += 1;
|
||||
j -= 1;
|
||||
}) {
|
||||
fw = @as(f64, @floatFromInt(@as(i32, @intFromFloat(0x1p-24 * z))));
|
||||
fw = @floatFromInt(@as(i32, @intFromFloat(0x1p-24 * z)));
|
||||
iq[U(i)] = @as(i32, @intFromFloat(z - 0x1p24 * fw));
|
||||
z = q[U(j - 1)] + fw;
|
||||
}
|
||||
@ -330,7 +330,7 @@ pub fn rem_pio2_large(x: []f64, y: []f64, e0: i32, nx: i32, prec: usize) i32 {
|
||||
// compute n
|
||||
z = math.scalbn(z, q0); // actual value of z
|
||||
z -= 8.0 * @floor(z * 0.125); // trim off integer >= 8
|
||||
n = @as(i32, @intFromFloat(z));
|
||||
n = @intFromFloat(z);
|
||||
z -= @as(f64, @floatFromInt(n));
|
||||
ih = 0;
|
||||
if (q0 > 0) { // need iq[jz-1] to determine n
|
||||
@ -414,7 +414,7 @@ pub fn rem_pio2_large(x: []f64, y: []f64, e0: i32, nx: i32, prec: usize) i32 {
|
||||
} else { // break z into 24-bit if necessary
|
||||
z = math.scalbn(z, -q0);
|
||||
if (z >= 0x1p24) {
|
||||
fw = @as(f64, @floatFromInt(@as(i32, @intFromFloat(0x1p-24 * z))));
|
||||
fw = @floatFromInt(@as(i32, @intFromFloat(0x1p-24 * z)));
|
||||
iq[U(jz)] = @as(i32, @intFromFloat(z - 0x1p24 * fw));
|
||||
jz += 1;
|
||||
q0 += 24;
|
||||
|
@ -30,14 +30,14 @@ pub fn rem_pio2f(x: f32, y: *f64) i32 {
|
||||
var e0: u32 = undefined;
|
||||
var ui: u32 = undefined;
|
||||
|
||||
ui = @as(u32, @bitCast(x));
|
||||
ui = @bitCast(x);
|
||||
ix = ui & 0x7fffffff;
|
||||
|
||||
// 25+53 bit pi is good enough for medium size
|
||||
if (ix < 0x4dc90fdb) { // |x| ~< 2^28*(pi/2), medium size
|
||||
// Use a specialized rint() to get fn.
|
||||
@"fn" = @as(f64, @floatCast(x)) * invpio2 + toint - toint;
|
||||
n = @as(i32, @intFromFloat(@"fn"));
|
||||
n = @intFromFloat(@"fn");
|
||||
y.* = x - @"fn" * pio2_1 - @"fn" * pio2_1t;
|
||||
// Matters with directed rounding.
|
||||
if (y.* < -pio4) {
|
||||
|
@ -27,14 +27,14 @@ comptime {
|
||||
|
||||
pub fn __roundh(x: f16) callconv(.C) f16 {
|
||||
// TODO: more efficient implementation
|
||||
return @as(f16, @floatCast(roundf(x)));
|
||||
return @floatCast(roundf(x));
|
||||
}
|
||||
|
||||
pub fn roundf(x_: f32) callconv(.C) f32 {
|
||||
const f32_toint = 1.0 / math.floatEps(f32);
|
||||
|
||||
var x = x_;
|
||||
const u = @as(u32, @bitCast(x));
|
||||
const u: u32 = @bitCast(x);
|
||||
const e = (u >> 23) & 0xFF;
|
||||
var y: f32 = undefined;
|
||||
|
||||
@ -69,7 +69,7 @@ pub fn round(x_: f64) callconv(.C) f64 {
|
||||
const f64_toint = 1.0 / math.floatEps(f64);
|
||||
|
||||
var x = x_;
|
||||
const u = @as(u64, @bitCast(x));
|
||||
const u: u64 = @bitCast(x);
|
||||
const e = (u >> 52) & 0x7FF;
|
||||
var y: f64 = undefined;
|
||||
|
||||
@ -102,14 +102,14 @@ pub fn round(x_: f64) callconv(.C) f64 {
|
||||
|
||||
pub fn __roundx(x: f80) callconv(.C) f80 {
|
||||
// TODO: more efficient implementation
|
||||
return @as(f80, @floatCast(roundq(x)));
|
||||
return @floatCast(roundq(x));
|
||||
}
|
||||
|
||||
pub fn roundq(x_: f128) callconv(.C) f128 {
|
||||
const f128_toint = 1.0 / math.floatEps(f128);
|
||||
|
||||
var x = x_;
|
||||
const u = @as(u128, @bitCast(x));
|
||||
const u: u128 = @bitCast(x);
|
||||
const e = (u >> 112) & 0x7FFF;
|
||||
var y: f128 = undefined;
|
||||
|
||||
|
@ -218,7 +218,7 @@ inline fn sincos_generic(comptime F: type, x: F, r_sin: *F, r_cos: *F) void {
|
||||
const bits = @typeInfo(F).Float.bits;
|
||||
const I = std.meta.Int(.unsigned, bits);
|
||||
const ix = @as(I, @bitCast(x)) & (math.maxInt(I) >> 1);
|
||||
const se = @as(u16, @truncate(ix >> (bits - 16)));
|
||||
const se: u16 = @truncate(ix >> (bits - 16));
|
||||
|
||||
if (se == 0x7fff) {
|
||||
const result = x - x;
|
||||
|
@ -125,7 +125,7 @@ pub fn sqrt(x: f64) callconv(.C) f64 {
|
||||
}
|
||||
|
||||
// normalize x
|
||||
var m = @as(i32, @intCast(ix0 >> 20));
|
||||
var m: i32 = @intCast(ix0 >> 20);
|
||||
if (m == 0) {
|
||||
// subnormal
|
||||
while (ix0 == 0) {
|
||||
|
@ -33,7 +33,7 @@ comptime {
|
||||
|
||||
pub fn __tanh(x: f16) callconv(.C) f16 {
|
||||
// TODO: more efficient implementation
|
||||
return @as(f16, @floatCast(tanf(x)));
|
||||
return @floatCast(tanf(x));
|
||||
}
|
||||
|
||||
pub fn tanf(x: f32) callconv(.C) f32 {
|
||||
@ -43,7 +43,7 @@ pub fn tanf(x: f32) callconv(.C) f32 {
|
||||
const t3pio2: f64 = 3.0 * math.pi / 2.0; // 0x4012D97C, 0x7F3321D2
|
||||
const t4pio2: f64 = 4.0 * math.pi / 2.0; // 0x401921FB, 0x54442D18
|
||||
|
||||
var ix = @as(u32, @bitCast(x));
|
||||
var ix: u32 = @bitCast(x);
|
||||
const sign = ix >> 31 != 0;
|
||||
ix &= 0x7fffffff;
|
||||
|
||||
|
@ -199,7 +199,7 @@ pub fn __tan(x_: f64, y_: f64, odd: bool) f64 {
|
||||
var hx: u32 = undefined;
|
||||
var sign: bool = undefined;
|
||||
|
||||
hx = @as(u32, @intCast(@as(u64, @bitCast(x)) >> 32));
|
||||
hx = @intCast(@as(u64, @bitCast(x)) >> 32);
|
||||
const big = (hx & 0x7fffffff) >= 0x3FE59428; // |x| >= 0.6744
|
||||
if (big) {
|
||||
sign = hx >> 31 != 0;
|
||||
|
@ -27,11 +27,11 @@ comptime {
|
||||
|
||||
pub fn __trunch(x: f16) callconv(.C) f16 {
|
||||
// TODO: more efficient implementation
|
||||
return @as(f16, @floatCast(truncf(x)));
|
||||
return @floatCast(truncf(x));
|
||||
}
|
||||
|
||||
pub fn truncf(x: f32) callconv(.C) f32 {
|
||||
const u = @as(u32, @bitCast(x));
|
||||
const u: u32 = @bitCast(x);
|
||||
var e = @as(i32, @intCast(((u >> 23) & 0xFF))) - 0x7F + 9;
|
||||
var m: u32 = undefined;
|
||||
|
||||
@ -47,12 +47,12 @@ pub fn truncf(x: f32) callconv(.C) f32 {
|
||||
return x;
|
||||
} else {
|
||||
math.doNotOptimizeAway(x + 0x1p120);
|
||||
return @as(f32, @bitCast(u & ~m));
|
||||
return @bitCast(u & ~m);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn trunc(x: f64) callconv(.C) f64 {
|
||||
const u = @as(u64, @bitCast(x));
|
||||
const u: u64 = @bitCast(x);
|
||||
var e = @as(i32, @intCast(((u >> 52) & 0x7FF))) - 0x3FF + 12;
|
||||
var m: u64 = undefined;
|
||||
|
||||
@ -68,7 +68,7 @@ pub fn trunc(x: f64) callconv(.C) f64 {
|
||||
return x;
|
||||
} else {
|
||||
math.doNotOptimizeAway(x + 0x1p120);
|
||||
return @as(f64, @bitCast(u & ~m));
|
||||
return @bitCast(u & ~m);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -72,8 +72,8 @@ pub inline fn truncf(comptime dst_t: type, comptime src_t: type, a: src_t) dst_t
|
||||
// a underflows on conversion to the destination type or is an exact
|
||||
// zero. The result may be a denormal or zero. Extract the exponent
|
||||
// to get the shift amount for the denormalization.
|
||||
const aExp = @as(u32, @intCast(aAbs >> srcSigBits));
|
||||
const shift = @as(u32, @intCast(srcExpBias - dstExpBias - aExp + 1));
|
||||
const aExp: u32 = @intCast(aAbs >> srcSigBits);
|
||||
const shift: u32 = @intCast(srcExpBias - dstExpBias - aExp + 1);
|
||||
|
||||
const significand: src_rep_t = (aRep & srcSignificandMask) | srcMinNormal;
|
||||
|
||||
|
@ -10,7 +10,7 @@ const __trunctfdf2 = @import("trunctfdf2.zig").__trunctfdf2;
|
||||
const __trunctfxf2 = @import("trunctfxf2.zig").__trunctfxf2;
|
||||
|
||||
fn test__truncsfhf2(a: u32, expected: u16) !void {
|
||||
const actual = @as(u16, @bitCast(__truncsfhf2(@as(f32, @bitCast(a)))));
|
||||
const actual: u16 = @bitCast(__truncsfhf2(@bitCast(a)));
|
||||
|
||||
if (actual == expected) {
|
||||
return;
|
||||
@ -73,7 +73,7 @@ test "truncsfhf2" {
|
||||
}
|
||||
|
||||
fn test__truncdfhf2(a: f64, expected: u16) void {
|
||||
const rep = @as(u16, @bitCast(__truncdfhf2(a)));
|
||||
const rep: u16 = @bitCast(__truncdfhf2(a));
|
||||
|
||||
if (rep == expected) {
|
||||
return;
|
||||
@ -89,7 +89,7 @@ fn test__truncdfhf2(a: f64, expected: u16) void {
|
||||
}
|
||||
|
||||
fn test__truncdfhf2_raw(a: u64, expected: u16) void {
|
||||
const actual = @as(u16, @bitCast(__truncdfhf2(@as(f64, @bitCast(a)))));
|
||||
const actual: u16 = @bitCast(__truncdfhf2(@bitCast(a)));
|
||||
|
||||
if (actual == expected) {
|
||||
return;
|
||||
@ -141,7 +141,7 @@ test "truncdfhf2" {
|
||||
fn test__trunctfsf2(a: f128, expected: u32) void {
|
||||
const x = __trunctfsf2(a);
|
||||
|
||||
const rep = @as(u32, @bitCast(x));
|
||||
const rep: u32 = @bitCast(x);
|
||||
if (rep == expected) {
|
||||
return;
|
||||
}
|
||||
@ -157,11 +157,11 @@ fn test__trunctfsf2(a: f128, expected: u32) void {
|
||||
|
||||
test "trunctfsf2" {
|
||||
// qnan
|
||||
test__trunctfsf2(@as(f128, @bitCast(@as(u128, 0x7fff800000000000 << 64))), 0x7fc00000);
|
||||
test__trunctfsf2(@bitCast(@as(u128, 0x7fff800000000000 << 64)), 0x7fc00000);
|
||||
// nan
|
||||
test__trunctfsf2(@as(f128, @bitCast(@as(u128, (0x7fff000000000000 | (0x810000000000 & 0xffffffffffff)) << 64))), 0x7fc08000);
|
||||
test__trunctfsf2(@bitCast(@as(u128, (0x7fff000000000000 | (0x810000000000 & 0xffffffffffff)) << 64)), 0x7fc08000);
|
||||
// inf
|
||||
test__trunctfsf2(@as(f128, @bitCast(@as(u128, 0x7fff000000000000 << 64))), 0x7f800000);
|
||||
test__trunctfsf2(@bitCast(@as(u128, 0x7fff000000000000 << 64)), 0x7f800000);
|
||||
// zero
|
||||
test__trunctfsf2(0.0, 0x0);
|
||||
|
||||
@ -174,7 +174,7 @@ test "trunctfsf2" {
|
||||
fn test__trunctfdf2(a: f128, expected: u64) void {
|
||||
const x = __trunctfdf2(a);
|
||||
|
||||
const rep = @as(u64, @bitCast(x));
|
||||
const rep: u64 = @bitCast(x);
|
||||
if (rep == expected) {
|
||||
return;
|
||||
}
|
||||
@ -190,11 +190,11 @@ fn test__trunctfdf2(a: f128, expected: u64) void {
|
||||
|
||||
test "trunctfdf2" {
|
||||
// qnan
|
||||
test__trunctfdf2(@as(f128, @bitCast(@as(u128, 0x7fff800000000000 << 64))), 0x7ff8000000000000);
|
||||
test__trunctfdf2(@bitCast(@as(u128, 0x7fff800000000000 << 64)), 0x7ff8000000000000);
|
||||
// nan
|
||||
test__trunctfdf2(@as(f128, @bitCast(@as(u128, (0x7fff000000000000 | (0x810000000000 & 0xffffffffffff)) << 64))), 0x7ff8100000000000);
|
||||
test__trunctfdf2(@bitCast(@as(u128, (0x7fff000000000000 | (0x810000000000 & 0xffffffffffff)) << 64)), 0x7ff8100000000000);
|
||||
// inf
|
||||
test__trunctfdf2(@as(f128, @bitCast(@as(u128, 0x7fff000000000000 << 64))), 0x7ff0000000000000);
|
||||
test__trunctfdf2(@bitCast(@as(u128, 0x7fff000000000000 << 64)), 0x7ff0000000000000);
|
||||
// zero
|
||||
test__trunctfdf2(0.0, 0x0);
|
||||
|
||||
@ -207,7 +207,7 @@ test "trunctfdf2" {
|
||||
fn test__truncdfsf2(a: f64, expected: u32) void {
|
||||
const x = __truncdfsf2(a);
|
||||
|
||||
const rep = @as(u32, @bitCast(x));
|
||||
const rep: u32 = @bitCast(x);
|
||||
if (rep == expected) {
|
||||
return;
|
||||
}
|
||||
@ -225,11 +225,11 @@ fn test__truncdfsf2(a: f64, expected: u32) void {
|
||||
|
||||
test "truncdfsf2" {
|
||||
// nan & qnan
|
||||
test__truncdfsf2(@as(f64, @bitCast(@as(u64, 0x7ff8000000000000))), 0x7fc00000);
|
||||
test__truncdfsf2(@as(f64, @bitCast(@as(u64, 0x7ff0000000000001))), 0x7fc00000);
|
||||
test__truncdfsf2(@bitCast(@as(u64, 0x7ff8000000000000)), 0x7fc00000);
|
||||
test__truncdfsf2(@bitCast(@as(u64, 0x7ff0000000000001)), 0x7fc00000);
|
||||
// inf
|
||||
test__truncdfsf2(@as(f64, @bitCast(@as(u64, 0x7ff0000000000000))), 0x7f800000);
|
||||
test__truncdfsf2(@as(f64, @bitCast(@as(u64, 0xfff0000000000000))), 0xff800000);
|
||||
test__truncdfsf2(@bitCast(@as(u64, 0x7ff0000000000000)), 0x7f800000);
|
||||
test__truncdfsf2(@bitCast(@as(u64, 0xfff0000000000000)), 0xff800000);
|
||||
|
||||
test__truncdfsf2(0.0, 0x0);
|
||||
test__truncdfsf2(1.0, 0x3f800000);
|
||||
@ -242,7 +242,7 @@ test "truncdfsf2" {
|
||||
fn test__trunctfhf2(a: f128, expected: u16) void {
|
||||
const x = __trunctfhf2(a);
|
||||
|
||||
const rep = @as(u16, @bitCast(x));
|
||||
const rep: u16 = @bitCast(x);
|
||||
if (rep == expected) {
|
||||
return;
|
||||
}
|
||||
@ -254,11 +254,11 @@ fn test__trunctfhf2(a: f128, expected: u16) void {
|
||||
|
||||
test "trunctfhf2" {
|
||||
// qNaN
|
||||
test__trunctfhf2(@as(f128, @bitCast(@as(u128, 0x7fff8000000000000000000000000000))), 0x7e00);
|
||||
test__trunctfhf2(@bitCast(@as(u128, 0x7fff8000000000000000000000000000)), 0x7e00);
|
||||
// NaN
|
||||
test__trunctfhf2(@as(f128, @bitCast(@as(u128, 0x7fff0000000000000000000000000001))), 0x7e00);
|
||||
test__trunctfhf2(@bitCast(@as(u128, 0x7fff0000000000000000000000000001)), 0x7e00);
|
||||
// inf
|
||||
test__trunctfhf2(@as(f128, @bitCast(@as(u128, 0x7fff0000000000000000000000000000))), 0x7c00);
|
||||
test__trunctfhf2(@bitCast(@as(u128, 0x7fff0000000000000000000000000000)), 0x7c00);
|
||||
test__trunctfhf2(-@as(f128, @bitCast(@as(u128, 0x7fff0000000000000000000000000000))), 0xfc00);
|
||||
// zero
|
||||
test__trunctfhf2(0.0, 0x0);
|
||||
|
@ -44,7 +44,7 @@ pub fn __trunctfxf2(a: f128) callconv(.C) f80 {
|
||||
// destination format. We can convert by simply right-shifting with
|
||||
// rounding, adding the explicit integer bit, and adjusting the exponent
|
||||
res.fraction = @as(u64, @truncate(a_abs >> (src_sig_bits - dst_sig_bits))) | integer_bit;
|
||||
res.exp = @as(u16, @truncate(a_abs >> src_sig_bits));
|
||||
res.exp = @truncate(a_abs >> src_sig_bits);
|
||||
|
||||
const round_bits = a_abs & round_mask;
|
||||
if (round_bits > halfway) {
|
||||
|
@ -1850,7 +1850,7 @@ pub fn hex64(x: u64) [16]u8 {
|
||||
var result: [16]u8 = undefined;
|
||||
var i: usize = 0;
|
||||
while (i < 8) : (i += 1) {
|
||||
const byte = @as(u8, @truncate(x >> @as(u6, @intCast(8 * i))));
|
||||
const byte: u8 = @truncate(x >> @as(u6, @intCast(8 * i)));
|
||||
result[i * 2 + 0] = hex_charset[byte >> 4];
|
||||
result[i * 2 + 1] = hex_charset[byte & 15];
|
||||
}
|
||||
|
@ -206,7 +206,7 @@ pub const Edwards25519 = struct {
|
||||
var q = Edwards25519.identityElement;
|
||||
var pos: usize = 252;
|
||||
while (true) : (pos -= 4) {
|
||||
const slot = @as(u4, @truncate((s[pos >> 3] >> @as(u3, @truncate(pos)))));
|
||||
const slot: u4 = @truncate((s[pos >> 3] >> @as(u3, @truncate(pos))));
|
||||
if (vartime) {
|
||||
if (slot != 0) {
|
||||
q = q.add(pc[slot]);
|
||||
|
@ -90,7 +90,7 @@ fn AesOcb(comptime Aes: anytype) type {
|
||||
nx[16 - nonce_length - 1] = 1;
|
||||
nx[nx.len - nonce_length ..].* = npub;
|
||||
|
||||
const bottom = @as(u6, @truncate(nx[15]));
|
||||
const bottom: u6 = @truncate(nx[15]);
|
||||
nx[15] &= 0xc0;
|
||||
var ktop_: Block = undefined;
|
||||
aes_enc_ctx.encrypt(&ktop_, &nx);
|
||||
|
@ -508,18 +508,18 @@ pub fn Modulus(comptime max_bits: comptime_int) type {
|
||||
var need_sub = false;
|
||||
var i: usize = t_bits - 1;
|
||||
while (true) : (i -= 1) {
|
||||
var carry = @as(u1, @truncate(math.shr(Limb, y, i)));
|
||||
var carry: u1 = @truncate(math.shr(Limb, y, i));
|
||||
var borrow: u1 = 0;
|
||||
for (0..self.limbs_count()) |j| {
|
||||
const l = ct.select(need_sub, d_limbs[j], x_limbs[j]);
|
||||
var res = (l << 1) + carry;
|
||||
x_limbs[j] = @as(TLimb, @truncate(res));
|
||||
carry = @as(u1, @truncate(res >> t_bits));
|
||||
carry = @truncate(res >> t_bits);
|
||||
|
||||
res = x_limbs[j] -% m_limbs[j] -% borrow;
|
||||
d_limbs[j] = @as(TLimb, @truncate(res));
|
||||
|
||||
borrow = @as(u1, @truncate(res >> t_bits));
|
||||
borrow = @truncate(res >> t_bits);
|
||||
}
|
||||
need_sub = ct.eql(carry, borrow);
|
||||
if (i == 0) break;
|
||||
@ -531,7 +531,7 @@ pub fn Modulus(comptime max_bits: comptime_int) type {
|
||||
pub fn add(self: Self, x: Fe, y: Fe) Fe {
|
||||
var out = x;
|
||||
const overflow = out.v.addWithOverflow(y.v);
|
||||
const underflow = @as(u1, @bitCast(ct.limbsCmpLt(out.v, self.v)));
|
||||
const underflow: u1 = @bitCast(ct.limbsCmpLt(out.v, self.v));
|
||||
const need_sub = ct.eql(overflow, underflow);
|
||||
_ = out.v.conditionalSubWithOverflow(need_sub, self.v);
|
||||
return out;
|
||||
@ -540,7 +540,7 @@ pub fn Modulus(comptime max_bits: comptime_int) type {
|
||||
/// Subtracts two field elements (mod m).
|
||||
pub fn sub(self: Self, x: Fe, y: Fe) Fe {
|
||||
var out = x;
|
||||
const underflow = @as(bool, @bitCast(out.v.subWithOverflow(y.v)));
|
||||
const underflow: bool = @bitCast(out.v.subWithOverflow(y.v));
|
||||
_ = out.v.conditionalAddWithOverflow(underflow, self.v);
|
||||
return out;
|
||||
}
|
||||
|
@ -67,7 +67,7 @@ pub const IsapA128A = struct {
|
||||
var i: usize = 0;
|
||||
while (i < y.len * 8 - 1) : (i += 1) {
|
||||
const cur_byte_pos = i / 8;
|
||||
const cur_bit_pos = @as(u3, @truncate(7 - (i % 8)));
|
||||
const cur_bit_pos: u3 = @truncate(7 - (i % 8));
|
||||
const cur_bit = ((y[cur_byte_pos] >> cur_bit_pos) & 1) << 7;
|
||||
isap.st.addByte(cur_bit, 0);
|
||||
isap.st.permuteR(1);
|
||||
|
@ -638,7 +638,7 @@ fn montReduce(x: i32) i16 {
|
||||
// Note that x q' might be as big as 2³² and could overflow the int32
|
||||
// multiplication in the last line. However for any int32s a and b,
|
||||
// we have int32(int64(a)*int64(b)) = int32(a*b) and so the result is ok.
|
||||
const m = @as(i16, @truncate(@as(i32, @truncate(x *% qInv))));
|
||||
const m: i16 = @truncate(@as(i32, @truncate(x *% qInv)));
|
||||
|
||||
// Note that x - m q is divisible by R; indeed modulo R we have
|
||||
//
|
||||
@ -652,7 +652,7 @@ fn montReduce(x: i32) i16 {
|
||||
// and as both -2¹⁵ q ≤ m q, x < 2¹⁵ q, we have
|
||||
// -2¹⁶ q < x - m q < 2¹⁶ q and so -q < (x - m q) / R < q as desired.
|
||||
const yR = x - @as(i32, m) * @as(i32, Q);
|
||||
return @as(i16, @bitCast(@as(u16, @truncate(@as(u32, @bitCast(yR)) >> 16))));
|
||||
return @bitCast(@as(u16, @truncate(@as(u32, @bitCast(yR)) >> 16)));
|
||||
}
|
||||
|
||||
test "Test montReduce" {
|
||||
|
@ -142,7 +142,7 @@ pub fn Crc32WithPoly(comptime poly: Polynomial) type {
|
||||
var crc = tables[0][i];
|
||||
var j: usize = 1;
|
||||
while (j < 8) : (j += 1) {
|
||||
const index = @as(u8, @truncate(crc));
|
||||
const index: u8 = @truncate(crc);
|
||||
crc = tables[0][index] ^ (crc >> 8);
|
||||
tables[j][i] = crc;
|
||||
}
|
||||
|
@ -14,7 +14,7 @@ pub const Murmur2_32 = struct {
|
||||
|
||||
pub fn hashWithSeed(str: []const u8, seed: u32) u32 {
|
||||
const m: u32 = 0x5bd1e995;
|
||||
const len = @as(u32, @truncate(str.len));
|
||||
const len: u32 = @truncate(str.len);
|
||||
var h1: u32 = seed ^ len;
|
||||
for (@as([*]align(1) const u32, @ptrCast(str.ptr))[0..(len >> 2)]) |v| {
|
||||
var k1: u32 = v;
|
||||
@ -178,7 +178,7 @@ pub const Murmur3_32 = struct {
|
||||
pub fn hashWithSeed(str: []const u8, seed: u32) u32 {
|
||||
const c1: u32 = 0xcc9e2d51;
|
||||
const c2: u32 = 0x1b873593;
|
||||
const len = @as(u32, @truncate(str.len));
|
||||
const len: u32 = @truncate(str.len);
|
||||
var h1: u32 = seed;
|
||||
for (@as([*]align(1) const u32, @ptrCast(str.ptr))[0..(len >> 2)]) |v| {
|
||||
var k1: u32 = v;
|
||||
|
@ -899,7 +899,7 @@ pub fn HashMapUnmanaged(
|
||||
}
|
||||
|
||||
fn capacityForSize(size: Size) Size {
|
||||
var new_cap = @as(u32, @truncate((@as(u64, size) * 100) / max_load_percentage + 1));
|
||||
var new_cap: u32 = @truncate((@as(u64, size) * 100) / max_load_percentage + 1);
|
||||
new_cap = math.ceilPowerOfTwo(u32, new_cap) catch unreachable;
|
||||
return new_cap;
|
||||
}
|
||||
@ -1480,7 +1480,7 @@ pub fn HashMapUnmanaged(
|
||||
const new_cap = capacityForSize(self.size);
|
||||
try other.allocate(allocator, new_cap);
|
||||
other.initMetadatas();
|
||||
other.available = @as(u32, @truncate((new_cap * max_load_percentage) / 100));
|
||||
other.available = @truncate((new_cap * max_load_percentage) / 100);
|
||||
|
||||
var i: Size = 0;
|
||||
var metadata = self.metadata.?;
|
||||
@ -1515,7 +1515,7 @@ pub fn HashMapUnmanaged(
|
||||
defer map.deinit(allocator);
|
||||
try map.allocate(allocator, new_cap);
|
||||
map.initMetadatas();
|
||||
map.available = @as(u32, @truncate((new_cap * max_load_percentage) / 100));
|
||||
map.available = @truncate((new_cap * max_load_percentage) / 100);
|
||||
|
||||
if (self.size != 0) {
|
||||
const old_capacity = self.capacity();
|
||||
|
@ -10,8 +10,8 @@ pub fn readULEB128(comptime T: type, reader: anytype) !T {
|
||||
|
||||
const max_group = (@typeInfo(U).Int.bits + 6) / 7;
|
||||
|
||||
var value = @as(U, 0);
|
||||
var group = @as(ShiftT, 0);
|
||||
var value: U = 0;
|
||||
var group: ShiftT = 0;
|
||||
|
||||
while (group < max_group) : (group += 1) {
|
||||
const byte = try reader.readByte();
|
||||
@ -37,10 +37,10 @@ pub fn readULEB128(comptime T: type, reader: anytype) !T {
|
||||
pub fn writeULEB128(writer: anytype, uint_value: anytype) !void {
|
||||
const T = @TypeOf(uint_value);
|
||||
const U = if (@typeInfo(T).Int.bits < 8) u8 else T;
|
||||
var value = @as(U, @intCast(uint_value));
|
||||
var value: U = @intCast(uint_value);
|
||||
|
||||
while (true) {
|
||||
const byte = @as(u8, @truncate(value & 0x7f));
|
||||
const byte: u8 = @truncate(value & 0x7f);
|
||||
value >>= 7;
|
||||
if (value == 0) {
|
||||
try writer.writeByte(byte);
|
||||
@ -115,11 +115,11 @@ pub fn writeILEB128(writer: anytype, int_value: anytype) !void {
|
||||
const S = if (@typeInfo(T).Int.bits < 8) i8 else T;
|
||||
const U = std.meta.Int(.unsigned, @typeInfo(S).Int.bits);
|
||||
|
||||
var value = @as(S, @intCast(int_value));
|
||||
var value: S = @intCast(int_value);
|
||||
|
||||
while (true) {
|
||||
const uvalue = @as(U, @bitCast(value));
|
||||
const byte = @as(u8, @truncate(uvalue));
|
||||
const uvalue: U = @bitCast(value);
|
||||
const byte: u8 = @truncate(uvalue);
|
||||
value >>= 6;
|
||||
if (value == -1 or value == 0) {
|
||||
try writer.writeByte(byte & 0x7F);
|
||||
@ -141,7 +141,7 @@ pub fn writeILEB128(writer: anytype, int_value: anytype) !void {
|
||||
pub fn writeUnsignedFixed(comptime l: usize, ptr: *[l]u8, int: std.meta.Int(.unsigned, l * 7)) void {
|
||||
const T = @TypeOf(int);
|
||||
const U = if (@typeInfo(T).Int.bits < 8) u8 else T;
|
||||
var value = @as(U, @intCast(int));
|
||||
var value: U = @intCast(int);
|
||||
|
||||
comptime var i = 0;
|
||||
inline while (i < (l - 1)) : (i += 1) {
|
||||
|
@ -55,11 +55,11 @@ fn atanh_32(x: f32) f32 {
|
||||
}
|
||||
|
||||
fn atanh_64(x: f64) f64 {
|
||||
const u = @as(u64, @bitCast(x));
|
||||
const u: u64 = @bitCast(x);
|
||||
const e = (u >> 52) & 0x7FF;
|
||||
const s = u >> 63;
|
||||
|
||||
var y = @as(f64, @bitCast(u & (maxInt(u64) >> 1))); // |x|
|
||||
var y: f64 = @bitCast(u & (maxInt(u64) >> 1)); // |x|
|
||||
|
||||
if (y == 1.0) {
|
||||
return math.copysign(math.inf(f64), x);
|
||||
|
@ -26,10 +26,10 @@ fn cosh32(z: Complex(f32)) Complex(f32) {
|
||||
const x = z.re;
|
||||
const y = z.im;
|
||||
|
||||
const hx = @as(u32, @bitCast(x));
|
||||
const hx: u32 = @bitCast(x);
|
||||
const ix = hx & 0x7fffffff;
|
||||
|
||||
const hy = @as(u32, @bitCast(y));
|
||||
const hy: u32 = @bitCast(y);
|
||||
const iy = hy & 0x7fffffff;
|
||||
|
||||
if (ix < 0x7f800000 and iy < 0x7f800000) {
|
||||
@ -89,14 +89,14 @@ fn cosh64(z: Complex(f64)) Complex(f64) {
|
||||
const x = z.re;
|
||||
const y = z.im;
|
||||
|
||||
const fx = @as(u64, @bitCast(x));
|
||||
const hx = @as(u32, @intCast(fx >> 32));
|
||||
const lx = @as(u32, @truncate(fx));
|
||||
const fx: u64 = @bitCast(x);
|
||||
const hx: u32 = @intCast(fx >> 32);
|
||||
const lx: u32 = @truncate(fx);
|
||||
const ix = hx & 0x7fffffff;
|
||||
|
||||
const fy = @as(u64, @bitCast(y));
|
||||
const hy = @as(u32, @intCast(fy >> 32));
|
||||
const ly = @as(u32, @truncate(fy));
|
||||
const fy: u64 = @bitCast(y);
|
||||
const hy: u32 = @intCast(fy >> 32);
|
||||
const ly: u32 = @truncate(fy);
|
||||
const iy = hy & 0x7fffffff;
|
||||
|
||||
// nearly non-exceptional case where x, y are finite
|
||||
|
@ -75,18 +75,18 @@ fn exp64(z: Complex(f64)) Complex(f64) {
|
||||
const x = z.re;
|
||||
const y = z.im;
|
||||
|
||||
const fy = @as(u64, @bitCast(y));
|
||||
const hy = @as(u32, @intCast((fy >> 32) & 0x7fffffff));
|
||||
const ly = @as(u32, @truncate(fy));
|
||||
const fy: u64 = @bitCast(y);
|
||||
const hy: u32 = @intCast((fy >> 32) & 0x7fffffff);
|
||||
const ly: u32 = @truncate(fy);
|
||||
|
||||
// cexp(x + i0) = exp(x) + i0
|
||||
if (hy | ly == 0) {
|
||||
return Complex(f64).init(@exp(x), y);
|
||||
}
|
||||
|
||||
const fx = @as(u64, @bitCast(x));
|
||||
const hx = @as(u32, @intCast(fx >> 32));
|
||||
const lx = @as(u32, @truncate(fx));
|
||||
const fx: u64 = @bitCast(x);
|
||||
const hx: u32 = @intCast(fx >> 32);
|
||||
const lx: u32 = @truncate(fx);
|
||||
|
||||
// cexp(0 + iy) = cos(y) + isin(y)
|
||||
if ((hx & 0x7fffffff) | lx == 0) {
|
||||
|
@ -89,14 +89,14 @@ fn sinh64(z: Complex(f64)) Complex(f64) {
|
||||
const x = z.re;
|
||||
const y = z.im;
|
||||
|
||||
const fx = @as(u64, @bitCast(x));
|
||||
const hx = @as(u32, @intCast(fx >> 32));
|
||||
const lx = @as(u32, @truncate(fx));
|
||||
const fx: u64 = @bitCast(x);
|
||||
const hx: u32 = @intCast(fx >> 32);
|
||||
const lx: u32 = @truncate(fx);
|
||||
const ix = hx & 0x7fffffff;
|
||||
|
||||
const fy = @as(u64, @bitCast(y));
|
||||
const hy = @as(u32, @intCast(fy >> 32));
|
||||
const ly = @as(u32, @truncate(fy));
|
||||
const fy: u64 = @bitCast(y);
|
||||
const hy: u32 = @intCast(fy >> 32);
|
||||
const ly: u32 = @truncate(fy);
|
||||
const iy = hy & 0x7fffffff;
|
||||
|
||||
if (ix < 0x7ff00000 and iy < 0x7ff00000) {
|
||||
|
@ -62,11 +62,11 @@ fn tanh64(z: Complex(f64)) Complex(f64) {
|
||||
const x = z.re;
|
||||
const y = z.im;
|
||||
|
||||
const fx = @as(u64, @bitCast(x));
|
||||
const fx: u64 = @bitCast(x);
|
||||
// TODO: zig should allow this conversion implicitly because it can notice that the value necessarily
|
||||
// fits in range.
|
||||
const hx = @as(u32, @intCast(fx >> 32));
|
||||
const lx = @as(u32, @truncate(fx));
|
||||
const hx: u32 = @intCast(fx >> 32);
|
||||
const lx: u32 = @truncate(fx);
|
||||
const ix = hx & 0x7fffffff;
|
||||
|
||||
if (ix >= 0x7ff00000) {
|
||||
@ -75,7 +75,7 @@ fn tanh64(z: Complex(f64)) Complex(f64) {
|
||||
return Complex(f64).init(x, r);
|
||||
}
|
||||
|
||||
const xx = @as(f64, @bitCast((@as(u64, hx - 0x40000000) << 32) | lx));
|
||||
const xx: f64 = @bitCast((@as(u64, hx - 0x40000000) << 32) | lx);
|
||||
const r = if (math.isInf(y)) y else @sin(y) * @cos(y);
|
||||
return Complex(f64).init(xx, math.copysign(@as(f64, 0.0), r));
|
||||
}
|
||||
|
@ -37,7 +37,7 @@ pub fn modf(x: anytype) modf_result(@TypeOf(x)) {
|
||||
fn modf32(x: f32) modf32_result {
|
||||
var result: modf32_result = undefined;
|
||||
|
||||
const u = @as(u32, @bitCast(x));
|
||||
const u: u32 = @bitCast(x);
|
||||
const e = @as(i32, @intCast((u >> 23) & 0xFF)) - 0x7F;
|
||||
const us = u & 0x80000000;
|
||||
|
||||
@ -73,7 +73,7 @@ fn modf32(x: f32) modf32_result {
|
||||
return result;
|
||||
}
|
||||
|
||||
const uf = @as(f32, @bitCast(u & ~mask));
|
||||
const uf: f32 = @bitCast(u & ~mask);
|
||||
result.ipart = uf;
|
||||
result.fpart = x - uf;
|
||||
return result;
|
||||
@ -82,7 +82,7 @@ fn modf32(x: f32) modf32_result {
|
||||
fn modf64(x: f64) modf64_result {
|
||||
var result: modf64_result = undefined;
|
||||
|
||||
const u = @as(u64, @bitCast(x));
|
||||
const u: u64 = @bitCast(x);
|
||||
const e = @as(i32, @intCast((u >> 52) & 0x7FF)) - 0x3FF;
|
||||
const us = u & (1 << 63);
|
||||
|
||||
|
@ -176,21 +176,21 @@ const require_aligned_register_pair =
|
||||
// Split a 64bit value into a {LSB,MSB} pair.
|
||||
// The LE/BE variants specify the endianness to assume.
|
||||
fn splitValueLE64(val: i64) [2]u32 {
|
||||
const u = @as(u64, @bitCast(val));
|
||||
const u: u64 = @bitCast(val);
|
||||
return [2]u32{
|
||||
@as(u32, @truncate(u)),
|
||||
@as(u32, @truncate(u >> 32)),
|
||||
};
|
||||
}
|
||||
fn splitValueBE64(val: i64) [2]u32 {
|
||||
const u = @as(u64, @bitCast(val));
|
||||
const u: u64 = @bitCast(val);
|
||||
return [2]u32{
|
||||
@as(u32, @truncate(u >> 32)),
|
||||
@as(u32, @truncate(u)),
|
||||
};
|
||||
}
|
||||
fn splitValue64(val: i64) [2]u32 {
|
||||
const u = @as(u64, @bitCast(val));
|
||||
const u: u64 = @bitCast(val);
|
||||
switch (native_endian) {
|
||||
.Little => return [2]u32{
|
||||
@as(u32, @truncate(u)),
|
||||
@ -467,7 +467,7 @@ pub fn read(fd: i32, buf: [*]u8, count: usize) usize {
|
||||
}
|
||||
|
||||
pub fn preadv(fd: i32, iov: [*]const iovec, count: usize, offset: i64) usize {
|
||||
const offset_u = @as(u64, @bitCast(offset));
|
||||
const offset_u: u64 = @bitCast(offset);
|
||||
return syscall5(
|
||||
.preadv,
|
||||
@as(usize, @bitCast(@as(isize, fd))),
|
||||
@ -482,7 +482,7 @@ pub fn preadv(fd: i32, iov: [*]const iovec, count: usize, offset: i64) usize {
|
||||
}
|
||||
|
||||
pub fn preadv2(fd: i32, iov: [*]const iovec, count: usize, offset: i64, flags: kernel_rwf) usize {
|
||||
const offset_u = @as(u64, @bitCast(offset));
|
||||
const offset_u: u64 = @bitCast(offset);
|
||||
return syscall6(
|
||||
.preadv2,
|
||||
@as(usize, @bitCast(@as(isize, fd))),
|
||||
@ -504,7 +504,7 @@ pub fn writev(fd: i32, iov: [*]const iovec_const, count: usize) usize {
|
||||
}
|
||||
|
||||
pub fn pwritev(fd: i32, iov: [*]const iovec_const, count: usize, offset: i64) usize {
|
||||
const offset_u = @as(u64, @bitCast(offset));
|
||||
const offset_u: u64 = @bitCast(offset);
|
||||
return syscall5(
|
||||
.pwritev,
|
||||
@as(usize, @bitCast(@as(isize, fd))),
|
||||
@ -517,7 +517,7 @@ pub fn pwritev(fd: i32, iov: [*]const iovec_const, count: usize, offset: i64) us
|
||||
}
|
||||
|
||||
pub fn pwritev2(fd: i32, iov: [*]const iovec_const, count: usize, offset: i64, flags: kernel_rwf) usize {
|
||||
const offset_u = @as(u64, @bitCast(offset));
|
||||
const offset_u: u64 = @bitCast(offset);
|
||||
return syscall6(
|
||||
.pwritev2,
|
||||
@as(usize, @bitCast(@as(isize, fd))),
|
||||
|
@ -1507,7 +1507,7 @@ pub fn io_uring_prep_renameat(
|
||||
0,
|
||||
@intFromPtr(new_path),
|
||||
);
|
||||
sqe.len = @as(u32, @bitCast(new_dir_fd));
|
||||
sqe.len = @bitCast(new_dir_fd);
|
||||
sqe.rw_flags = flags;
|
||||
}
|
||||
|
||||
@ -1562,7 +1562,7 @@ pub fn io_uring_prep_linkat(
|
||||
0,
|
||||
@intFromPtr(new_path),
|
||||
);
|
||||
sqe.len = @as(u32, @bitCast(new_dir_fd));
|
||||
sqe.len = @bitCast(new_dir_fd);
|
||||
sqe.rw_flags = flags;
|
||||
}
|
||||
|
||||
@ -1576,7 +1576,7 @@ pub fn io_uring_prep_provide_buffers(
|
||||
) void {
|
||||
const ptr = @intFromPtr(buffers);
|
||||
io_uring_prep_rw(.PROVIDE_BUFFERS, sqe, @as(i32, @intCast(num)), ptr, buffer_len, buffer_id);
|
||||
sqe.buf_index = @as(u16, @intCast(group_id));
|
||||
sqe.buf_index = @intCast(group_id);
|
||||
}
|
||||
|
||||
pub fn io_uring_prep_remove_buffers(
|
||||
@ -1585,7 +1585,7 @@ pub fn io_uring_prep_remove_buffers(
|
||||
group_id: usize,
|
||||
) void {
|
||||
io_uring_prep_rw(.REMOVE_BUFFERS, sqe, @as(i32, @intCast(num)), 0, 0, 0);
|
||||
sqe.buf_index = @as(u16, @intCast(group_id));
|
||||
sqe.buf_index = @intCast(group_id);
|
||||
}
|
||||
|
||||
test "structs/offsets/entries" {
|
||||
|
@ -1918,7 +1918,7 @@ pub fn fileTimeToNanoSeconds(ft: FILETIME) i128 {
|
||||
|
||||
/// Converts a number of nanoseconds since the POSIX epoch to a Windows FILETIME.
|
||||
pub fn nanoSecondsToFileTime(ns: i128) FILETIME {
|
||||
const adjusted = @as(u64, @bitCast(toSysTime(ns)));
|
||||
const adjusted: u64 = @bitCast(toSysTime(ns));
|
||||
return FILETIME{
|
||||
.dwHighDateTime = @as(u32, @truncate(adjusted >> 32)),
|
||||
.dwLowDateTime = @as(u32, @truncate(adjusted)),
|
||||
|
@ -1275,7 +1275,7 @@ pub const WS_EX_LAYERED = 0x00080000;
|
||||
pub const WS_EX_OVERLAPPEDWINDOW = WS_EX_WINDOWEDGE | WS_EX_CLIENTEDGE;
|
||||
pub const WS_EX_PALETTEWINDOW = WS_EX_WINDOWEDGE | WS_EX_TOOLWINDOW | WS_EX_TOPMOST;
|
||||
|
||||
pub const CW_USEDEFAULT = @as(i32, @bitCast(@as(u32, 0x80000000)));
|
||||
pub const CW_USEDEFAULT: i32 = @bitCast(@as(u32, 0x80000000));
|
||||
|
||||
pub extern "user32" fn CreateWindowExA(dwExStyle: DWORD, lpClassName: [*:0]const u8, lpWindowName: [*:0]const u8, dwStyle: DWORD, X: i32, Y: i32, nWidth: i32, nHeight: i32, hWindParent: ?HWND, hMenu: ?HMENU, hInstance: HINSTANCE, lpParam: ?LPVOID) callconv(WINAPI) ?HWND;
|
||||
pub fn createWindowExA(dwExStyle: u32, lpClassName: [*:0]const u8, lpWindowName: [*:0]const u8, dwStyle: u32, X: i32, Y: i32, nWidth: i32, nHeight: i32, hWindParent: ?HWND, hMenu: ?HMENU, hInstance: HINSTANCE, lpParam: ?*anyopaque) !HWND {
|
||||
|
@ -29,8 +29,8 @@ fn next(self: *Pcg) u32 {
|
||||
const l = self.s;
|
||||
self.s = l *% default_multiplier +% (self.i | 1);
|
||||
|
||||
const xor_s = @as(u32, @truncate(((l >> 18) ^ l) >> 27));
|
||||
const rot = @as(u32, @intCast(l >> 59));
|
||||
const xor_s: u32 = @truncate(((l >> 18) ^ l) >> 27);
|
||||
const rot: u32 = @intCast(l >> 59);
|
||||
|
||||
return (xor_s >> @as(u5, @intCast(rot))) | (xor_s << @as(u5, @intCast((0 -% rot) & 31)));
|
||||
}
|
||||
|
@ -206,8 +206,8 @@ pub inline fn __builtin_expect(expr: c_long, c: c_long) c_long {
|
||||
/// If tagp is empty, the function returns a NaN whose significand is zero.
|
||||
pub inline fn __builtin_nanf(tagp: []const u8) f32 {
|
||||
const parsed = std.fmt.parseUnsigned(c_ulong, tagp, 0) catch 0;
|
||||
const bits = @as(u23, @truncate(parsed)); // single-precision float trailing significand is 23 bits
|
||||
return @as(f32, @bitCast(@as(u32, bits) | std.math.qnan_u32));
|
||||
const bits: u23 = @truncate(parsed); // single-precision float trailing significand is 23 bits
|
||||
return @bitCast(@as(u32, bits) | std.math.qnan_u32);
|
||||
}
|
||||
|
||||
pub inline fn __builtin_huge_valf() f32 {
|
||||
|
@ -183,7 +183,7 @@ pub const aarch64 = struct {
|
||||
blk: {
|
||||
if (info.implementer == 0x41) {
|
||||
// ARM Ltd.
|
||||
const special_bits = @as(u4, @truncate(info.part >> 8));
|
||||
const special_bits: u4 = @truncate(info.part >> 8);
|
||||
if (special_bits == 0x0 or special_bits == 0x7) {
|
||||
// TODO Variant and arch encoded differently.
|
||||
break :blk;
|
||||
|
Loading…
Reference in New Issue
Block a user