const std = @import("../../std.zig"); const builtin = @import("builtin"); const math = std.math; const Limb = std.math.big.Limb; const limb_bits = @typeInfo(Limb).Int.bits; const HalfLimb = std.math.big.HalfLimb; const half_limb_bits = @typeInfo(HalfLimb).Int.bits; const DoubleLimb = std.math.big.DoubleLimb; const SignedDoubleLimb = std.math.big.SignedDoubleLimb; const Log2Limb = std.math.big.Log2Limb; const Allocator = std.mem.Allocator; const mem = std.mem; const maxInt = std.math.maxInt; const minInt = std.math.minInt; const assert = std.debug.assert; const Endian = std.builtin.Endian; const Signedness = std.builtin.Signedness; const native_endian = builtin.cpu.arch.endian(); const debug_safety = false; /// Returns the number of limbs needed to store `scalar`, which must be a /// primitive integer value. /// Note: A comptime-known upper bound of this value that may be used /// instead if `scalar` is not already comptime-known is /// `calcTwosCompLimbCount(@typeInfo(@TypeOf(scalar)).Int.bits)` pub fn calcLimbLen(scalar: anytype) usize { if (scalar == 0) { return 1; } const w_value = @abs(scalar); return @as(usize, @intCast(@divFloor(@as(Limb, @intCast(math.log2(w_value))), limb_bits) + 1)); } pub fn calcToStringLimbsBufferLen(a_len: usize, base: u8) usize { if (math.isPowerOfTwo(base)) return 0; return a_len + 2 + a_len + calcDivLimbsBufferLen(a_len, 1); } pub fn calcDivLimbsBufferLen(a_len: usize, b_len: usize) usize { return a_len + b_len + 4; } pub fn calcMulLimbsBufferLen(a_len: usize, b_len: usize, aliases: usize) usize { return aliases * @max(a_len, b_len); } pub fn calcMulWrapLimbsBufferLen(bit_count: usize, a_len: usize, b_len: usize, aliases: usize) usize { const req_limbs = calcTwosCompLimbCount(bit_count); return aliases * @min(req_limbs, @max(a_len, b_len)); } pub fn calcSetStringLimbsBufferLen(base: u8, string_len: usize) usize { const limb_count = calcSetStringLimbCount(base, string_len); return calcMulLimbsBufferLen(limb_count, limb_count, 2); } pub fn calcSetStringLimbCount(base: u8, string_len: usize) usize { return (string_len + (limb_bits / base - 1)) / (limb_bits / base); } pub fn calcPowLimbsBufferLen(a_bit_count: usize, y: usize) usize { // The 2 accounts for the minimum space requirement for llmulacc return 2 + (a_bit_count * y + (limb_bits - 1)) / limb_bits; } pub fn calcSqrtLimbsBufferLen(a_bit_count: usize) usize { const a_limb_count = (a_bit_count - 1) / limb_bits + 1; const shift = (a_bit_count + 1) / 2; const u_s_rem_limb_count = 1 + ((shift / limb_bits) + 1); return a_limb_count + 3 * u_s_rem_limb_count + calcDivLimbsBufferLen(a_limb_count, u_s_rem_limb_count); } // Compute the number of limbs required to store a 2s-complement number of `bit_count` bits. pub fn calcTwosCompLimbCount(bit_count: usize) usize { return std.math.divCeil(usize, bit_count, @bitSizeOf(Limb)) catch unreachable; } /// a + b * c + *carry, sets carry to the overflow bits pub fn addMulLimbWithCarry(a: Limb, b: Limb, c: Limb, carry: *Limb) Limb { @setRuntimeSafety(debug_safety); // ov1[0] = a + *carry const ov1 = @addWithOverflow(a, carry.*); // r2 = b * c const bc = @as(DoubleLimb, math.mulWide(Limb, b, c)); const r2 = @as(Limb, @truncate(bc)); const c2 = @as(Limb, @truncate(bc >> limb_bits)); // ov2[0] = ov1[0] + r2 const ov2 = @addWithOverflow(ov1[0], r2); // This never overflows, c1, c3 are either 0 or 1 and if both are 1 then // c2 is at least <= maxInt(Limb) - 2. carry.* = ov1[1] + c2 + ov2[1]; return ov2[0]; } /// a - b * c - *carry, sets carry to the overflow bits fn subMulLimbWithBorrow(a: Limb, b: Limb, c: Limb, carry: *Limb) Limb { // ov1[0] = a - *carry const ov1 = @subWithOverflow(a, carry.*); // r2 = b * c const bc = @as(DoubleLimb, std.math.mulWide(Limb, b, c)); const r2 = @as(Limb, @truncate(bc)); const c2 = @as(Limb, @truncate(bc >> limb_bits)); // ov2[0] = ov1[0] - r2 const ov2 = @subWithOverflow(ov1[0], r2); carry.* = ov1[1] + c2 + ov2[1]; return ov2[0]; } /// Used to indicate either limit of a 2s-complement integer. pub const TwosCompIntLimit = enum { // The low limit, either 0x00 (unsigned) or (-)0x80 (signed) for an 8-bit integer. min, // The high limit, either 0xFF (unsigned) or 0x7F (signed) for an 8-bit integer. max, }; /// A arbitrary-precision big integer, with a fixed set of mutable limbs. pub const Mutable = struct { /// Raw digits. These are: /// /// * Little-endian ordered /// * limbs.len >= 1 /// * Zero is represented as limbs.len == 1 with limbs[0] == 0. /// /// Accessing limbs directly should be avoided. /// These are allocated limbs; the `len` field tells the valid range. limbs: []Limb, len: usize, positive: bool, pub fn toConst(self: Mutable) Const { return .{ .limbs = self.limbs[0..self.len], .positive = self.positive, }; } /// Returns true if `a == 0`. pub fn eqlZero(self: Mutable) bool { return self.toConst().eqlZero(); } /// Asserts that the allocator owns the limbs memory. If this is not the case, /// use `toConst().toManaged()`. pub fn toManaged(self: Mutable, allocator: Allocator) Managed { return .{ .allocator = allocator, .limbs = self.limbs, .metadata = if (self.positive) self.len & ~Managed.sign_bit else self.len | Managed.sign_bit, }; } /// `value` is a primitive integer type. /// Asserts the value fits within the provided `limbs_buffer`. /// Note: `calcLimbLen` can be used to figure out how big an array to allocate for `limbs_buffer`. pub fn init(limbs_buffer: []Limb, value: anytype) Mutable { limbs_buffer[0] = 0; var self: Mutable = .{ .limbs = limbs_buffer, .len = 1, .positive = true, }; self.set(value); return self; } /// Copies the value of a Const to an existing Mutable so that they both have the same value. /// Asserts the value fits in the limbs buffer. pub fn copy(self: *Mutable, other: Const) void { if (self.limbs.ptr != other.limbs.ptr) { @memcpy(self.limbs[0..other.limbs.len], other.limbs[0..other.limbs.len]); } self.positive = other.positive; self.len = other.limbs.len; } /// Efficiently swap an Mutable with another. This swaps the limb pointers and a full copy is not /// performed. The address of the limbs field will not be the same after this function. pub fn swap(self: *Mutable, other: *Mutable) void { mem.swap(Mutable, self, other); } pub fn dump(self: Mutable) void { for (self.limbs[0..self.len]) |limb| { std.debug.print("{x} ", .{limb}); } std.debug.print("capacity={} positive={}\n", .{ self.limbs.len, self.positive }); } /// Clones an Mutable and returns a new Mutable with the same value. The new Mutable is a deep copy and /// can be modified separately from the original. /// Asserts that limbs is big enough to store the value. pub fn clone(other: Mutable, limbs: []Limb) Mutable { @memcpy(limbs[0..other.len], other.limbs[0..other.len]); return .{ .limbs = limbs, .len = other.len, .positive = other.positive, }; } pub fn negate(self: *Mutable) void { self.positive = !self.positive; } /// Modify to become the absolute value pub fn abs(self: *Mutable) void { self.positive = true; } /// Sets the Mutable to value. Value must be an primitive integer type. /// Asserts the value fits within the limbs buffer. /// Note: `calcLimbLen` can be used to figure out how big the limbs buffer /// needs to be to store a specific value. pub fn set(self: *Mutable, value: anytype) void { const T = @TypeOf(value); const needed_limbs = calcLimbLen(value); assert(needed_limbs <= self.limbs.len); // value too big self.len = needed_limbs; self.positive = value >= 0; switch (@typeInfo(T)) { .Int => |info| { var w_value = @abs(value); if (info.bits <= limb_bits) { self.limbs[0] = w_value; } else { var i: usize = 0; while (true) : (i += 1) { self.limbs[i] = @as(Limb, @truncate(w_value)); w_value >>= limb_bits; if (w_value == 0) break; } } }, .ComptimeInt => { comptime var w_value = @abs(value); if (w_value <= maxInt(Limb)) { self.limbs[0] = w_value; } else { const mask = (1 << limb_bits) - 1; comptime var i = 0; inline while (true) : (i += 1) { self.limbs[i] = w_value & mask; w_value >>= limb_bits; if (w_value == 0) break; } } }, else => @compileError("cannot set Mutable using type " ++ @typeName(T)), } } /// Set self from the string representation `value`. /// /// `value` must contain only digits <= `base` and is case insensitive. Base prefixes are /// not allowed (e.g. 0x43 should simply be 43). Underscores in the input string are /// ignored and can be used as digit separators. /// /// Asserts there is enough memory for the value in `self.limbs`. An upper bound on number of limbs can /// be determined with `calcSetStringLimbCount`. /// Asserts the base is in the range [2, 16]. /// /// Returns an error if the value has invalid digits for the requested base. /// /// `limbs_buffer` is used for temporary storage. The size required can be found with /// `calcSetStringLimbsBufferLen`. /// /// If `allocator` is provided, it will be used for temporary storage to improve /// multiplication performance. `error.OutOfMemory` is handled with a fallback algorithm. pub fn setString( self: *Mutable, base: u8, value: []const u8, limbs_buffer: []Limb, allocator: ?Allocator, ) error{InvalidCharacter}!void { assert(base >= 2 and base <= 16); var i: usize = 0; var positive = true; if (value.len > 0 and value[0] == '-') { positive = false; i += 1; } const ap_base: Const = .{ .limbs = &[_]Limb{base}, .positive = true }; self.set(0); for (value[i..]) |ch| { if (ch == '_') { continue; } const d = try std.fmt.charToDigit(ch, base); const ap_d: Const = .{ .limbs = &[_]Limb{d}, .positive = true }; self.mul(self.toConst(), ap_base, limbs_buffer, allocator); self.add(self.toConst(), ap_d); } self.positive = positive; } /// Set self to either bound of a 2s-complement integer. /// Note: The result is still sign-magnitude, not twos complement! In order to convert the /// result to twos complement, it is sufficient to take the absolute value. /// /// Asserts the result fits in `r`. An upper bound on the number of limbs needed by /// r is `calcTwosCompLimbCount(bit_count)`. pub fn setTwosCompIntLimit( r: *Mutable, limit: TwosCompIntLimit, signedness: Signedness, bit_count: usize, ) void { // Handle zero-bit types. if (bit_count == 0) { r.set(0); return; } const req_limbs = calcTwosCompLimbCount(bit_count); const bit: Log2Limb = @truncate(bit_count - 1); const signmask = @as(Limb, 1) << bit; // 0b0..010..0 where 1 is the sign bit. const mask = (signmask << 1) -% 1; // 0b0..011..1 where the leftmost 1 is the sign bit. r.positive = true; switch (signedness) { .signed => switch (limit) { .min => { // Negative bound, signed = -0x80. r.len = req_limbs; @memset(r.limbs[0 .. r.len - 1], 0); r.limbs[r.len - 1] = signmask; r.positive = false; }, .max => { // Positive bound, signed = 0x7F // Note, in this branch we need to normalize because the first bit is // supposed to be 0. // Special case for 1-bit integers. if (bit_count == 1) { r.set(0); } else { const new_req_limbs = calcTwosCompLimbCount(bit_count - 1); const msb = @as(Log2Limb, @truncate(bit_count - 2)); const new_signmask = @as(Limb, 1) << msb; // 0b0..010..0 where 1 is the sign bit. const new_mask = (new_signmask << 1) -% 1; // 0b0..001..1 where the rightmost 0 is the sign bit. r.len = new_req_limbs; @memset(r.limbs[0 .. r.len - 1], maxInt(Limb)); r.limbs[r.len - 1] = new_mask; } }, }, .unsigned => switch (limit) { .min => { // Min bound, unsigned = 0x00 r.set(0); }, .max => { // Max bound, unsigned = 0xFF r.len = req_limbs; @memset(r.limbs[0 .. r.len - 1], maxInt(Limb)); r.limbs[r.len - 1] = mask; }, }, } } /// r = a + scalar /// /// r and a may be aliases. /// scalar is a primitive integer type. /// /// Asserts the result fits in `r`. An upper bound on the number of limbs needed by /// r is `@max(a.limbs.len, calcLimbLen(scalar)) + 1`. pub fn addScalar(r: *Mutable, a: Const, scalar: anytype) void { // Normally we could just determine the number of limbs needed with calcLimbLen, // but that is not comptime-known when scalar is not a comptime_int. Instead, we // use calcTwosCompLimbCount for a non-comptime_int scalar, which can be pessimistic // in the case that scalar happens to be small in magnitude within its type, but it // is well worth being able to use the stack and not needing an allocator passed in. // Note that Mutable.init still sets len to calcLimbLen(scalar) in any case. const limb_len = comptime switch (@typeInfo(@TypeOf(scalar))) { .ComptimeInt => calcLimbLen(scalar), .Int => |info| calcTwosCompLimbCount(info.bits), else => @compileError("expected scalar to be an int"), }; var limbs: [limb_len]Limb = undefined; const operand = init(&limbs, scalar).toConst(); return add(r, a, operand); } /// Base implementation for addition. Adds `@max(a.limbs.len, b.limbs.len)` elements from a and b, /// and returns whether any overflow occurred. /// r, a and b may be aliases. /// /// Asserts r has enough elements to hold the result. The upper bound is `@max(a.limbs.len, b.limbs.len)`. fn addCarry(r: *Mutable, a: Const, b: Const) bool { if (a.eqlZero()) { r.copy(b); return false; } else if (b.eqlZero()) { r.copy(a); return false; } else if (a.positive != b.positive) { if (a.positive) { // (a) + (-b) => a - b return r.subCarry(a, b.abs()); } else { // (-a) + (b) => b - a return r.subCarry(b, a.abs()); } } else { r.positive = a.positive; if (a.limbs.len >= b.limbs.len) { const c = lladdcarry(r.limbs, a.limbs, b.limbs); r.normalize(a.limbs.len); return c != 0; } else { const c = lladdcarry(r.limbs, b.limbs, a.limbs); r.normalize(b.limbs.len); return c != 0; } } } /// r = a + b /// /// r, a and b may be aliases. /// /// Asserts the result fits in `r`. An upper bound on the number of limbs needed by /// r is `@max(a.limbs.len, b.limbs.len) + 1`. pub fn add(r: *Mutable, a: Const, b: Const) void { if (r.addCarry(a, b)) { // Fix up the result. Note that addCarry normalizes by a.limbs.len or b.limbs.len, // so we need to set the length here. const msl = @max(a.limbs.len, b.limbs.len); // `[add|sub]Carry` normalizes by `msl`, so we need to fix up the result manually here. // Note, the fact that it normalized means that the intermediary limbs are zero here. r.len = msl + 1; r.limbs[msl] = 1; // If this panics, there wasn't enough space in `r`. } } /// r = a + b with 2s-complement wrapping semantics. Returns whether overflow occurred. /// r, a and b may be aliases /// /// Asserts the result fits in `r`. An upper bound on the number of limbs needed by /// r is `calcTwosCompLimbCount(bit_count)`. pub fn addWrap(r: *Mutable, a: Const, b: Const, signedness: Signedness, bit_count: usize) bool { const req_limbs = calcTwosCompLimbCount(bit_count); // Slice of the upper bits if they exist, these will be ignored and allows us to use addCarry to determine // if an overflow occurred. const x = Const{ .positive = a.positive, .limbs = a.limbs[0..@min(req_limbs, a.limbs.len)], }; const y = Const{ .positive = b.positive, .limbs = b.limbs[0..@min(req_limbs, b.limbs.len)], }; var carry_truncated = false; if (r.addCarry(x, y)) { // There are two possibilities here: // - We overflowed req_limbs. In this case, the carry is ignored, as it would be removed by // truncate anyway. // - a and b had less elements than req_limbs, and those were overflowed. This case needs to be handled. // Note: after this we still might need to wrap. const msl = @max(a.limbs.len, b.limbs.len); if (msl < req_limbs) { r.limbs[msl] = 1; r.len = req_limbs; @memset(r.limbs[msl + 1 .. req_limbs], 0); } else { carry_truncated = true; } } if (!r.toConst().fitsInTwosComp(signedness, bit_count)) { r.truncate(r.toConst(), signedness, bit_count); return true; } return carry_truncated; } /// r = a + b with 2s-complement saturating semantics. /// r, a and b may be aliases. /// /// Assets the result fits in `r`. Upper bound on the number of limbs needed by /// r is `calcTwosCompLimbCount(bit_count)`. pub fn addSat(r: *Mutable, a: Const, b: Const, signedness: Signedness, bit_count: usize) void { const req_limbs = calcTwosCompLimbCount(bit_count); // Slice of the upper bits if they exist, these will be ignored and allows us to use addCarry to determine // if an overflow occurred. const x = Const{ .positive = a.positive, .limbs = a.limbs[0..@min(req_limbs, a.limbs.len)], }; const y = Const{ .positive = b.positive, .limbs = b.limbs[0..@min(req_limbs, b.limbs.len)], }; if (r.addCarry(x, y)) { // There are two possibilities here: // - We overflowed req_limbs, in which case we need to saturate. // - a and b had less elements than req_limbs, and those were overflowed. // Note: In this case, might _also_ need to saturate. const msl = @max(a.limbs.len, b.limbs.len); if (msl < req_limbs) { r.limbs[msl] = 1; r.len = req_limbs; // Note: Saturation may still be required if msl == req_limbs - 1 } else { // Overflowed req_limbs, definitely saturate. r.setTwosCompIntLimit(if (r.positive) .max else .min, signedness, bit_count); } } // Saturate if the result didn't fit. r.saturate(r.toConst(), signedness, bit_count); } /// Base implementation for subtraction. Subtracts `@max(a.limbs.len, b.limbs.len)` elements from a and b, /// and returns whether any overflow occurred. /// r, a and b may be aliases. /// /// Asserts r has enough elements to hold the result. The upper bound is `@max(a.limbs.len, b.limbs.len)`. fn subCarry(r: *Mutable, a: Const, b: Const) bool { if (a.eqlZero()) { r.copy(b); r.positive = !b.positive; return false; } else if (b.eqlZero()) { r.copy(a); return false; } else if (a.positive != b.positive) { if (a.positive) { // (a) - (-b) => a + b return r.addCarry(a, b.abs()); } else { // (-a) - (b) => -a + -b return r.addCarry(a, b.negate()); } } else if (a.positive) { if (a.order(b) != .lt) { // (a) - (b) => a - b const c = llsubcarry(r.limbs, a.limbs, b.limbs); r.normalize(a.limbs.len); r.positive = true; return c != 0; } else { // (a) - (b) => -b + a => -(b - a) const c = llsubcarry(r.limbs, b.limbs, a.limbs); r.normalize(b.limbs.len); r.positive = false; return c != 0; } } else { if (a.order(b) == .lt) { // (-a) - (-b) => -(a - b) const c = llsubcarry(r.limbs, a.limbs, b.limbs); r.normalize(a.limbs.len); r.positive = false; return c != 0; } else { // (-a) - (-b) => --b + -a => b - a const c = llsubcarry(r.limbs, b.limbs, a.limbs); r.normalize(b.limbs.len); r.positive = true; return c != 0; } } } /// r = a - b /// /// r, a and b may be aliases. /// /// Asserts the result fits in `r`. An upper bound on the number of limbs needed by /// r is `@max(a.limbs.len, b.limbs.len) + 1`. The +1 is not needed if both operands are positive. pub fn sub(r: *Mutable, a: Const, b: Const) void { r.add(a, b.negate()); } /// r = a - b with 2s-complement wrapping semantics. Returns whether any overflow occurred. /// /// r, a and b may be aliases /// Asserts the result fits in `r`. An upper bound on the number of limbs needed by /// r is `calcTwosCompLimbCount(bit_count)`. pub fn subWrap(r: *Mutable, a: Const, b: Const, signedness: Signedness, bit_count: usize) bool { return r.addWrap(a, b.negate(), signedness, bit_count); } /// r = a - b with 2s-complement saturating semantics. /// r, a and b may be aliases. /// /// Assets the result fits in `r`. Upper bound on the number of limbs needed by /// r is `calcTwosCompLimbCount(bit_count)`. pub fn subSat(r: *Mutable, a: Const, b: Const, signedness: Signedness, bit_count: usize) void { r.addSat(a, b.negate(), signedness, bit_count); } /// rma = a * b /// /// `rma` may alias with `a` or `b`. /// `a` and `b` may alias with each other. /// /// Asserts the result fits in `rma`. An upper bound on the number of limbs needed by /// rma is given by `a.limbs.len + b.limbs.len`. /// /// `limbs_buffer` is used for temporary storage. The amount required is given by `calcMulLimbsBufferLen`. pub fn mul(rma: *Mutable, a: Const, b: Const, limbs_buffer: []Limb, allocator: ?Allocator) void { var buf_index: usize = 0; const a_copy = if (rma.limbs.ptr == a.limbs.ptr) blk: { const start = buf_index; @memcpy(limbs_buffer[buf_index..][0..a.limbs.len], a.limbs); buf_index += a.limbs.len; break :blk a.toMutable(limbs_buffer[start..buf_index]).toConst(); } else a; const b_copy = if (rma.limbs.ptr == b.limbs.ptr) blk: { const start = buf_index; @memcpy(limbs_buffer[buf_index..][0..b.limbs.len], b.limbs); buf_index += b.limbs.len; break :blk b.toMutable(limbs_buffer[start..buf_index]).toConst(); } else b; return rma.mulNoAlias(a_copy, b_copy, allocator); } /// rma = a * b /// /// `rma` may not alias with `a` or `b`. /// `a` and `b` may alias with each other. /// /// Asserts the result fits in `rma`. An upper bound on the number of limbs needed by /// rma is given by `a.limbs.len + b.limbs.len`. /// /// If `allocator` is provided, it will be used for temporary storage to improve /// multiplication performance. `error.OutOfMemory` is handled with a fallback algorithm. pub fn mulNoAlias(rma: *Mutable, a: Const, b: Const, allocator: ?Allocator) void { assert(rma.limbs.ptr != a.limbs.ptr); // illegal aliasing assert(rma.limbs.ptr != b.limbs.ptr); // illegal aliasing if (a.limbs.len == 1 and b.limbs.len == 1) { const ov = @mulWithOverflow(a.limbs[0], b.limbs[0]); rma.limbs[0] = ov[0]; if (ov[1] == 0) { rma.len = 1; rma.positive = (a.positive == b.positive); return; } } @memset(rma.limbs[0 .. a.limbs.len + b.limbs.len], 0); llmulacc(.add, allocator, rma.limbs, a.limbs, b.limbs); rma.normalize(a.limbs.len + b.limbs.len); rma.positive = (a.positive == b.positive); } /// rma = a * b with 2s-complement wrapping semantics. /// /// `rma` may alias with `a` or `b`. /// `a` and `b` may alias with each other. /// /// Asserts the result fits in `rma`. An upper bound on the number of limbs needed by /// rma is given by `a.limbs.len + b.limbs.len`. /// /// `limbs_buffer` is used for temporary storage. The amount required is given by `calcMulWrapLimbsBufferLen`. pub fn mulWrap( rma: *Mutable, a: Const, b: Const, signedness: Signedness, bit_count: usize, limbs_buffer: []Limb, allocator: ?Allocator, ) void { var buf_index: usize = 0; const req_limbs = calcTwosCompLimbCount(bit_count); const a_copy = if (rma.limbs.ptr == a.limbs.ptr) blk: { const start = buf_index; const a_len = @min(req_limbs, a.limbs.len); @memcpy(limbs_buffer[buf_index..][0..a_len], a.limbs[0..a_len]); buf_index += a_len; break :blk a.toMutable(limbs_buffer[start..buf_index]).toConst(); } else a; const b_copy = if (rma.limbs.ptr == b.limbs.ptr) blk: { const start = buf_index; const b_len = @min(req_limbs, b.limbs.len); @memcpy(limbs_buffer[buf_index..][0..b_len], b.limbs[0..b_len]); buf_index += b_len; break :blk a.toMutable(limbs_buffer[start..buf_index]).toConst(); } else b; return rma.mulWrapNoAlias(a_copy, b_copy, signedness, bit_count, allocator); } /// rma = a * b with 2s-complement wrapping semantics. /// /// `rma` may not alias with `a` or `b`. /// `a` and `b` may alias with each other. /// /// Asserts the result fits in `rma`. An upper bound on the number of limbs needed by /// rma is given by `a.limbs.len + b.limbs.len`. /// /// If `allocator` is provided, it will be used for temporary storage to improve /// multiplication performance. `error.OutOfMemory` is handled with a fallback algorithm. pub fn mulWrapNoAlias( rma: *Mutable, a: Const, b: Const, signedness: Signedness, bit_count: usize, allocator: ?Allocator, ) void { assert(rma.limbs.ptr != a.limbs.ptr); // illegal aliasing assert(rma.limbs.ptr != b.limbs.ptr); // illegal aliasing const req_limbs = calcTwosCompLimbCount(bit_count); // We can ignore the upper bits here, those results will be discarded anyway. const a_limbs = a.limbs[0..@min(req_limbs, a.limbs.len)]; const b_limbs = b.limbs[0..@min(req_limbs, b.limbs.len)]; @memset(rma.limbs[0..req_limbs], 0); llmulacc(.add, allocator, rma.limbs, a_limbs, b_limbs); rma.normalize(@min(req_limbs, a.limbs.len + b.limbs.len)); rma.positive = (a.positive == b.positive); rma.truncate(rma.toConst(), signedness, bit_count); } /// r = @bitReverse(a) with 2s-complement semantics. /// r and a may be aliases. /// /// Asserts the result fits in `r`. Upper bound on the number of limbs needed by /// r is `calcTwosCompLimbCount(bit_count)`. pub fn bitReverse(r: *Mutable, a: Const, signedness: Signedness, bit_count: usize) void { if (bit_count == 0) return; r.copy(a); const limbs_required = calcTwosCompLimbCount(bit_count); if (!a.positive) { r.positive = true; // Negate. r.bitNotWrap(r.toConst(), .unsigned, bit_count); // Bitwise NOT. r.addScalar(r.toConst(), 1); // Add one. } else if (limbs_required > a.limbs.len) { // Zero-extend to our output length for (r.limbs[a.limbs.len..limbs_required]) |*limb| { limb.* = 0; } r.len = limbs_required; } // 0b0..01..1000 with @log2(@sizeOf(Limb)) consecutive ones const endian_mask: usize = (@sizeOf(Limb) - 1) << 3; const bytes = std.mem.sliceAsBytes(r.limbs); var bits = std.packed_int_array.PackedIntSliceEndian(u1, .little).init(bytes, limbs_required * @bitSizeOf(Limb)); var k: usize = 0; while (k < ((bit_count + 1) / 2)) : (k += 1) { var i = k; var rev_i = bit_count - i - 1; // This "endian mask" remaps a low (LE) byte to the corresponding high // (BE) byte in the Limb, without changing which limbs we are indexing if (native_endian == .big) { i ^= endian_mask; rev_i ^= endian_mask; } const bit_i = bits.get(i); const bit_rev_i = bits.get(rev_i); bits.set(i, bit_rev_i); bits.set(rev_i, bit_i); } // Calculate signed-magnitude representation for output if (signedness == .signed) { const last_bit = switch (native_endian) { .little => bits.get(bit_count - 1), .big => bits.get((bit_count - 1) ^ endian_mask), }; if (last_bit == 1) { r.bitNotWrap(r.toConst(), .unsigned, bit_count); // Bitwise NOT. r.addScalar(r.toConst(), 1); // Add one. r.positive = false; // Negate. } } r.normalize(r.len); } /// r = @byteSwap(a) with 2s-complement semantics. /// r and a may be aliases. /// /// Asserts the result fits in `r`. Upper bound on the number of limbs needed by /// r is `calcTwosCompLimbCount(8*byte_count)`. pub fn byteSwap(r: *Mutable, a: Const, signedness: Signedness, byte_count: usize) void { if (byte_count == 0) return; r.copy(a); const limbs_required = calcTwosCompLimbCount(8 * byte_count); if (!a.positive) { r.positive = true; // Negate. r.bitNotWrap(r.toConst(), .unsigned, 8 * byte_count); // Bitwise NOT. r.addScalar(r.toConst(), 1); // Add one. } else if (limbs_required > a.limbs.len) { // Zero-extend to our output length for (r.limbs[a.limbs.len..limbs_required]) |*limb| { limb.* = 0; } r.len = limbs_required; } // 0b0..01..1 with @log2(@sizeOf(Limb)) trailing ones const endian_mask: usize = @sizeOf(Limb) - 1; var bytes = std.mem.sliceAsBytes(r.limbs); assert(bytes.len >= byte_count); var k: usize = 0; while (k < (byte_count + 1) / 2) : (k += 1) { var i = k; var rev_i = byte_count - k - 1; // This "endian mask" remaps a low (LE) byte to the corresponding high // (BE) byte in the Limb, without changing which limbs we are indexing if (native_endian == .big) { i ^= endian_mask; rev_i ^= endian_mask; } const byte_i = bytes[i]; const byte_rev_i = bytes[rev_i]; bytes[rev_i] = byte_i; bytes[i] = byte_rev_i; } // Calculate signed-magnitude representation for output if (signedness == .signed) { const last_byte = switch (native_endian) { .little => bytes[byte_count - 1], .big => bytes[(byte_count - 1) ^ endian_mask], }; if (last_byte & (1 << 7) != 0) { // Check sign bit of last byte r.bitNotWrap(r.toConst(), .unsigned, 8 * byte_count); // Bitwise NOT. r.addScalar(r.toConst(), 1); // Add one. r.positive = false; // Negate. } } r.normalize(r.len); } /// r = @popCount(a) with 2s-complement semantics. /// r and a may be aliases. /// /// Assets the result fits in `r`. Upper bound on the number of limbs needed by /// r is `calcTwosCompLimbCount(bit_count)`. pub fn popCount(r: *Mutable, a: Const, bit_count: usize) void { r.copy(a); if (!a.positive) { r.positive = true; // Negate. r.bitNotWrap(r.toConst(), .unsigned, bit_count); // Bitwise NOT. r.addScalar(r.toConst(), 1); // Add one. } var sum: Limb = 0; for (r.limbs[0..r.len]) |limb| { sum += @popCount(limb); } r.set(sum); } /// rma = a * a /// /// `rma` may not alias with `a`. /// /// Asserts the result fits in `rma`. An upper bound on the number of limbs needed by /// rma is given by `2 * a.limbs.len + 1`. /// /// If `allocator` is provided, it will be used for temporary storage to improve /// multiplication performance. `error.OutOfMemory` is handled with a fallback algorithm. pub fn sqrNoAlias(rma: *Mutable, a: Const, opt_allocator: ?Allocator) void { _ = opt_allocator; assert(rma.limbs.ptr != a.limbs.ptr); // illegal aliasing @memset(rma.limbs, 0); llsquareBasecase(rma.limbs, a.limbs); rma.normalize(2 * a.limbs.len + 1); rma.positive = true; } /// q = a / b (rem r) /// /// a / b are floored (rounded towards 0). /// q may alias with a or b. /// /// Asserts there is enough memory to store q and r. /// The upper bound for r limb count is `b.limbs.len`. /// The upper bound for q limb count is given by `a.limbs`. /// /// `limbs_buffer` is used for temporary storage. The amount required is given by `calcDivLimbsBufferLen`. pub fn divFloor( q: *Mutable, r: *Mutable, a: Const, b: Const, limbs_buffer: []Limb, ) void { const sep = a.limbs.len + 2; var x = a.toMutable(limbs_buffer[0..sep]); var y = b.toMutable(limbs_buffer[sep..]); div(q, r, &x, &y); // Note, `div` performs truncating division, which satisfies // @divTrunc(a, b) * b + @rem(a, b) = a // so r = a - @divTrunc(a, b) * b // Note, @rem(a, -b) = @rem(-b, a) = -@rem(a, b) = -@rem(-a, -b) // For divTrunc, we want to perform // @divFloor(a, b) * b + @mod(a, b) = a // Note: // @divFloor(-a, b) // = @divFloor(a, -b) // = -@divCeil(a, b) // = -@divFloor(a + b - 1, b) // = -@divTrunc(a + b - 1, b) // Note (1): // @divTrunc(a + b - 1, b) * b + @rem(a + b - 1, b) = a + b - 1 // = @divTrunc(a + b - 1, b) * b + @rem(a - 1, b) = a + b - 1 // = @divTrunc(a + b - 1, b) * b + @rem(a - 1, b) - b + 1 = a if (a.positive and b.positive) { // Positive-positive case, don't need to do anything. } else if (a.positive and !b.positive) { // a/-b -> q is negative, and so we need to fix flooring. // Subtract one to make the division flooring. // @divFloor(a, -b) * -b + @mod(a, -b) = a // If b divides a exactly, we have @divFloor(a, -b) * -b = a // Else, we have @divFloor(a, -b) * -b > a, so @mod(a, -b) becomes negative // We have: // @divFloor(a, -b) * -b + @mod(a, -b) = a // = -@divTrunc(a + b - 1, b) * -b + @mod(a, -b) = a // = @divTrunc(a + b - 1, b) * b + @mod(a, -b) = a // Substitute a for (1): // @divTrunc(a + b - 1, b) * b + @rem(a - 1, b) - b + 1 = @divTrunc(a + b - 1, b) * b + @mod(a, -b) // Yields: // @mod(a, -b) = @rem(a - 1, b) - b + 1 // Note that `r` holds @rem(a, b) at this point. // // If @rem(a, b) is not 0: // @rem(a - 1, b) = @rem(a, b) - 1 // => @mod(a, -b) = @rem(a, b) - 1 - b + 1 = @rem(a, b) - b // Else: // @rem(a - 1, b) = @rem(a + b - 1, b) = @rem(b - 1, b) = b - 1 // => @mod(a, -b) = b - 1 - b + 1 = 0 if (!r.eqlZero()) { q.addScalar(q.toConst(), -1); r.positive = true; r.sub(r.toConst(), y.toConst().abs()); } } else if (!a.positive and b.positive) { // -a/b -> q is negative, and so we need to fix flooring. // Subtract one to make the division flooring. // @divFloor(-a, b) * b + @mod(-a, b) = a // If b divides a exactly, we have @divFloor(-a, b) * b = -a // Else, we have @divFloor(-a, b) * b < -a, so @mod(-a, b) becomes positive // We have: // @divFloor(-a, b) * b + @mod(-a, b) = -a // = -@divTrunc(a + b - 1, b) * b + @mod(-a, b) = -a // = @divTrunc(a + b - 1, b) * b - @mod(-a, b) = a // Substitute a for (1): // @divTrunc(a + b - 1, b) * b + @rem(a - 1, b) - b + 1 = @divTrunc(a + b - 1, b) * b - @mod(-a, b) // Yields: // @rem(a - 1, b) - b + 1 = -@mod(-a, b) // => -@mod(-a, b) = @rem(a - 1, b) - b + 1 // => @mod(-a, b) = -(@rem(a - 1, b) - b + 1) = -@rem(a - 1, b) + b - 1 // // If @rem(a, b) is not 0: // @rem(a - 1, b) = @rem(a, b) - 1 // => @mod(-a, b) = -(@rem(a, b) - 1) + b - 1 = -@rem(a, b) + 1 + b - 1 = -@rem(a, b) + b // Else : // @rem(a - 1, b) = b - 1 // => @mod(-a, b) = -(b - 1) + b - 1 = 0 if (!r.eqlZero()) { q.addScalar(q.toConst(), -1); r.positive = false; r.add(r.toConst(), y.toConst().abs()); } } else if (!a.positive and !b.positive) { // a/b -> q is positive, don't need to do anything to fix flooring. // @divFloor(-a, -b) * -b + @mod(-a, -b) = -a // If b divides a exactly, we have @divFloor(-a, -b) * -b = -a // Else, we have @divFloor(-a, -b) * -b > -a, so @mod(-a, -b) becomes negative // We have: // @divFloor(-a, -b) * -b + @mod(-a, -b) = -a // = @divTrunc(a, b) * -b + @mod(-a, -b) = -a // = @divTrunc(a, b) * b - @mod(-a, -b) = a // We also have: // @divTrunc(a, b) * b + @rem(a, b) = a // Substitute a: // @divTrunc(a, b) * b + @rem(a, b) = @divTrunc(a, b) * b - @mod(-a, -b) // => @rem(a, b) = -@mod(-a, -b) // => @mod(-a, -b) = -@rem(a, b) r.positive = false; } } /// q = a / b (rem r) /// /// a / b are truncated (rounded towards -inf). /// q may alias with a or b. /// /// Asserts there is enough memory to store q and r. /// The upper bound for r limb count is `b.limbs.len`. /// The upper bound for q limb count is given by `a.limbs.len`. /// /// `limbs_buffer` is used for temporary storage. The amount required is given by `calcDivLimbsBufferLen`. pub fn divTrunc( q: *Mutable, r: *Mutable, a: Const, b: Const, limbs_buffer: []Limb, ) void { const sep = a.limbs.len + 2; var x = a.toMutable(limbs_buffer[0..sep]); var y = b.toMutable(limbs_buffer[sep..]); div(q, r, &x, &y); } /// r = a << shift, in other words, r = a * 2^shift /// /// r and a may alias. /// /// Asserts there is enough memory to fit the result. The upper bound Limb count is /// `a.limbs.len + (shift / (@sizeOf(Limb) * 8))`. pub fn shiftLeft(r: *Mutable, a: Const, shift: usize) void { llshl(r.limbs[0..], a.limbs[0..a.limbs.len], shift); r.normalize(a.limbs.len + (shift / limb_bits) + 1); r.positive = a.positive; } /// r = a <<| shift with 2s-complement saturating semantics. /// /// r and a may alias. /// /// Asserts there is enough memory to fit the result. The upper bound Limb count is /// r is `calcTwosCompLimbCount(bit_count)`. pub fn shiftLeftSat(r: *Mutable, a: Const, shift: usize, signedness: Signedness, bit_count: usize) void { // Special case: When the argument is negative, but the result is supposed to be unsigned, // return 0 in all cases. if (!a.positive and signedness == .unsigned) { r.set(0); return; } // Check whether the shift is going to overflow. This is the case // when (in 2s complement) any bit above `bit_count - shift` is set in the unshifted value. // Note, the sign bit is not counted here. // Handle shifts larger than the target type. This also deals with // 0-bit integers. if (bit_count <= shift) { // In this case, there is only no overflow if `a` is zero. if (a.eqlZero()) { r.set(0); } else { r.setTwosCompIntLimit(if (a.positive) .max else .min, signedness, bit_count); } return; } const checkbit = bit_count - shift - @intFromBool(signedness == .signed); // If `checkbit` and more significant bits are zero, no overflow will take place. if (checkbit >= a.limbs.len * limb_bits) { // `checkbit` is outside the range of a, so definitely no overflow will take place. We // can defer to a normal shift. // Note that if `a` is normalized (which we assume), this checks for set bits in the upper limbs. // Note, in this case r should already have enough limbs required to perform the normal shift. // In this case the shift of the most significant limb may still overflow. r.shiftLeft(a, shift); return; } else if (checkbit < (a.limbs.len - 1) * limb_bits) { // `checkbit` is not in the most significant limb. If `a` is normalized the most significant // limb will not be zero, so in this case we need to saturate. Note that `a.limbs.len` must be // at least one according to normalization rules. r.setTwosCompIntLimit(if (a.positive) .max else .min, signedness, bit_count); return; } // Generate a mask with the bits to check in the most significant limb. We'll need to check // all bits with equal or more significance than checkbit. // const msb = @truncate(Log2Limb, checkbit); // const checkmask = (@as(Limb, 1) << msb) -% 1; if (a.limbs[a.limbs.len - 1] >> @as(Log2Limb, @truncate(checkbit)) != 0) { // Need to saturate. r.setTwosCompIntLimit(if (a.positive) .max else .min, signedness, bit_count); return; } // This shift should not be able to overflow, so invoke llshl and normalize manually // to avoid the extra required limb. llshl(r.limbs[0..], a.limbs[0..a.limbs.len], shift); r.normalize(a.limbs.len + (shift / limb_bits)); r.positive = a.positive; } /// r = a >> shift /// r and a may alias. /// /// Asserts there is enough memory to fit the result. The upper bound Limb count is /// `a.limbs.len - (shift / (@sizeOf(Limb) * 8))`. pub fn shiftRight(r: *Mutable, a: Const, shift: usize) void { const full_limbs_shifted_out = shift / limb_bits; const remaining_bits_shifted_out = shift % limb_bits; if (a.limbs.len <= full_limbs_shifted_out) { // Shifting negative numbers converges to -1 instead of 0 if (a.positive) { r.len = 1; r.positive = true; r.limbs[0] = 0; } else { r.len = 1; r.positive = false; r.limbs[0] = 1; } return; } const nonzero_negative_shiftout = if (a.positive) false else nonzero: { for (a.limbs[0..full_limbs_shifted_out]) |x| { if (x != 0) break :nonzero true; } if (remaining_bits_shifted_out == 0) break :nonzero false; const not_covered: Log2Limb = @intCast(limb_bits - remaining_bits_shifted_out); break :nonzero a.limbs[full_limbs_shifted_out] << not_covered != 0; }; llshr(r.limbs[0..], a.limbs[0..a.limbs.len], shift); r.len = a.limbs.len - full_limbs_shifted_out; if (nonzero_negative_shiftout) { if (full_limbs_shifted_out > 0) { r.limbs[a.limbs.len - full_limbs_shifted_out] = 0; r.len += 1; } r.addScalar(r.toConst(), -1); } r.normalize(r.len); r.positive = a.positive; } /// r = ~a under 2s complement wrapping semantics. /// r may alias with a. /// /// Assets that r has enough limbs to store the result. The upper bound Limb count is /// r is `calcTwosCompLimbCount(bit_count)`. pub fn bitNotWrap(r: *Mutable, a: Const, signedness: Signedness, bit_count: usize) void { r.copy(a.negate()); const negative_one = Const{ .limbs = &.{1}, .positive = false }; _ = r.addWrap(r.toConst(), negative_one, signedness, bit_count); } /// r = a | b under 2s complement semantics. /// r may alias with a or b. /// /// a and b are zero-extended to the longer of a or b. /// /// Asserts that r has enough limbs to store the result. Upper bound is `@max(a.limbs.len, b.limbs.len)`. pub fn bitOr(r: *Mutable, a: Const, b: Const) void { // Trivial cases, llsignedor does not support zero. if (a.eqlZero()) { r.copy(b); return; } else if (b.eqlZero()) { r.copy(a); return; } if (a.limbs.len >= b.limbs.len) { r.positive = llsignedor(r.limbs, a.limbs, a.positive, b.limbs, b.positive); r.normalize(if (b.positive) a.limbs.len else b.limbs.len); } else { r.positive = llsignedor(r.limbs, b.limbs, b.positive, a.limbs, a.positive); r.normalize(if (a.positive) b.limbs.len else a.limbs.len); } } /// r = a & b under 2s complement semantics. /// r may alias with a or b. /// /// Asserts that r has enough limbs to store the result. /// If only a is positive, the upper bound is `a.limbs.len`. /// If only b is positive, the upper bound is `b.limbs.len`. /// If a and b are positive, the upper bound is `@min(a.limbs.len, b.limbs.len)`. /// If a and b are negative, the upper bound is `@max(a.limbs.len, b.limbs.len) + 1`. pub fn bitAnd(r: *Mutable, a: Const, b: Const) void { // Trivial cases, llsignedand does not support zero. if (a.eqlZero()) { r.copy(a); return; } else if (b.eqlZero()) { r.copy(b); return; } if (a.limbs.len >= b.limbs.len) { r.positive = llsignedand(r.limbs, a.limbs, a.positive, b.limbs, b.positive); r.normalize(if (b.positive) b.limbs.len else if (a.positive) a.limbs.len else a.limbs.len + 1); } else { r.positive = llsignedand(r.limbs, b.limbs, b.positive, a.limbs, a.positive); r.normalize(if (a.positive) a.limbs.len else if (b.positive) b.limbs.len else b.limbs.len + 1); } } /// r = a ^ b under 2s complement semantics. /// r may alias with a or b. /// /// Asserts that r has enough limbs to store the result. If a and b share the same signedness, the /// upper bound is `@max(a.limbs.len, b.limbs.len)`. Otherwise, if either a or b is negative /// but not both, the upper bound is `@max(a.limbs.len, b.limbs.len) + 1`. pub fn bitXor(r: *Mutable, a: Const, b: Const) void { // Trivial cases, because llsignedxor does not support negative zero. if (a.eqlZero()) { r.copy(b); return; } else if (b.eqlZero()) { r.copy(a); return; } if (a.limbs.len > b.limbs.len) { r.positive = llsignedxor(r.limbs, a.limbs, a.positive, b.limbs, b.positive); r.normalize(a.limbs.len + @intFromBool(a.positive != b.positive)); } else { r.positive = llsignedxor(r.limbs, b.limbs, b.positive, a.limbs, a.positive); r.normalize(b.limbs.len + @intFromBool(a.positive != b.positive)); } } /// rma may alias x or y. /// x and y may alias each other. /// Asserts that `rma` has enough limbs to store the result. Upper bound is /// `@min(x.limbs.len, y.limbs.len)`. /// /// `limbs_buffer` is used for temporary storage during the operation. When this function returns, /// it will have the same length as it had when the function was called. pub fn gcd(rma: *Mutable, x: Const, y: Const, limbs_buffer: *std.ArrayList(Limb)) !void { const prev_len = limbs_buffer.items.len; defer limbs_buffer.shrinkRetainingCapacity(prev_len); const x_copy = if (rma.limbs.ptr == x.limbs.ptr) blk: { const start = limbs_buffer.items.len; try limbs_buffer.appendSlice(x.limbs); break :blk x.toMutable(limbs_buffer.items[start..]).toConst(); } else x; const y_copy = if (rma.limbs.ptr == y.limbs.ptr) blk: { const start = limbs_buffer.items.len; try limbs_buffer.appendSlice(y.limbs); break :blk y.toMutable(limbs_buffer.items[start..]).toConst(); } else y; return gcdLehmer(rma, x_copy, y_copy, limbs_buffer); } /// q = a ^ b /// /// r may not alias a. /// /// Asserts that `r` has enough limbs to store the result. Upper bound is /// `calcPowLimbsBufferLen(a.bitCountAbs(), b)`. /// /// `limbs_buffer` is used for temporary storage. /// The amount required is given by `calcPowLimbsBufferLen`. pub fn pow(r: *Mutable, a: Const, b: u32, limbs_buffer: []Limb) void { assert(r.limbs.ptr != a.limbs.ptr); // illegal aliasing // Handle all the trivial cases first switch (b) { 0 => { // a^0 = 1 return r.set(1); }, 1 => { // a^1 = a return r.copy(a); }, else => {}, } if (a.eqlZero()) { // 0^b = 0 return r.set(0); } else if (a.limbs.len == 1 and a.limbs[0] == 1) { // 1^b = 1 and -1^b = ±1 r.set(1); r.positive = a.positive or (b & 1) == 0; return; } // Here a>1 and b>1 const needed_limbs = calcPowLimbsBufferLen(a.bitCountAbs(), b); assert(r.limbs.len >= needed_limbs); assert(limbs_buffer.len >= needed_limbs); llpow(r.limbs, a.limbs, b, limbs_buffer); r.normalize(needed_limbs); r.positive = a.positive or (b & 1) == 0; } /// r = ⌊√a⌋ /// /// r may alias a. /// /// Asserts that `r` has enough limbs to store the result. Upper bound is /// `(a.limbs.len - 1) / 2 + 1`. /// /// `limbs_buffer` is used for temporary storage. /// The amount required is given by `calcSqrtLimbsBufferLen`. pub fn sqrt( r: *Mutable, a: Const, limbs_buffer: []Limb, ) void { // Brent and Zimmermann, Modern Computer Arithmetic, Algorithm 1.13 SqrtInt // https://members.loria.fr/PZimmermann/mca/pub226.html var buf_index: usize = 0; var t = b: { const start = buf_index; buf_index += a.limbs.len; break :b Mutable.init(limbs_buffer[start..buf_index], 0); }; var u = b: { const start = buf_index; const shift = (a.bitCountAbs() + 1) / 2; buf_index += 1 + ((shift / limb_bits) + 1); var m = Mutable.init(limbs_buffer[start..buf_index], 1); m.shiftLeft(m.toConst(), shift); // u must be >= ⌊√a⌋, and should be as small as possible for efficiency break :b m; }; var s = b: { const start = buf_index; buf_index += u.limbs.len; break :b u.toConst().toMutable(limbs_buffer[start..buf_index]); }; var rem = b: { const start = buf_index; buf_index += s.limbs.len; break :b Mutable.init(limbs_buffer[start..buf_index], 0); }; while (true) { t.divFloor(&rem, a, s.toConst(), limbs_buffer[buf_index..]); t.add(t.toConst(), s.toConst()); u.shiftRight(t.toConst(), 1); if (u.toConst().order(s.toConst()).compare(.gte)) { r.copy(s.toConst()); return; } // Avoid copying u to s by swapping u and s const tmp_s = s; s = u; u = tmp_s; } } /// rma may not alias x or y. /// x and y may alias each other. /// Asserts that `rma` has enough limbs to store the result. Upper bound is given by `calcGcdNoAliasLimbLen`. /// /// `limbs_buffer` is used for temporary storage during the operation. pub fn gcdNoAlias(rma: *Mutable, x: Const, y: Const, limbs_buffer: *std.ArrayList(Limb)) !void { assert(rma.limbs.ptr != x.limbs.ptr); // illegal aliasing assert(rma.limbs.ptr != y.limbs.ptr); // illegal aliasing return gcdLehmer(rma, x, y, limbs_buffer); } fn gcdLehmer(result: *Mutable, xa: Const, ya: Const, limbs_buffer: *std.ArrayList(Limb)) !void { var x = try xa.toManaged(limbs_buffer.allocator); defer x.deinit(); x.abs(); var y = try ya.toManaged(limbs_buffer.allocator); defer y.deinit(); y.abs(); if (x.toConst().order(y.toConst()) == .lt) { x.swap(&y); } var t_big = try Managed.init(limbs_buffer.allocator); defer t_big.deinit(); var r = try Managed.init(limbs_buffer.allocator); defer r.deinit(); var tmp_x = try Managed.init(limbs_buffer.allocator); defer tmp_x.deinit(); while (y.len() > 1 and !y.eqlZero()) { assert(x.isPositive() and y.isPositive()); assert(x.len() >= y.len()); var xh: SignedDoubleLimb = x.limbs[x.len() - 1]; var yh: SignedDoubleLimb = if (x.len() > y.len()) 0 else y.limbs[x.len() - 1]; var A: SignedDoubleLimb = 1; var B: SignedDoubleLimb = 0; var C: SignedDoubleLimb = 0; var D: SignedDoubleLimb = 1; while (yh + C != 0 and yh + D != 0) { const q = @divFloor(xh + A, yh + C); const qp = @divFloor(xh + B, yh + D); if (q != qp) { break; } var t = A - q * C; A = C; C = t; t = B - q * D; B = D; D = t; t = xh - q * yh; xh = yh; yh = t; } if (B == 0) { // t_big = x % y, r is unused try r.divTrunc(&t_big, &x, &y); assert(t_big.isPositive()); x.swap(&y); y.swap(&t_big); } else { var storage: [8]Limb = undefined; const Ap = fixedIntFromSignedDoubleLimb(A, storage[0..2]).toManaged(limbs_buffer.allocator); const Bp = fixedIntFromSignedDoubleLimb(B, storage[2..4]).toManaged(limbs_buffer.allocator); const Cp = fixedIntFromSignedDoubleLimb(C, storage[4..6]).toManaged(limbs_buffer.allocator); const Dp = fixedIntFromSignedDoubleLimb(D, storage[6..8]).toManaged(limbs_buffer.allocator); // t_big = Ax + By try r.mul(&x, &Ap); try t_big.mul(&y, &Bp); try t_big.add(&r, &t_big); // u = Cx + Dy, r as u try tmp_x.copy(x.toConst()); try x.mul(&tmp_x, &Cp); try r.mul(&y, &Dp); try r.add(&x, &r); x.swap(&t_big); y.swap(&r); } } // euclidean algorithm assert(x.toConst().order(y.toConst()) != .lt); while (!y.toConst().eqlZero()) { try t_big.divTrunc(&r, &x, &y); x.swap(&y); y.swap(&r); } result.copy(x.toConst()); } // Truncates by default. fn div(q: *Mutable, r: *Mutable, x: *Mutable, y: *Mutable) void { assert(!y.eqlZero()); // division by zero assert(q != r); // illegal aliasing const q_positive = (x.positive == y.positive); const r_positive = x.positive; if (x.toConst().orderAbs(y.toConst()) == .lt) { // q may alias x so handle r first. r.copy(x.toConst()); r.positive = r_positive; q.set(0); return; } // Handle trailing zero-words of divisor/dividend. These are not handled in the following // algorithms. // Note, there must be a non-zero limb for either. // const x_trailing = std.mem.indexOfScalar(Limb, x.limbs[0..x.len], 0).?; // const y_trailing = std.mem.indexOfScalar(Limb, y.limbs[0..y.len], 0).?; const x_trailing = for (x.limbs[0..x.len], 0..) |xi, i| { if (xi != 0) break i; } else unreachable; const y_trailing = for (y.limbs[0..y.len], 0..) |yi, i| { if (yi != 0) break i; } else unreachable; const xy_trailing = @min(x_trailing, y_trailing); if (y.len - xy_trailing == 1) { const divisor = y.limbs[y.len - 1]; // Optimization for small divisor. By using a half limb we can avoid requiring DoubleLimb // divisions in the hot code path. This may often require compiler_rt software-emulation. if (divisor < maxInt(HalfLimb)) { lldiv0p5(q.limbs, &r.limbs[0], x.limbs[xy_trailing..x.len], @as(HalfLimb, @intCast(divisor))); } else { lldiv1(q.limbs, &r.limbs[0], x.limbs[xy_trailing..x.len], divisor); } q.normalize(x.len - xy_trailing); q.positive = q_positive; r.len = 1; r.positive = r_positive; } else { // Shrink x, y such that the trailing zero limbs shared between are removed. var x0 = Mutable{ .limbs = x.limbs[xy_trailing..], .len = x.len - xy_trailing, .positive = true, }; var y0 = Mutable{ .limbs = y.limbs[xy_trailing..], .len = y.len - xy_trailing, .positive = true, }; divmod(q, r, &x0, &y0); q.positive = q_positive; r.positive = r_positive; } if (xy_trailing != 0 and r.limbs[r.len - 1] != 0) { // Manually shift here since we know its limb aligned. mem.copyBackwards(Limb, r.limbs[xy_trailing..], r.limbs[0..r.len]); @memset(r.limbs[0..xy_trailing], 0); r.len += xy_trailing; } } /// Handbook of Applied Cryptography, 14.20 /// /// x = qy + r where 0 <= r < y /// y is modified but returned intact. fn divmod( q: *Mutable, r: *Mutable, x: *Mutable, y: *Mutable, ) void { // 0. // Normalize so that y[t] > b/2 const lz = @clz(y.limbs[y.len - 1]); const norm_shift = if (lz == 0 and y.toConst().isOdd()) limb_bits // Force an extra limb so that y is even. else lz; x.shiftLeft(x.toConst(), norm_shift); y.shiftLeft(y.toConst(), norm_shift); const n = x.len - 1; const t = y.len - 1; const shift = n - t; // 1. // for 0 <= j <= n - t, set q[j] to 0 q.len = shift + 1; q.positive = true; @memset(q.limbs[0..q.len], 0); // 2. // while x >= y * b^(n - t): // x -= y * b^(n - t) // q[n - t] += 1 // Note, this algorithm is performed only once if y[t] > base/2 and y is even, which we // enforced in step 0. This means we can replace the while with an if. // Note, multiplication by b^(n - t) comes down to shifting to the right by n - t limbs. // We can also replace x >= y * b^(n - t) by x/b^(n - t) >= y, and use shifts for that. { // x >= y * b^(n - t) can be replaced by x/b^(n - t) >= y. // 'divide' x by b^(n - t) var tmp = Mutable{ .limbs = x.limbs[shift..], .len = x.len - shift, .positive = true, }; if (tmp.toConst().order(y.toConst()) != .lt) { // Perform x -= y * b^(n - t) // Note, we can subtract y from x[n - t..] and get the result without shifting. // We can also re-use tmp which already contains the relevant part of x. Note that // this also edits x. // Due to the check above, this cannot underflow. tmp.sub(tmp.toConst(), y.toConst()); // tmp.sub normalized tmp, but we need to normalize x now. x.limbs.len = tmp.limbs.len + shift; q.limbs[shift] += 1; } } // 3. // for i from n down to t + 1, do var i = n; while (i >= t + 1) : (i -= 1) { const k = i - t - 1; // 3.1. // if x_i == y_t: // q[i - t - 1] = b - 1 // else: // q[i - t - 1] = (x[i] * b + x[i - 1]) / y[t] if (x.limbs[i] == y.limbs[t]) { q.limbs[k] = maxInt(Limb); } else { const q0 = (@as(DoubleLimb, x.limbs[i]) << limb_bits) | @as(DoubleLimb, x.limbs[i - 1]); const n0 = @as(DoubleLimb, y.limbs[t]); q.limbs[k] = @as(Limb, @intCast(q0 / n0)); } // 3.2 // while q[i - t - 1] * (y[t] * b + y[t - 1] > x[i] * b * b + x[i - 1] + x[i - 2]: // q[i - t - 1] -= 1 // Note, if y[t] > b / 2 this part is repeated no more than twice. // Extract from y. const y0 = if (t > 0) y.limbs[t - 1] else 0; const y1 = y.limbs[t]; // Extract from x. // Note, big endian. const tmp0 = [_]Limb{ x.limbs[i], if (i >= 1) x.limbs[i - 1] else 0, if (i >= 2) x.limbs[i - 2] else 0, }; while (true) { // Ad-hoc 2x1 multiplication with q[i - t - 1]. // Note, big endian. var tmp1 = [_]Limb{ 0, undefined, undefined }; tmp1[2] = addMulLimbWithCarry(0, y0, q.limbs[k], &tmp1[0]); tmp1[1] = addMulLimbWithCarry(0, y1, q.limbs[k], &tmp1[0]); // Big-endian compare if (mem.order(Limb, &tmp1, &tmp0) != .gt) break; q.limbs[k] -= 1; } // 3.3. // x -= q[i - t - 1] * y * b^(i - t - 1) // Note, we multiply by a single limb here. // The shift doesn't need to be performed if we add the result of the first multiplication // to x[i - t - 1]. const underflow = llmulLimb(.sub, x.limbs[k..x.len], y.limbs[0..y.len], q.limbs[k]); // 3.4. // if x < 0: // x += y * b^(i - t - 1) // q[i - t - 1] -= 1 // Note, we check for x < 0 using the underflow flag from the previous operation. if (underflow) { // While we didn't properly set the signedness of x, this operation should 'flow' it back to positive. llaccum(.add, x.limbs[k..x.len], y.limbs[0..y.len]); q.limbs[k] -= 1; } } x.normalize(x.len); q.normalize(q.len); // De-normalize r and y. r.shiftRight(x.toConst(), norm_shift); y.shiftRight(y.toConst(), norm_shift); } /// If a is positive, this passes through to truncate. /// If a is negative, then r is set to positive with the bit pattern ~(a - 1). /// r may alias a. /// /// Asserts `r` has enough storage to store the result. /// The upper bound is `calcTwosCompLimbCount(a.len)`. pub fn convertToTwosComplement(r: *Mutable, a: Const, signedness: Signedness, bit_count: usize) void { if (a.positive) { r.truncate(a, signedness, bit_count); return; } const req_limbs = calcTwosCompLimbCount(bit_count); if (req_limbs == 0 or a.eqlZero()) { r.set(0); return; } const bit = @as(Log2Limb, @truncate(bit_count - 1)); const signmask = @as(Limb, 1) << bit; const mask = (signmask << 1) -% 1; r.addScalar(a.abs(), -1); if (req_limbs > r.len) { @memset(r.limbs[r.len..req_limbs], 0); } assert(r.limbs.len >= req_limbs); r.len = req_limbs; llnot(r.limbs[0..r.len]); r.limbs[r.len - 1] &= mask; r.normalize(r.len); } /// Truncate an integer to a number of bits, following 2s-complement semantics. /// r may alias a. /// /// Asserts `r` has enough storage to store the result. /// The upper bound is `calcTwosCompLimbCount(a.len)`. pub fn truncate(r: *Mutable, a: Const, signedness: Signedness, bit_count: usize) void { const req_limbs = calcTwosCompLimbCount(bit_count); // Handle 0-bit integers. if (req_limbs == 0 or a.eqlZero()) { r.set(0); return; } const bit = @as(Log2Limb, @truncate(bit_count - 1)); const signmask = @as(Limb, 1) << bit; // 0b0..010...0 where 1 is the sign bit. const mask = (signmask << 1) -% 1; // 0b0..01..1 where the leftmost 1 is the sign bit. if (!a.positive) { // Convert the integer from sign-magnitude into twos-complement. // -x = ~(x - 1) // Note, we simply take req_limbs * @bitSizeOf(Limb) as the // target bit count. r.addScalar(a.abs(), -1); // Zero-extend the result if (req_limbs > r.len) { @memset(r.limbs[r.len..req_limbs], 0); } // Truncate to required number of limbs. assert(r.limbs.len >= req_limbs); r.len = req_limbs; // Without truncating, we can already peek at the sign bit of the result here. // Note that it will be 0 if the result is negative, as we did not apply the flip here. // If the result is negative, we have // -(-x & mask) // = ~(~(x - 1) & mask) + 1 // = ~(~((x - 1) | ~mask)) + 1 // = ((x - 1) | ~mask)) + 1 // Note, this is only valid for the target bits and not the upper bits // of the most significant limb. Those still need to be cleared. // Also note that `mask` is zero for all other bits, reducing to the identity. // This means that we still need to use & mask to clear off the upper bits. if (signedness == .signed and r.limbs[r.len - 1] & signmask == 0) { // Re-add the one and negate to get the result. r.limbs[r.len - 1] &= mask; // Note, addition cannot require extra limbs here as we did a subtraction before. r.addScalar(r.toConst(), 1); r.normalize(r.len); r.positive = false; } else { llnot(r.limbs[0..r.len]); r.limbs[r.len - 1] &= mask; r.normalize(r.len); } } else { if (a.limbs.len < req_limbs) { // Integer fits within target bits, no wrapping required. r.copy(a); return; } r.copy(.{ .positive = a.positive, .limbs = a.limbs[0..req_limbs], }); r.limbs[r.len - 1] &= mask; r.normalize(r.len); if (signedness == .signed and r.limbs[r.len - 1] & signmask != 0) { // Convert 2s-complement back to sign-magnitude. // Sign-extend the upper bits so that they are inverted correctly. r.limbs[r.len - 1] |= ~mask; llnot(r.limbs[0..r.len]); // Note, can only overflow if r holds 0xFFF...F which can only happen if // a holds 0. r.addScalar(r.toConst(), 1); r.positive = false; } } } /// Saturate an integer to a number of bits, following 2s-complement semantics. /// r may alias a. /// /// Asserts `r` has enough storage to store the result. /// The upper bound is `calcTwosCompLimbCount(a.len)`. pub fn saturate(r: *Mutable, a: Const, signedness: Signedness, bit_count: usize) void { if (!a.fitsInTwosComp(signedness, bit_count)) { r.setTwosCompIntLimit(if (r.positive) .max else .min, signedness, bit_count); } } /// Read the value of `x` from `buffer`. /// Asserts that `buffer` is large enough to contain a value of bit-size `bit_count`. /// /// The contents of `buffer` are interpreted as if they were the contents of /// @ptrCast(*[buffer.len]const u8, &x). Byte ordering is determined by `endian` /// and any required padding bits are expected on the MSB end. pub fn readTwosComplement( x: *Mutable, buffer: []const u8, bit_count: usize, endian: Endian, signedness: Signedness, ) void { return readPackedTwosComplement(x, buffer, 0, bit_count, endian, signedness); } /// Read the value of `x` from a packed memory `buffer`. /// Asserts that `buffer` is large enough to contain a value of bit-size `bit_count` /// at offset `bit_offset`. /// /// This is equivalent to loading the value of an integer with `bit_count` bits as /// if it were a field in packed memory at the provided bit offset. pub fn readPackedTwosComplement( x: *Mutable, buffer: []const u8, bit_offset: usize, bit_count: usize, endian: Endian, signedness: Signedness, ) void { if (bit_count == 0) { x.limbs[0] = 0; x.len = 1; x.positive = true; return; } // Check whether the input is negative var positive = true; if (signedness == .signed) { const total_bits = bit_offset + bit_count; const last_byte = switch (endian) { .little => ((total_bits + 7) / 8) - 1, .big => buffer.len - ((total_bits + 7) / 8), }; const sign_bit = @as(u8, 1) << @as(u3, @intCast((total_bits - 1) % 8)); positive = ((buffer[last_byte] & sign_bit) == 0); } // Copy all complete limbs var carry: u1 = 1; var limb_index: usize = 0; var bit_index: usize = 0; while (limb_index < bit_count / @bitSizeOf(Limb)) : (limb_index += 1) { // Read one Limb of bits var limb = mem.readPackedInt(Limb, buffer, bit_index + bit_offset, endian); bit_index += @bitSizeOf(Limb); // 2's complement (bitwise not, then add carry bit) if (!positive) { const ov = @addWithOverflow(~limb, carry); limb = ov[0]; carry = ov[1]; } x.limbs[limb_index] = limb; } // Copy the remaining bits if (bit_count != bit_index) { // Read all remaining bits var limb = switch (signedness) { .unsigned => mem.readVarPackedInt(Limb, buffer, bit_index + bit_offset, bit_count - bit_index, endian, .unsigned), .signed => b: { const SLimb = std.meta.Int(.signed, @bitSizeOf(Limb)); const limb = mem.readVarPackedInt(SLimb, buffer, bit_index + bit_offset, bit_count - bit_index, endian, .signed); break :b @as(Limb, @bitCast(limb)); }, }; // 2's complement (bitwise not, then add carry bit) if (!positive) { const ov = @addWithOverflow(~limb, carry); assert(ov[1] == 0); limb = ov[0]; } x.limbs[limb_index] = limb; limb_index += 1; } x.positive = positive; x.len = limb_index; x.normalize(x.len); } /// Normalize a possible sequence of leading zeros. /// /// [1, 2, 3, 4, 0] -> [1, 2, 3, 4] /// [1, 2, 0, 0, 0] -> [1, 2] /// [0, 0, 0, 0, 0] -> [0] pub fn normalize(r: *Mutable, length: usize) void { r.len = llnormalize(r.limbs[0..length]); } }; /// A arbitrary-precision big integer, with a fixed set of immutable limbs. pub const Const = struct { /// Raw digits. These are: /// /// * Little-endian ordered /// * limbs.len >= 1 /// * Zero is represented as limbs.len == 1 with limbs[0] == 0. /// /// Accessing limbs directly should be avoided. limbs: []const Limb, positive: bool, /// The result is an independent resource which is managed by the caller. pub fn toManaged(self: Const, allocator: Allocator) Allocator.Error!Managed { const limbs = try allocator.alloc(Limb, @max(Managed.default_capacity, self.limbs.len)); @memcpy(limbs[0..self.limbs.len], self.limbs); return Managed{ .allocator = allocator, .limbs = limbs, .metadata = if (self.positive) self.limbs.len & ~Managed.sign_bit else self.limbs.len | Managed.sign_bit, }; } /// Asserts `limbs` is big enough to store the value. pub fn toMutable(self: Const, limbs: []Limb) Mutable { @memcpy(limbs[0..self.limbs.len], self.limbs[0..self.limbs.len]); return .{ .limbs = limbs, .positive = self.positive, .len = self.limbs.len, }; } pub fn dump(self: Const) void { for (self.limbs[0..self.limbs.len]) |limb| { std.debug.print("{x} ", .{limb}); } std.debug.print("positive={}\n", .{self.positive}); } pub fn abs(self: Const) Const { return .{ .limbs = self.limbs, .positive = true, }; } pub fn negate(self: Const) Const { return .{ .limbs = self.limbs, .positive = !self.positive, }; } pub fn isOdd(self: Const) bool { return self.limbs[0] & 1 != 0; } pub fn isEven(self: Const) bool { return !self.isOdd(); } /// Returns the number of bits required to represent the absolute value of an integer. pub fn bitCountAbs(self: Const) usize { return (self.limbs.len - 1) * limb_bits + (limb_bits - @clz(self.limbs[self.limbs.len - 1])); } /// Returns the number of bits required to represent the integer in twos-complement form. /// /// If the integer is negative the value returned is the number of bits needed by a signed /// integer to represent the value. If positive the value is the number of bits for an /// unsigned integer. Any unsigned integer will fit in the signed integer with bitcount /// one greater than the returned value. /// /// e.g. -127 returns 8 as it will fit in an i8. 127 returns 7 since it fits in a u7. pub fn bitCountTwosComp(self: Const) usize { var bits = self.bitCountAbs(); // If the entire value has only one bit set (e.g. 0b100000000) then the negation in twos // complement requires one less bit. if (!self.positive) block: { bits += 1; if (@popCount(self.limbs[self.limbs.len - 1]) == 1) { for (self.limbs[0 .. self.limbs.len - 1]) |limb| { if (@popCount(limb) != 0) { break :block; } } bits -= 1; } } return bits; } /// @popCount with two's complement semantics. /// /// This returns the number of 1 bits set when the value would be represented in /// two's complement with the given integer width (bit_count). /// This includes the leading sign bit, which will be set for negative values. /// /// Asserts that bit_count is enough to represent value in two's compliment /// and that the final result fits in a usize. /// Asserts that there are no trailing empty limbs on the most significant end, /// i.e. that limb count matches `calcLimbLen()` and zero is not negative. pub fn popCount(self: Const, bit_count: usize) usize { var sum: usize = 0; if (self.positive) { for (self.limbs) |limb| { sum += @popCount(limb); } } else { assert(self.fitsInTwosComp(.signed, bit_count)); assert(self.limbs[self.limbs.len - 1] != 0); var remaining_bits = bit_count; var carry: u1 = 1; var add_res: Limb = undefined; // All but the most significant limb. for (self.limbs[0 .. self.limbs.len - 1]) |limb| { const ov = @addWithOverflow(~limb, carry); add_res = ov[0]; carry = ov[1]; sum += @popCount(add_res); remaining_bits -= limb_bits; // Asserted not to underflow by fitsInTwosComp } // The most significant limb may have fewer than @bitSizeOf(Limb) meaningful bits, // which we can detect with @clz(). // There may also be fewer limbs than needed to fill bit_count. const limb = self.limbs[self.limbs.len - 1]; const leading_zeroes = @clz(limb); // The most significant limb is asserted not to be all 0s (above), // so ~limb cannot be all 1s, and ~limb + 1 cannot overflow. sum += @popCount(~limb + carry); sum -= leading_zeroes; // All leading zeroes were flipped and added to sum, so undo those const remaining_ones = remaining_bits - (limb_bits - leading_zeroes); // All bits not covered by limbs sum += remaining_ones; } return sum; } pub fn fitsInTwosComp(self: Const, signedness: Signedness, bit_count: usize) bool { if (self.eqlZero()) { return true; } if (signedness == .unsigned and !self.positive) { return false; } const req_bits = self.bitCountTwosComp() + @intFromBool(self.positive and signedness == .signed); return bit_count >= req_bits; } /// Returns whether self can fit into an integer of the requested type. pub fn fits(self: Const, comptime T: type) bool { const info = @typeInfo(T).Int; return self.fitsInTwosComp(info.signedness, info.bits); } /// Returns the approximate size of the integer in the given base. Negative values accommodate for /// the minus sign. This is used for determining the number of characters needed to print the /// value. It is inexact and may exceed the given value by ~1-2 bytes. /// TODO See if we can make this exact. pub fn sizeInBaseUpperBound(self: Const, base: usize) usize { const bit_count = @as(usize, @intFromBool(!self.positive)) + self.bitCountAbs(); return (bit_count / math.log2(base)) + 2; } pub const ConvertError = error{ NegativeIntoUnsigned, TargetTooSmall, }; /// Convert self to type T. /// /// Returns an error if self cannot be narrowed into the requested type without truncation. pub fn to(self: Const, comptime T: type) ConvertError!T { switch (@typeInfo(T)) { .Int => |info| { // Make sure -0 is handled correctly. if (self.eqlZero()) return 0; const UT = std.meta.Int(.unsigned, info.bits); if (!self.fitsInTwosComp(info.signedness, info.bits)) { return error.TargetTooSmall; } var r: UT = 0; if (@sizeOf(UT) <= @sizeOf(Limb)) { r = @as(UT, @intCast(self.limbs[0])); } else { for (self.limbs[0..self.limbs.len], 0..) |_, ri| { const limb = self.limbs[self.limbs.len - ri - 1]; r <<= limb_bits; r |= limb; } } if (info.signedness == .unsigned) { return if (self.positive) @as(T, @intCast(r)) else error.NegativeIntoUnsigned; } else { if (self.positive) { return @intCast(r); } else { if (math.cast(T, r)) |ok| { return -ok; } else { return minInt(T); } } } }, else => @compileError("cannot convert Const to type " ++ @typeName(T)), } } /// To allow `std.fmt.format` to work with this type. /// If the absolute value of integer is greater than or equal to `pow(2, 64 * @sizeOf(usize) * 8)`, /// this function will fail to print the string, printing "(BigInt)" instead of a number. /// This is because the rendering algorithm requires reversing a string, which requires O(N) memory. /// See `toString` and `toStringAlloc` for a way to print big integers without failure. pub fn format( self: Const, comptime fmt: []const u8, options: std.fmt.FormatOptions, out_stream: anytype, ) !void { _ = options; comptime var base = 10; comptime var case: std.fmt.Case = .lower; if (fmt.len == 0 or comptime mem.eql(u8, fmt, "d")) { base = 10; case = .lower; } else if (comptime mem.eql(u8, fmt, "b")) { base = 2; case = .lower; } else if (comptime mem.eql(u8, fmt, "x")) { base = 16; case = .lower; } else if (comptime mem.eql(u8, fmt, "X")) { base = 16; case = .upper; } else { std.fmt.invalidFmtError(fmt, self); } const available_len = 64; if (self.limbs.len > available_len) return out_stream.writeAll("(BigInt)"); var limbs: [calcToStringLimbsBufferLen(available_len, base)]Limb = undefined; const biggest: Const = .{ .limbs = &([1]Limb{comptime math.maxInt(Limb)} ** available_len), .positive = false, }; var buf: [biggest.sizeInBaseUpperBound(base)]u8 = undefined; const len = self.toString(&buf, base, case, &limbs); return out_stream.writeAll(buf[0..len]); } /// Converts self to a string in the requested base. /// Caller owns returned memory. /// Asserts that `base` is in the range [2, 16]. /// See also `toString`, a lower level function than this. pub fn toStringAlloc(self: Const, allocator: Allocator, base: u8, case: std.fmt.Case) Allocator.Error![]u8 { assert(base >= 2); assert(base <= 16); if (self.eqlZero()) { return allocator.dupe(u8, "0"); } const string = try allocator.alloc(u8, self.sizeInBaseUpperBound(base)); errdefer allocator.free(string); const limbs = try allocator.alloc(Limb, calcToStringLimbsBufferLen(self.limbs.len, base)); defer allocator.free(limbs); return allocator.realloc(string, self.toString(string, base, case, limbs)); } /// Converts self to a string in the requested base. /// Asserts that `base` is in the range [2, 16]. /// `string` is a caller-provided slice of at least `sizeInBaseUpperBound` bytes, /// where the result is written to. /// Returns the length of the string. /// `limbs_buffer` is caller-provided memory for `toString` to use as a working area. It must have /// length of at least `calcToStringLimbsBufferLen`. /// In the case of power-of-two base, `limbs_buffer` is ignored. /// See also `toStringAlloc`, a higher level function than this. pub fn toString(self: Const, string: []u8, base: u8, case: std.fmt.Case, limbs_buffer: []Limb) usize { assert(base >= 2); assert(base <= 16); if (self.eqlZero()) { string[0] = '0'; return 1; } var digits_len: usize = 0; // Power of two: can do a single pass and use masks to extract digits. if (math.isPowerOfTwo(base)) { const base_shift = math.log2_int(Limb, base); outer: for (self.limbs[0..self.limbs.len]) |limb| { var shift: usize = 0; while (shift < limb_bits) : (shift += base_shift) { const r = @as(u8, @intCast((limb >> @as(Log2Limb, @intCast(shift))) & @as(Limb, base - 1))); const ch = std.fmt.digitToChar(r, case); string[digits_len] = ch; digits_len += 1; // If we hit the end, it must be all zeroes from here. if (digits_len == string.len) break :outer; } } // Always will have a non-zero digit somewhere. while (string[digits_len - 1] == '0') { digits_len -= 1; } } else { // Non power-of-two: batch divisions per word size. // We use a HalfLimb here so the division uses the faster lldiv0p5 over lldiv1 codepath. const digits_per_limb = math.log(HalfLimb, base, maxInt(HalfLimb)); var limb_base: Limb = 1; var j: usize = 0; while (j < digits_per_limb) : (j += 1) { limb_base *= base; } const b: Const = .{ .limbs = &[_]Limb{limb_base}, .positive = true }; var q: Mutable = .{ .limbs = limbs_buffer[0 .. self.limbs.len + 2], .positive = true, // Make absolute by ignoring self.positive. .len = self.limbs.len, }; @memcpy(q.limbs[0..self.limbs.len], self.limbs); var r: Mutable = .{ .limbs = limbs_buffer[q.limbs.len..][0..self.limbs.len], .positive = true, .len = 1, }; r.limbs[0] = 0; const rest_of_the_limbs_buf = limbs_buffer[q.limbs.len + r.limbs.len ..]; while (q.len >= 2) { // Passing an allocator here would not be helpful since this division is destroying // information, not creating it. [TODO citation needed] q.divTrunc(&r, q.toConst(), b, rest_of_the_limbs_buf); var r_word = r.limbs[0]; var i: usize = 0; while (i < digits_per_limb) : (i += 1) { const ch = std.fmt.digitToChar(@as(u8, @intCast(r_word % base)), case); r_word /= base; string[digits_len] = ch; digits_len += 1; } } { assert(q.len == 1); var r_word = q.limbs[0]; while (r_word != 0) { const ch = std.fmt.digitToChar(@as(u8, @intCast(r_word % base)), case); r_word /= base; string[digits_len] = ch; digits_len += 1; } } } if (!self.positive) { string[digits_len] = '-'; digits_len += 1; } const s = string[0..digits_len]; mem.reverse(u8, s); return s.len; } /// Write the value of `x` into `buffer` /// Asserts that `buffer` is large enough to store the value. /// /// `buffer` is filled so that its contents match what would be observed via /// @ptrCast(*[buffer.len]const u8, &x). Byte ordering is determined by `endian`, /// and any required padding bits are added on the MSB end. pub fn writeTwosComplement(x: Const, buffer: []u8, endian: Endian) void { return writePackedTwosComplement(x, buffer, 0, 8 * buffer.len, endian); } /// Write the value of `x` to a packed memory `buffer`. /// Asserts that `buffer` is large enough to contain a value of bit-size `bit_count` /// at offset `bit_offset`. /// /// This is equivalent to storing the value of an integer with `bit_count` bits as /// if it were a field in packed memory at the provided bit offset. pub fn writePackedTwosComplement(x: Const, buffer: []u8, bit_offset: usize, bit_count: usize, endian: Endian) void { assert(x.fitsInTwosComp(if (x.positive) .unsigned else .signed, bit_count)); // Copy all complete limbs var carry: u1 = 1; var limb_index: usize = 0; var bit_index: usize = 0; while (limb_index < bit_count / @bitSizeOf(Limb)) : (limb_index += 1) { var limb: Limb = if (limb_index < x.limbs.len) x.limbs[limb_index] else 0; // 2's complement (bitwise not, then add carry bit) if (!x.positive) { const ov = @addWithOverflow(~limb, carry); limb = ov[0]; carry = ov[1]; } // Write one Limb of bits mem.writePackedInt(Limb, buffer, bit_index + bit_offset, limb, endian); bit_index += @bitSizeOf(Limb); } // Copy the remaining bits if (bit_count != bit_index) { var limb: Limb = if (limb_index < x.limbs.len) x.limbs[limb_index] else 0; // 2's complement (bitwise not, then add carry bit) if (!x.positive) limb = ~limb +% carry; // Write all remaining bits mem.writeVarPackedInt(buffer, bit_index + bit_offset, bit_count - bit_index, limb, endian); } } /// Returns `math.Order.lt`, `math.Order.eq`, `math.Order.gt` if /// `|a| < |b|`, `|a| == |b|`, or `|a| > |b|` respectively. pub fn orderAbs(a: Const, b: Const) math.Order { if (a.limbs.len < b.limbs.len) { return .lt; } if (a.limbs.len > b.limbs.len) { return .gt; } var i: usize = a.limbs.len - 1; while (i != 0) : (i -= 1) { if (a.limbs[i] != b.limbs[i]) { break; } } if (a.limbs[i] < b.limbs[i]) { return .lt; } else if (a.limbs[i] > b.limbs[i]) { return .gt; } else { return .eq; } } /// Returns `math.Order.lt`, `math.Order.eq`, `math.Order.gt` if `a < b`, `a == b` or `a > b` respectively. pub fn order(a: Const, b: Const) math.Order { if (a.positive != b.positive) { if (eqlZero(a) and eqlZero(b)) { return .eq; } else { return if (a.positive) .gt else .lt; } } else { const r = orderAbs(a, b); return if (a.positive) r else switch (r) { .lt => math.Order.gt, .eq => math.Order.eq, .gt => math.Order.lt, }; } } /// Same as `order` but the right-hand operand is a primitive integer. pub fn orderAgainstScalar(lhs: Const, scalar: anytype) math.Order { // Normally we could just determine the number of limbs needed with calcLimbLen, // but that is not comptime-known when scalar is not a comptime_int. Instead, we // use calcTwosCompLimbCount for a non-comptime_int scalar, which can be pessimistic // in the case that scalar happens to be small in magnitude within its type, but it // is well worth being able to use the stack and not needing an allocator passed in. // Note that Mutable.init still sets len to calcLimbLen(scalar) in any case. const limb_len = comptime switch (@typeInfo(@TypeOf(scalar))) { .ComptimeInt => calcLimbLen(scalar), .Int => |info| calcTwosCompLimbCount(info.bits), else => @compileError("expected scalar to be an int"), }; var limbs: [limb_len]Limb = undefined; const rhs = Mutable.init(&limbs, scalar); return order(lhs, rhs.toConst()); } /// Returns true if `a == 0`. pub fn eqlZero(a: Const) bool { var d: Limb = 0; for (a.limbs) |limb| d |= limb; return d == 0; } /// Returns true if `|a| == |b|`. pub fn eqlAbs(a: Const, b: Const) bool { return orderAbs(a, b) == .eq; } /// Returns true if `a == b`. pub fn eql(a: Const, b: Const) bool { return order(a, b) == .eq; } pub fn clz(a: Const, bits: Limb) Limb { // Limbs are stored in little-endian order but we need // to iterate big-endian. var total_limb_lz: Limb = 0; var i: usize = a.limbs.len; const bits_per_limb = @sizeOf(Limb) * 8; while (i != 0) { i -= 1; const limb = a.limbs[i]; const this_limb_lz = @clz(limb); total_limb_lz += this_limb_lz; if (this_limb_lz != bits_per_limb) break; } const total_limb_bits = a.limbs.len * bits_per_limb; return total_limb_lz + bits - total_limb_bits; } pub fn ctz(a: Const, bits: Limb) Limb { // Limbs are stored in little-endian order. var result: Limb = 0; for (a.limbs) |limb| { const limb_tz = @ctz(limb); result += limb_tz; if (limb_tz != @sizeOf(Limb) * 8) break; } return @min(result, bits); } }; /// An arbitrary-precision big integer along with an allocator which manages the memory. /// /// Memory is allocated as needed to ensure operations never overflow. The range /// is bounded only by available memory. pub const Managed = struct { pub const sign_bit: usize = 1 << (@typeInfo(usize).Int.bits - 1); /// Default number of limbs to allocate on creation of a `Managed`. pub const default_capacity = 4; /// Allocator used by the Managed when requesting memory. allocator: Allocator, /// Raw digits. These are: /// /// * Little-endian ordered /// * limbs.len >= 1 /// * Zero is represent as Managed.len() == 1 with limbs[0] == 0. /// /// Accessing limbs directly should be avoided. limbs: []Limb, /// High bit is the sign bit. If set, Managed is negative, else Managed is positive. /// The remaining bits represent the number of limbs used by Managed. metadata: usize, /// Creates a new `Managed`. `default_capacity` limbs will be allocated immediately. /// The integer value after initializing is `0`. pub fn init(allocator: Allocator) !Managed { return initCapacity(allocator, default_capacity); } pub fn toMutable(self: Managed) Mutable { return .{ .limbs = self.limbs, .positive = self.isPositive(), .len = self.len(), }; } pub fn toConst(self: Managed) Const { return .{ .limbs = self.limbs[0..self.len()], .positive = self.isPositive(), }; } /// Creates a new `Managed` with value `value`. /// /// This is identical to an `init`, followed by a `set`. pub fn initSet(allocator: Allocator, value: anytype) !Managed { var s = try Managed.init(allocator); errdefer s.deinit(); try s.set(value); return s; } /// Creates a new Managed with a specific capacity. If capacity < default_capacity then the /// default capacity will be used instead. /// The integer value after initializing is `0`. pub fn initCapacity(allocator: Allocator, capacity: usize) !Managed { return Managed{ .allocator = allocator, .metadata = 1, .limbs = block: { const limbs = try allocator.alloc(Limb, @max(default_capacity, capacity)); limbs[0] = 0; break :block limbs; }, }; } /// Returns the number of limbs currently in use. pub fn len(self: Managed) usize { return self.metadata & ~sign_bit; } /// Returns whether an Managed is positive. pub fn isPositive(self: Managed) bool { return self.metadata & sign_bit == 0; } /// Sets the sign of an Managed. pub fn setSign(self: *Managed, positive: bool) void { if (positive) { self.metadata &= ~sign_bit; } else { self.metadata |= sign_bit; } } /// Sets the length of an Managed. /// /// If setLen is used, then the Managed must be normalized to suit. pub fn setLen(self: *Managed, new_len: usize) void { self.metadata &= sign_bit; self.metadata |= new_len; } pub fn setMetadata(self: *Managed, positive: bool, length: usize) void { self.metadata = if (positive) length & ~sign_bit else length | sign_bit; } /// Ensures an Managed has enough space allocated for capacity limbs. If the Managed does not have /// sufficient capacity, the exact amount will be allocated. This occurs even if the requested /// capacity is only greater than the current capacity by one limb. pub fn ensureCapacity(self: *Managed, capacity: usize) !void { if (capacity <= self.limbs.len) { return; } self.limbs = try self.allocator.realloc(self.limbs, capacity); } /// Frees all associated memory. pub fn deinit(self: *Managed) void { self.allocator.free(self.limbs); self.* = undefined; } /// Returns a `Managed` with the same value. The returned `Managed` is a deep copy and /// can be modified separately from the original, and its resources are managed /// separately from the original. pub fn clone(other: Managed) !Managed { return other.cloneWithDifferentAllocator(other.allocator); } pub fn cloneWithDifferentAllocator(other: Managed, allocator: Allocator) !Managed { return Managed{ .allocator = allocator, .metadata = other.metadata, .limbs = block: { const limbs = try allocator.alloc(Limb, other.len()); @memcpy(limbs, other.limbs[0..other.len()]); break :block limbs; }, }; } /// Copies the value of the integer to an existing `Managed` so that they both have the same value. /// Extra memory will be allocated if the receiver does not have enough capacity. pub fn copy(self: *Managed, other: Const) !void { if (self.limbs.ptr == other.limbs.ptr) return; try self.ensureCapacity(other.limbs.len); @memcpy(self.limbs[0..other.limbs.len], other.limbs[0..other.limbs.len]); self.setMetadata(other.positive, other.limbs.len); } /// Efficiently swap a `Managed` with another. This swaps the limb pointers and a full copy is not /// performed. The address of the limbs field will not be the same after this function. pub fn swap(self: *Managed, other: *Managed) void { mem.swap(Managed, self, other); } /// Debugging tool: prints the state to stderr. pub fn dump(self: Managed) void { for (self.limbs[0..self.len()]) |limb| { std.debug.print("{x} ", .{limb}); } std.debug.print("capacity={} positive={}\n", .{ self.limbs.len, self.isPositive() }); } /// Negate the sign. pub fn negate(self: *Managed) void { self.metadata ^= sign_bit; } /// Make positive. pub fn abs(self: *Managed) void { self.metadata &= ~sign_bit; } pub fn isOdd(self: Managed) bool { return self.limbs[0] & 1 != 0; } pub fn isEven(self: Managed) bool { return !self.isOdd(); } /// Returns the number of bits required to represent the absolute value of an integer. pub fn bitCountAbs(self: Managed) usize { return self.toConst().bitCountAbs(); } /// Returns the number of bits required to represent the integer in twos-complement form. /// /// If the integer is negative the value returned is the number of bits needed by a signed /// integer to represent the value. If positive the value is the number of bits for an /// unsigned integer. Any unsigned integer will fit in the signed integer with bitcount /// one greater than the returned value. /// /// e.g. -127 returns 8 as it will fit in an i8. 127 returns 7 since it fits in a u7. pub fn bitCountTwosComp(self: Managed) usize { return self.toConst().bitCountTwosComp(); } pub fn fitsInTwosComp(self: Managed, signedness: Signedness, bit_count: usize) bool { return self.toConst().fitsInTwosComp(signedness, bit_count); } /// Returns whether self can fit into an integer of the requested type. pub fn fits(self: Managed, comptime T: type) bool { return self.toConst().fits(T); } /// Returns the approximate size of the integer in the given base. Negative values accommodate for /// the minus sign. This is used for determining the number of characters needed to print the /// value. It is inexact and may exceed the given value by ~1-2 bytes. pub fn sizeInBaseUpperBound(self: Managed, base: usize) usize { return self.toConst().sizeInBaseUpperBound(base); } /// Sets an Managed to value. Value must be an primitive integer type. pub fn set(self: *Managed, value: anytype) Allocator.Error!void { try self.ensureCapacity(calcLimbLen(value)); var m = self.toMutable(); m.set(value); self.setMetadata(m.positive, m.len); } pub const ConvertError = Const.ConvertError; /// Convert self to type T. /// /// Returns an error if self cannot be narrowed into the requested type without truncation. pub fn to(self: Managed, comptime T: type) ConvertError!T { return self.toConst().to(T); } /// Set self from the string representation `value`. /// /// `value` must contain only digits <= `base` and is case insensitive. Base prefixes are /// not allowed (e.g. 0x43 should simply be 43). Underscores in the input string are /// ignored and can be used as digit separators. /// /// Returns an error if memory could not be allocated or `value` has invalid digits for the /// requested base. /// /// self's allocator is used for temporary storage to boost multiplication performance. pub fn setString(self: *Managed, base: u8, value: []const u8) !void { if (base < 2 or base > 16) return error.InvalidBase; try self.ensureCapacity(calcSetStringLimbCount(base, value.len)); const limbs_buffer = try self.allocator.alloc(Limb, calcSetStringLimbsBufferLen(base, value.len)); defer self.allocator.free(limbs_buffer); var m = self.toMutable(); try m.setString(base, value, limbs_buffer, self.allocator); self.setMetadata(m.positive, m.len); } /// Set self to either bound of a 2s-complement integer. /// Note: The result is still sign-magnitude, not twos complement! In order to convert the /// result to twos complement, it is sufficient to take the absolute value. pub fn setTwosCompIntLimit( r: *Managed, limit: TwosCompIntLimit, signedness: Signedness, bit_count: usize, ) !void { try r.ensureCapacity(calcTwosCompLimbCount(bit_count)); var m = r.toMutable(); m.setTwosCompIntLimit(limit, signedness, bit_count); r.setMetadata(m.positive, m.len); } /// Converts self to a string in the requested base. Memory is allocated from the provided /// allocator and not the one present in self. pub fn toString(self: Managed, allocator: Allocator, base: u8, case: std.fmt.Case) ![]u8 { if (base < 2 or base > 16) return error.InvalidBase; return self.toConst().toStringAlloc(allocator, base, case); } /// To allow `std.fmt.format` to work with `Managed`. /// If the absolute value of integer is greater than or equal to `pow(2, 64 * @sizeOf(usize) * 8)`, /// this function will fail to print the string, printing "(BigInt)" instead of a number. /// This is because the rendering algorithm requires reversing a string, which requires O(N) memory. /// See `toString` and `toStringAlloc` for a way to print big integers without failure. pub fn format( self: Managed, comptime fmt: []const u8, options: std.fmt.FormatOptions, out_stream: anytype, ) !void { return self.toConst().format(fmt, options, out_stream); } /// Returns math.Order.lt, math.Order.eq, math.Order.gt if |a| < |b|, |a| == /// |b| or |a| > |b| respectively. pub fn orderAbs(a: Managed, b: Managed) math.Order { return a.toConst().orderAbs(b.toConst()); } /// Returns math.Order.lt, math.Order.eq, math.Order.gt if a < b, a == b or a /// > b respectively. pub fn order(a: Managed, b: Managed) math.Order { return a.toConst().order(b.toConst()); } /// Returns true if a == 0. pub fn eqlZero(a: Managed) bool { return a.toConst().eqlZero(); } /// Returns true if |a| == |b|. pub fn eqlAbs(a: Managed, b: Managed) bool { return a.toConst().eqlAbs(b.toConst()); } /// Returns true if a == b. pub fn eql(a: Managed, b: Managed) bool { return a.toConst().eql(b.toConst()); } /// Normalize a possible sequence of leading zeros. /// /// [1, 2, 3, 4, 0] -> [1, 2, 3, 4] /// [1, 2, 0, 0, 0] -> [1, 2] /// [0, 0, 0, 0, 0] -> [0] pub fn normalize(r: *Managed, length: usize) void { assert(length > 0); assert(length <= r.limbs.len); var j = length; while (j > 0) : (j -= 1) { if (r.limbs[j - 1] != 0) { break; } } // Handle zero r.setLen(if (j != 0) j else 1); } /// r = a + scalar /// /// r and a may be aliases. /// /// Returns an error if memory could not be allocated. pub fn addScalar(r: *Managed, a: *const Managed, scalar: anytype) Allocator.Error!void { try r.ensureAddScalarCapacity(a.toConst(), scalar); var m = r.toMutable(); m.addScalar(a.toConst(), scalar); r.setMetadata(m.positive, m.len); } /// r = a + b /// /// r, a and b may be aliases. /// /// Returns an error if memory could not be allocated. pub fn add(r: *Managed, a: *const Managed, b: *const Managed) Allocator.Error!void { try r.ensureAddCapacity(a.toConst(), b.toConst()); var m = r.toMutable(); m.add(a.toConst(), b.toConst()); r.setMetadata(m.positive, m.len); } /// r = a + b with 2s-complement wrapping semantics. Returns whether any overflow occurred. /// /// r, a and b may be aliases. /// /// Returns an error if memory could not be allocated. pub fn addWrap( r: *Managed, a: *const Managed, b: *const Managed, signedness: Signedness, bit_count: usize, ) Allocator.Error!bool { try r.ensureTwosCompCapacity(bit_count); var m = r.toMutable(); const wrapped = m.addWrap(a.toConst(), b.toConst(), signedness, bit_count); r.setMetadata(m.positive, m.len); return wrapped; } /// r = a + b with 2s-complement saturating semantics. /// /// r, a and b may be aliases. /// /// Returns an error if memory could not be allocated. pub fn addSat(r: *Managed, a: *const Managed, b: *const Managed, signedness: Signedness, bit_count: usize) Allocator.Error!void { try r.ensureTwosCompCapacity(bit_count); var m = r.toMutable(); m.addSat(a.toConst(), b.toConst(), signedness, bit_count); r.setMetadata(m.positive, m.len); } /// r = a - b /// /// r, a and b may be aliases. /// /// Returns an error if memory could not be allocated. pub fn sub(r: *Managed, a: *const Managed, b: *const Managed) !void { try r.ensureCapacity(@max(a.len(), b.len()) + 1); var m = r.toMutable(); m.sub(a.toConst(), b.toConst()); r.setMetadata(m.positive, m.len); } /// r = a - b with 2s-complement wrapping semantics. Returns whether any overflow occurred. /// /// r, a and b may be aliases. /// /// Returns an error if memory could not be allocated. pub fn subWrap( r: *Managed, a: *const Managed, b: *const Managed, signedness: Signedness, bit_count: usize, ) Allocator.Error!bool { try r.ensureTwosCompCapacity(bit_count); var m = r.toMutable(); const wrapped = m.subWrap(a.toConst(), b.toConst(), signedness, bit_count); r.setMetadata(m.positive, m.len); return wrapped; } /// r = a - b with 2s-complement saturating semantics. /// /// r, a and b may be aliases. /// /// Returns an error if memory could not be allocated. pub fn subSat( r: *Managed, a: *const Managed, b: *const Managed, signedness: Signedness, bit_count: usize, ) Allocator.Error!void { try r.ensureTwosCompCapacity(bit_count); var m = r.toMutable(); m.subSat(a.toConst(), b.toConst(), signedness, bit_count); r.setMetadata(m.positive, m.len); } /// rma = a * b /// /// rma, a and b may be aliases. However, it is more efficient if rma does not alias a or b. /// /// Returns an error if memory could not be allocated. /// /// rma's allocator is used for temporary storage to speed up the multiplication. pub fn mul(rma: *Managed, a: *const Managed, b: *const Managed) !void { var alias_count: usize = 0; if (rma.limbs.ptr == a.limbs.ptr) alias_count += 1; if (rma.limbs.ptr == b.limbs.ptr) alias_count += 1; try rma.ensureMulCapacity(a.toConst(), b.toConst()); var m = rma.toMutable(); if (alias_count == 0) { m.mulNoAlias(a.toConst(), b.toConst(), rma.allocator); } else { const limb_count = calcMulLimbsBufferLen(a.len(), b.len(), alias_count); const limbs_buffer = try rma.allocator.alloc(Limb, limb_count); defer rma.allocator.free(limbs_buffer); m.mul(a.toConst(), b.toConst(), limbs_buffer, rma.allocator); } rma.setMetadata(m.positive, m.len); } /// rma = a * b with 2s-complement wrapping semantics. /// /// rma, a and b may be aliases. However, it is more efficient if rma does not alias a or b. /// /// Returns an error if memory could not be allocated. /// /// rma's allocator is used for temporary storage to speed up the multiplication. pub fn mulWrap( rma: *Managed, a: *const Managed, b: *const Managed, signedness: Signedness, bit_count: usize, ) !void { var alias_count: usize = 0; if (rma.limbs.ptr == a.limbs.ptr) alias_count += 1; if (rma.limbs.ptr == b.limbs.ptr) alias_count += 1; try rma.ensureTwosCompCapacity(bit_count); var m = rma.toMutable(); if (alias_count == 0) { m.mulWrapNoAlias(a.toConst(), b.toConst(), signedness, bit_count, rma.allocator); } else { const limb_count = calcMulWrapLimbsBufferLen(bit_count, a.len(), b.len(), alias_count); const limbs_buffer = try rma.allocator.alloc(Limb, limb_count); defer rma.allocator.free(limbs_buffer); m.mulWrap(a.toConst(), b.toConst(), signedness, bit_count, limbs_buffer, rma.allocator); } rma.setMetadata(m.positive, m.len); } pub fn ensureTwosCompCapacity(r: *Managed, bit_count: usize) !void { try r.ensureCapacity(calcTwosCompLimbCount(bit_count)); } pub fn ensureAddScalarCapacity(r: *Managed, a: Const, scalar: anytype) !void { try r.ensureCapacity(@max(a.limbs.len, calcLimbLen(scalar)) + 1); } pub fn ensureAddCapacity(r: *Managed, a: Const, b: Const) !void { try r.ensureCapacity(@max(a.limbs.len, b.limbs.len) + 1); } pub fn ensureMulCapacity(rma: *Managed, a: Const, b: Const) !void { try rma.ensureCapacity(a.limbs.len + b.limbs.len + 1); } /// q = a / b (rem r) /// /// a / b are floored (rounded towards 0). /// /// Returns an error if memory could not be allocated. pub fn divFloor(q: *Managed, r: *Managed, a: *const Managed, b: *const Managed) !void { try q.ensureCapacity(a.len()); try r.ensureCapacity(b.len()); var mq = q.toMutable(); var mr = r.toMutable(); const limbs_buffer = try q.allocator.alloc(Limb, calcDivLimbsBufferLen(a.len(), b.len())); defer q.allocator.free(limbs_buffer); mq.divFloor(&mr, a.toConst(), b.toConst(), limbs_buffer); q.setMetadata(mq.positive, mq.len); r.setMetadata(mr.positive, mr.len); } /// q = a / b (rem r) /// /// a / b are truncated (rounded towards -inf). /// /// Returns an error if memory could not be allocated. pub fn divTrunc(q: *Managed, r: *Managed, a: *const Managed, b: *const Managed) !void { try q.ensureCapacity(a.len()); try r.ensureCapacity(b.len()); var mq = q.toMutable(); var mr = r.toMutable(); const limbs_buffer = try q.allocator.alloc(Limb, calcDivLimbsBufferLen(a.len(), b.len())); defer q.allocator.free(limbs_buffer); mq.divTrunc(&mr, a.toConst(), b.toConst(), limbs_buffer); q.setMetadata(mq.positive, mq.len); r.setMetadata(mr.positive, mr.len); } /// r = a << shift, in other words, r = a * 2^shift /// r and a may alias. pub fn shiftLeft(r: *Managed, a: *const Managed, shift: usize) !void { try r.ensureCapacity(a.len() + (shift / limb_bits) + 1); var m = r.toMutable(); m.shiftLeft(a.toConst(), shift); r.setMetadata(m.positive, m.len); } /// r = a <<| shift with 2s-complement saturating semantics. /// r and a may alias. pub fn shiftLeftSat(r: *Managed, a: *const Managed, shift: usize, signedness: Signedness, bit_count: usize) !void { try r.ensureTwosCompCapacity(bit_count); var m = r.toMutable(); m.shiftLeftSat(a.toConst(), shift, signedness, bit_count); r.setMetadata(m.positive, m.len); } /// r = a >> shift /// r and a may alias. pub fn shiftRight(r: *Managed, a: *const Managed, shift: usize) !void { if (a.len() <= shift / limb_bits) { // Shifting negative numbers converges to -1 instead of 0 if (a.isPositive()) { r.metadata = 1; r.limbs[0] = 0; } else { r.metadata = 1; r.setSign(false); r.limbs[0] = 1; } return; } try r.ensureCapacity(a.len() - (shift / limb_bits)); var m = r.toMutable(); m.shiftRight(a.toConst(), shift); r.setMetadata(m.positive, m.len); } /// r = ~a under 2s-complement wrapping semantics. /// r and a may alias. pub fn bitNotWrap(r: *Managed, a: *const Managed, signedness: Signedness, bit_count: usize) !void { try r.ensureTwosCompCapacity(bit_count); var m = r.toMutable(); m.bitNotWrap(a.toConst(), signedness, bit_count); r.setMetadata(m.positive, m.len); } /// r = a | b /// /// a and b are zero-extended to the longer of a or b. pub fn bitOr(r: *Managed, a: *const Managed, b: *const Managed) !void { try r.ensureCapacity(@max(a.len(), b.len())); var m = r.toMutable(); m.bitOr(a.toConst(), b.toConst()); r.setMetadata(m.positive, m.len); } /// r = a & b pub fn bitAnd(r: *Managed, a: *const Managed, b: *const Managed) !void { const cap = if (a.len() >= b.len()) if (b.isPositive()) b.len() else if (a.isPositive()) a.len() else a.len() + 1 else if (a.isPositive()) a.len() else if (b.isPositive()) b.len() else b.len() + 1; try r.ensureCapacity(cap); var m = r.toMutable(); m.bitAnd(a.toConst(), b.toConst()); r.setMetadata(m.positive, m.len); } /// r = a ^ b pub fn bitXor(r: *Managed, a: *const Managed, b: *const Managed) !void { const cap = @max(a.len(), b.len()) + @intFromBool(a.isPositive() != b.isPositive()); try r.ensureCapacity(cap); var m = r.toMutable(); m.bitXor(a.toConst(), b.toConst()); r.setMetadata(m.positive, m.len); } /// rma may alias x or y. /// x and y may alias each other. /// /// rma's allocator is used for temporary storage to boost multiplication performance. pub fn gcd(rma: *Managed, x: *const Managed, y: *const Managed) !void { try rma.ensureCapacity(@min(x.len(), y.len())); var m = rma.toMutable(); var limbs_buffer = std.ArrayList(Limb).init(rma.allocator); defer limbs_buffer.deinit(); try m.gcd(x.toConst(), y.toConst(), &limbs_buffer); rma.setMetadata(m.positive, m.len); } /// r = a * a pub fn sqr(rma: *Managed, a: *const Managed) !void { const needed_limbs = 2 * a.len() + 1; if (rma.limbs.ptr == a.limbs.ptr) { var m = try Managed.initCapacity(rma.allocator, needed_limbs); errdefer m.deinit(); var m_mut = m.toMutable(); m_mut.sqrNoAlias(a.toConst(), rma.allocator); m.setMetadata(m_mut.positive, m_mut.len); rma.deinit(); rma.swap(&m); } else { try rma.ensureCapacity(needed_limbs); var rma_mut = rma.toMutable(); rma_mut.sqrNoAlias(a.toConst(), rma.allocator); rma.setMetadata(rma_mut.positive, rma_mut.len); } } pub fn pow(rma: *Managed, a: *const Managed, b: u32) !void { const needed_limbs = calcPowLimbsBufferLen(a.bitCountAbs(), b); const limbs_buffer = try rma.allocator.alloc(Limb, needed_limbs); defer rma.allocator.free(limbs_buffer); if (rma.limbs.ptr == a.limbs.ptr) { var m = try Managed.initCapacity(rma.allocator, needed_limbs); errdefer m.deinit(); var m_mut = m.toMutable(); m_mut.pow(a.toConst(), b, limbs_buffer); m.setMetadata(m_mut.positive, m_mut.len); rma.deinit(); rma.swap(&m); } else { try rma.ensureCapacity(needed_limbs); var rma_mut = rma.toMutable(); rma_mut.pow(a.toConst(), b, limbs_buffer); rma.setMetadata(rma_mut.positive, rma_mut.len); } } /// r = ⌊√a⌋ pub fn sqrt(rma: *Managed, a: *const Managed) !void { const bit_count = a.bitCountAbs(); if (bit_count == 0) { try rma.set(0); rma.setMetadata(a.isPositive(), rma.len()); return; } if (!a.isPositive()) { return error.SqrtOfNegativeNumber; } const needed_limbs = calcSqrtLimbsBufferLen(bit_count); const limbs_buffer = try rma.allocator.alloc(Limb, needed_limbs); defer rma.allocator.free(limbs_buffer); try rma.ensureCapacity((a.len() - 1) / 2 + 1); var m = rma.toMutable(); m.sqrt(a.toConst(), limbs_buffer); rma.setMetadata(m.positive, m.len); } /// r = truncate(Int(signedness, bit_count), a) pub fn truncate(r: *Managed, a: *const Managed, signedness: Signedness, bit_count: usize) !void { try r.ensureCapacity(calcTwosCompLimbCount(bit_count)); var m = r.toMutable(); m.truncate(a.toConst(), signedness, bit_count); r.setMetadata(m.positive, m.len); } /// r = saturate(Int(signedness, bit_count), a) pub fn saturate(r: *Managed, a: *const Managed, signedness: Signedness, bit_count: usize) !void { try r.ensureCapacity(calcTwosCompLimbCount(bit_count)); var m = r.toMutable(); m.saturate(a.toConst(), signedness, bit_count); r.setMetadata(m.positive, m.len); } /// r = @popCount(a) with 2s-complement semantics. /// r and a may be aliases. pub fn popCount(r: *Managed, a: *const Managed, bit_count: usize) !void { try r.ensureCapacity(calcTwosCompLimbCount(bit_count)); var m = r.toMutable(); m.popCount(a.toConst(), bit_count); r.setMetadata(m.positive, m.len); } }; /// Different operators which can be used in accumulation style functions /// (llmulacc, llmulaccKaratsuba, llmulaccLong, llmulLimb). In all these functions, /// a computed value is accumulated with an existing result. const AccOp = enum { /// The computed value is added to the result. add, /// The computed value is subtracted from the result. sub, }; /// Knuth 4.3.1, Algorithm M. /// /// r = r (op) a * b /// r MUST NOT alias any of a or b. /// /// The result is computed modulo `r.len`. When `r.len >= a.len + b.len`, no overflow occurs. fn llmulacc(comptime op: AccOp, opt_allocator: ?Allocator, r: []Limb, a: []const Limb, b: []const Limb) void { @setRuntimeSafety(debug_safety); assert(r.len >= a.len); assert(r.len >= b.len); // Order greatest first. var x = a; var y = b; if (a.len < b.len) { x = b; y = a; } k_mul: { if (y.len > 48) { if (opt_allocator) |allocator| { llmulaccKaratsuba(op, allocator, r, x, y) catch |err| switch (err) { error.OutOfMemory => break :k_mul, // handled below }; return; } } } llmulaccLong(op, r, x, y); } /// Knuth 4.3.1, Algorithm M. /// /// r = r (op) a * b /// r MUST NOT alias any of a or b. /// /// The result is computed modulo `r.len`. When `r.len >= a.len + b.len`, no overflow occurs. fn llmulaccKaratsuba( comptime op: AccOp, allocator: Allocator, r: []Limb, a: []const Limb, b: []const Limb, ) error{OutOfMemory}!void { @setRuntimeSafety(debug_safety); assert(r.len >= a.len); assert(a.len >= b.len); // Classical karatsuba algorithm: // a = a1 * B + a0 // b = b1 * B + b0 // Where a0, b0 < B // // We then have: // ab = a * b // = (a1 * B + a0) * (b1 * B + b0) // = a1 * b1 * B * B + a1 * B * b0 + a0 * b1 * B + a0 * b0 // = a1 * b1 * B * B + (a1 * b0 + a0 * b1) * B + a0 * b0 // // Note that: // a1 * b0 + a0 * b1 // = (a1 + a0)(b1 + b0) - a1 * b1 - a0 * b0 // = (a0 - a1)(b1 - b0) + a1 * b1 + a0 * b0 // // This yields: // ab = p2 * B^2 + (p0 + p1 + p2) * B + p0 // // Where: // p0 = a0 * b0 // p1 = (a0 - a1)(b1 - b0) // p2 = a1 * b1 // // Note, (a0 - a1) and (b1 - b0) produce values -B < x < B, and so we need to mind the sign here. // We also have: // 0 <= p0 <= 2B // -2B <= p1 <= 2B // // Note, when B is a multiple of the limb size, multiplies by B amount to shifts or // slices of a limbs array. // // This function computes the result of the multiplication modulo r.len. This means: // - p2 and p1 only need to be computed modulo r.len - B. // - In the case of p2, p2 * B^2 needs to be added modulo r.len - 2 * B. const split = b.len / 2; // B const limbs_after_split = r.len - split; // Limbs to compute for p1 and p2. const limbs_after_split2 = r.len - split * 2; // Limbs to add for p2 * B^2. // For a0 and b0 we need the full range. const a0 = a[0..llnormalize(a[0..split])]; const b0 = b[0..llnormalize(b[0..split])]; // For a1 and b1 we only need `limbs_after_split` limbs. const a1 = blk: { var a1 = a[split..]; a1.len = @min(llnormalize(a1), limbs_after_split); break :blk a1; }; const b1 = blk: { var b1 = b[split..]; b1.len = @min(llnormalize(b1), limbs_after_split); break :blk b1; }; // Note that the above slices relative to `split` work because we have a.len > b.len. // We need some temporary memory to store intermediate results. // Note, we can reduce the amount of temporaries we need by reordering the computation here: // ab = p2 * B^2 + (p0 + p1 + p2) * B + p0 // = p2 * B^2 + (p0 * B + p1 * B + p2 * B) + p0 // = (p2 * B^2 + p2 * B) + (p0 * B + p0) + p1 * B // Allocate at least enough memory to be able to multiply the upper two segments of a and b, assuming // no overflow. const tmp = try allocator.alloc(Limb, a.len - split + b.len - split); defer allocator.free(tmp); // Compute p2. // Note, we don't need to compute all of p2, just enough limbs to satisfy r. const p2_limbs = @min(limbs_after_split, a1.len + b1.len); @memset(tmp[0..p2_limbs], 0); llmulacc(.add, allocator, tmp[0..p2_limbs], a1[0..@min(a1.len, p2_limbs)], b1[0..@min(b1.len, p2_limbs)]); const p2 = tmp[0..llnormalize(tmp[0..p2_limbs])]; // Add p2 * B to the result. llaccum(op, r[split..], p2); // Add p2 * B^2 to the result if required. if (limbs_after_split2 > 0) { llaccum(op, r[split * 2 ..], p2[0..@min(p2.len, limbs_after_split2)]); } // Compute p0. // Since a0.len, b0.len <= split and r.len >= split * 2, the full width of p0 needs to be computed. const p0_limbs = a0.len + b0.len; @memset(tmp[0..p0_limbs], 0); llmulacc(.add, allocator, tmp[0..p0_limbs], a0, b0); const p0 = tmp[0..llnormalize(tmp[0..p0_limbs])]; // Add p0 to the result. llaccum(op, r, p0); // Add p0 * B to the result. In this case, we may not need all of it. llaccum(op, r[split..], p0[0..@min(limbs_after_split, p0.len)]); // Finally, compute and add p1. // From now on we only need `limbs_after_split` limbs for a0 and b0, since the result of the // following computation will be added * B. const a0x = a0[0..@min(a0.len, limbs_after_split)]; const b0x = b0[0..@min(b0.len, limbs_after_split)]; const j0_sign = llcmp(a0x, a1); const j1_sign = llcmp(b1, b0x); if (j0_sign * j1_sign == 0) { // p1 is zero, we don't need to do any computation at all. return; } @memset(tmp, 0); // p1 is nonzero, so compute the intermediary terms j0 = a0 - a1 and j1 = b1 - b0. // Note that in this case, we again need some storage for intermediary results // j0 and j1. Since we have tmp.len >= 2B, we can store both // intermediaries in the already allocated array. const j0 = tmp[0 .. a.len - split]; const j1 = tmp[a.len - split ..]; // Ensure that no subtraction overflows. if (j0_sign == 1) { // a0 > a1. _ = llsubcarry(j0, a0x, a1); } else { // a0 < a1. _ = llsubcarry(j0, a1, a0x); } if (j1_sign == 1) { // b1 > b0. _ = llsubcarry(j1, b1, b0x); } else { // b1 > b0. _ = llsubcarry(j1, b0x, b1); } if (j0_sign * j1_sign == 1) { // If j0 and j1 are both positive, we now have: // p1 = j0 * j1 // If j0 and j1 are both negative, we now have: // p1 = -j0 * -j1 = j0 * j1 // In this case we can add p1 to the result using llmulacc. llmulacc(op, allocator, r[split..], j0[0..llnormalize(j0)], j1[0..llnormalize(j1)]); } else { // In this case either j0 or j1 is negative, an we have: // p1 = -(j0 * j1) // Now we need to subtract instead of accumulate. const inverted_op = if (op == .add) .sub else .add; llmulacc(inverted_op, allocator, r[split..], j0[0..llnormalize(j0)], j1[0..llnormalize(j1)]); } } /// r = r (op) a. /// The result is computed modulo `r.len`. fn llaccum(comptime op: AccOp, r: []Limb, a: []const Limb) void { @setRuntimeSafety(debug_safety); if (op == .sub) { _ = llsubcarry(r, r, a); return; } assert(r.len != 0 and a.len != 0); assert(r.len >= a.len); var i: usize = 0; var carry: Limb = 0; while (i < a.len) : (i += 1) { const ov1 = @addWithOverflow(r[i], a[i]); r[i] = ov1[0]; const ov2 = @addWithOverflow(r[i], carry); r[i] = ov2[0]; carry = @as(Limb, ov1[1]) + ov2[1]; } while ((carry != 0) and i < r.len) : (i += 1) { const ov = @addWithOverflow(r[i], carry); r[i] = ov[0]; carry = ov[1]; } } /// Returns -1, 0, 1 if |a| < |b|, |a| == |b| or |a| > |b| respectively for limbs. pub fn llcmp(a: []const Limb, b: []const Limb) i8 { @setRuntimeSafety(debug_safety); const a_len = llnormalize(a); const b_len = llnormalize(b); if (a_len < b_len) { return -1; } if (a_len > b_len) { return 1; } var i: usize = a_len - 1; while (i != 0) : (i -= 1) { if (a[i] != b[i]) { break; } } if (a[i] < b[i]) { return -1; } else if (a[i] > b[i]) { return 1; } else { return 0; } } /// r = r (op) y * xi /// The result is computed modulo `r.len`. When `r.len >= a.len + b.len`, no overflow occurs. fn llmulaccLong(comptime op: AccOp, r: []Limb, a: []const Limb, b: []const Limb) void { @setRuntimeSafety(debug_safety); assert(r.len >= a.len); assert(a.len >= b.len); var i: usize = 0; while (i < b.len) : (i += 1) { _ = llmulLimb(op, r[i..], a, b[i]); } } /// r = r (op) y * xi /// The result is computed modulo `r.len`. /// Returns whether the operation overflowed. fn llmulLimb(comptime op: AccOp, acc: []Limb, y: []const Limb, xi: Limb) bool { @setRuntimeSafety(debug_safety); if (xi == 0) { return false; } const split = @min(y.len, acc.len); var a_lo = acc[0..split]; var a_hi = acc[split..]; switch (op) { .add => { var carry: Limb = 0; var j: usize = 0; while (j < a_lo.len) : (j += 1) { a_lo[j] = addMulLimbWithCarry(a_lo[j], y[j], xi, &carry); } j = 0; while ((carry != 0) and (j < a_hi.len)) : (j += 1) { const ov = @addWithOverflow(a_hi[j], carry); a_hi[j] = ov[0]; carry = ov[1]; } return carry != 0; }, .sub => { var borrow: Limb = 0; var j: usize = 0; while (j < a_lo.len) : (j += 1) { a_lo[j] = subMulLimbWithBorrow(a_lo[j], y[j], xi, &borrow); } j = 0; while ((borrow != 0) and (j < a_hi.len)) : (j += 1) { const ov = @subWithOverflow(a_hi[j], borrow); a_hi[j] = ov[0]; borrow = ov[1]; } return borrow != 0; }, } } /// returns the min length the limb could be. fn llnormalize(a: []const Limb) usize { @setRuntimeSafety(debug_safety); var j = a.len; while (j > 0) : (j -= 1) { if (a[j - 1] != 0) { break; } } // Handle zero return if (j != 0) j else 1; } /// Knuth 4.3.1, Algorithm S. fn llsubcarry(r: []Limb, a: []const Limb, b: []const Limb) Limb { @setRuntimeSafety(debug_safety); assert(a.len != 0 and b.len != 0); assert(a.len >= b.len); assert(r.len >= a.len); var i: usize = 0; var borrow: Limb = 0; while (i < b.len) : (i += 1) { const ov1 = @subWithOverflow(a[i], b[i]); r[i] = ov1[0]; const ov2 = @subWithOverflow(r[i], borrow); r[i] = ov2[0]; borrow = @as(Limb, ov1[1]) + ov2[1]; } while (i < a.len) : (i += 1) { const ov = @subWithOverflow(a[i], borrow); r[i] = ov[0]; borrow = ov[1]; } return borrow; } fn llsub(r: []Limb, a: []const Limb, b: []const Limb) void { @setRuntimeSafety(debug_safety); assert(a.len > b.len or (a.len == b.len and a[a.len - 1] >= b[b.len - 1])); assert(llsubcarry(r, a, b) == 0); } /// Knuth 4.3.1, Algorithm A. fn lladdcarry(r: []Limb, a: []const Limb, b: []const Limb) Limb { @setRuntimeSafety(debug_safety); assert(a.len != 0 and b.len != 0); assert(a.len >= b.len); assert(r.len >= a.len); var i: usize = 0; var carry: Limb = 0; while (i < b.len) : (i += 1) { const ov1 = @addWithOverflow(a[i], b[i]); r[i] = ov1[0]; const ov2 = @addWithOverflow(r[i], carry); r[i] = ov2[0]; carry = @as(Limb, ov1[1]) + ov2[1]; } while (i < a.len) : (i += 1) { const ov = @addWithOverflow(a[i], carry); r[i] = ov[0]; carry = ov[1]; } return carry; } fn lladd(r: []Limb, a: []const Limb, b: []const Limb) void { @setRuntimeSafety(debug_safety); assert(r.len >= a.len + 1); r[a.len] = lladdcarry(r, a, b); } /// Knuth 4.3.1, Exercise 16. fn lldiv1(quo: []Limb, rem: *Limb, a: []const Limb, b: Limb) void { @setRuntimeSafety(debug_safety); assert(a.len > 1 or a[0] >= b); assert(quo.len >= a.len); rem.* = 0; for (a, 0..) |_, ri| { const i = a.len - ri - 1; const pdiv = ((@as(DoubleLimb, rem.*) << limb_bits) | a[i]); if (pdiv == 0) { quo[i] = 0; rem.* = 0; } else if (pdiv < b) { quo[i] = 0; rem.* = @as(Limb, @truncate(pdiv)); } else if (pdiv == b) { quo[i] = 1; rem.* = 0; } else { quo[i] = @as(Limb, @truncate(@divTrunc(pdiv, b))); rem.* = @as(Limb, @truncate(pdiv - (quo[i] *% b))); } } } fn lldiv0p5(quo: []Limb, rem: *Limb, a: []const Limb, b: HalfLimb) void { @setRuntimeSafety(debug_safety); assert(a.len > 1 or a[0] >= b); assert(quo.len >= a.len); rem.* = 0; for (a, 0..) |_, ri| { const i = a.len - ri - 1; const ai_high = a[i] >> half_limb_bits; const ai_low = a[i] & ((1 << half_limb_bits) - 1); // Split the division into two divisions acting on half a limb each. Carry remainder. const ai_high_with_carry = (rem.* << half_limb_bits) | ai_high; const ai_high_quo = ai_high_with_carry / b; rem.* = ai_high_with_carry % b; const ai_low_with_carry = (rem.* << half_limb_bits) | ai_low; const ai_low_quo = ai_low_with_carry / b; rem.* = ai_low_with_carry % b; quo[i] = (ai_high_quo << half_limb_bits) | ai_low_quo; } } fn llshl(r: []Limb, a: []const Limb, shift: usize) void { @setRuntimeSafety(debug_safety); assert(a.len >= 1); const interior_limb_shift = @as(Log2Limb, @truncate(shift)); // We only need the extra limb if the shift of the last element overflows. // This is useful for the implementation of `shiftLeftSat`. if (a[a.len - 1] << interior_limb_shift >> interior_limb_shift != a[a.len - 1]) { assert(r.len >= a.len + (shift / limb_bits) + 1); } else { assert(r.len >= a.len + (shift / limb_bits)); } const limb_shift = shift / limb_bits + 1; var carry: Limb = 0; var i: usize = 0; while (i < a.len) : (i += 1) { const src_i = a.len - i - 1; const dst_i = src_i + limb_shift; const src_digit = a[src_i]; r[dst_i] = carry | @call(.always_inline, math.shr, .{ Limb, src_digit, limb_bits - @as(Limb, @intCast(interior_limb_shift)), }); carry = (src_digit << interior_limb_shift); } r[limb_shift - 1] = carry; @memset(r[0 .. limb_shift - 1], 0); } fn llshr(r: []Limb, a: []const Limb, shift: usize) void { @setRuntimeSafety(debug_safety); assert(a.len >= 1); assert(r.len >= a.len - (shift / limb_bits)); const limb_shift = shift / limb_bits; const interior_limb_shift = @as(Log2Limb, @truncate(shift)); var i: usize = 0; while (i < a.len - limb_shift) : (i += 1) { const dst_i = i; const src_i = dst_i + limb_shift; const src_digit = a[src_i]; const src_digit_next = if (src_i + 1 < a.len) a[src_i + 1] else 0; const carry = @call(.always_inline, math.shl, .{ Limb, src_digit_next, limb_bits - @as(Limb, @intCast(interior_limb_shift)), }); r[dst_i] = carry | (src_digit >> interior_limb_shift); } } // r = ~r fn llnot(r: []Limb) void { @setRuntimeSafety(debug_safety); for (r) |*elem| { elem.* = ~elem.*; } } // r = a | b with 2s complement semantics. // r may alias. // a and b must not be 0. // Returns `true` when the result is positive. // When b is positive, r requires at least `a.len` limbs of storage. // When b is negative, r requires at least `b.len` limbs of storage. fn llsignedor(r: []Limb, a: []const Limb, a_positive: bool, b: []const Limb, b_positive: bool) bool { @setRuntimeSafety(debug_safety); assert(r.len >= a.len); assert(a.len >= b.len); if (a_positive and b_positive) { // Trivial case, result is positive. var i: usize = 0; while (i < b.len) : (i += 1) { r[i] = a[i] | b[i]; } while (i < a.len) : (i += 1) { r[i] = a[i]; } return true; } else if (!a_positive and b_positive) { // Result is negative. // r = (--a) | b // = ~(-a - 1) | b // = ~(-a - 1) | ~~b // = ~((-a - 1) & ~b) // = -(((-a - 1) & ~b) + 1) var i: usize = 0; var a_borrow: u1 = 1; var r_carry: u1 = 1; while (i < b.len) : (i += 1) { const ov1 = @subWithOverflow(a[i], a_borrow); a_borrow = ov1[1]; const ov2 = @addWithOverflow(ov1[0] & ~b[i], r_carry); r[i] = ov2[0]; r_carry = ov2[1]; } // In order for r_carry to be nonzero at this point, ~b[i] would need to be // all ones, which would require b[i] to be zero. This cannot be when // b is normalized, so there cannot be a carry here. // Also, x & ~b can only clear bits, so (x & ~b) <= x, meaning (-a - 1) + 1 never overflows. assert(r_carry == 0); // With b = 0, we get (-a - 1) & ~0 = -a - 1. // Note, if a_borrow is zero we do not need to compute anything for // the higher limbs so we can early return here. while (i < a.len and a_borrow == 1) : (i += 1) { const ov = @subWithOverflow(a[i], a_borrow); r[i] = ov[0]; a_borrow = ov[1]; } assert(a_borrow == 0); // a was 0. return false; } else if (a_positive and !b_positive) { // Result is negative. // r = a | (--b) // = a | ~(-b - 1) // = ~~a | ~(-b - 1) // = ~(~a & (-b - 1)) // = -((~a & (-b - 1)) + 1) var i: usize = 0; var b_borrow: u1 = 1; var r_carry: u1 = 1; while (i < b.len) : (i += 1) { const ov1 = @subWithOverflow(b[i], b_borrow); b_borrow = ov1[1]; const ov2 = @addWithOverflow(~a[i] & ov1[0], r_carry); r[i] = ov2[0]; r_carry = ov2[1]; } // b is at least 1, so this should never underflow. assert(b_borrow == 0); // b was 0 // x & ~a can only clear bits, so (x & ~a) <= x, meaning (-b - 1) + 1 never overflows. assert(r_carry == 0); // With b = 0 and b_borrow = 0, we get ~a & (0 - 0) = ~a & 0 = 0. // Omit setting the upper bytes, just deal with those when calling llsignedor. return false; } else { // Result is negative. // r = (--a) | (--b) // = ~(-a - 1) | ~(-b - 1) // = ~((-a - 1) & (-b - 1)) // = -(~(~((-a - 1) & (-b - 1))) + 1) // = -((-a - 1) & (-b - 1) + 1) var i: usize = 0; var a_borrow: u1 = 1; var b_borrow: u1 = 1; var r_carry: u1 = 1; while (i < b.len) : (i += 1) { const ov1 = @subWithOverflow(a[i], a_borrow); a_borrow = ov1[1]; const ov2 = @subWithOverflow(b[i], b_borrow); b_borrow = ov2[1]; const ov3 = @addWithOverflow(ov1[0] & ov2[0], r_carry); r[i] = ov3[0]; r_carry = ov3[1]; } // b is at least 1, so this should never underflow. assert(b_borrow == 0); // b was 0 // Can never overflow because in order for b_limb to be maxInt(Limb), // b_borrow would need to equal 1. // x & y can only clear bits, meaning x & y <= x and x & y <= y. This implies that // for x = a - 1 and y = b - 1, the +1 term would never cause an overflow. assert(r_carry == 0); // With b = 0 and b_borrow = 0 we get (-a - 1) & (0 - 0) = (-a - 1) & 0 = 0. // Omit setting the upper bytes, just deal with those when calling llsignedor. return false; } } // r = a & b with 2s complement semantics. // r may alias. // a and b must not be 0. // Returns `true` when the result is positive. // We assume `a.len >= b.len` here, so: // 1. when b is positive, r requires at least `b.len` limbs of storage, // 2. when b is negative but a is positive, r requires at least `a.len` limbs of storage, // 3. when both a and b are negative, r requires at least `a.len + 1` limbs of storage. fn llsignedand(r: []Limb, a: []const Limb, a_positive: bool, b: []const Limb, b_positive: bool) bool { @setRuntimeSafety(debug_safety); assert(a.len != 0 and b.len != 0); assert(a.len >= b.len); assert(r.len >= if (b_positive) b.len else if (a_positive) a.len else a.len + 1); if (a_positive and b_positive) { // Trivial case, result is positive. var i: usize = 0; while (i < b.len) : (i += 1) { r[i] = a[i] & b[i]; } // With b = 0 we have a & 0 = 0, so the upper bytes are zero. // Omit setting them here and simply discard them whenever // llsignedand is called. return true; } else if (!a_positive and b_positive) { // Result is positive. // r = (--a) & b // = ~(-a - 1) & b var i: usize = 0; var a_borrow: u1 = 1; while (i < b.len) : (i += 1) { const ov = @subWithOverflow(a[i], a_borrow); a_borrow = ov[1]; r[i] = ~ov[0] & b[i]; } // With b = 0 we have ~(a - 1) & 0 = 0, so the upper bytes are zero. // Omit setting them here and simply discard them whenever // llsignedand is called. return true; } else if (a_positive and !b_positive) { // Result is positive. // r = a & (--b) // = a & ~(-b - 1) var i: usize = 0; var b_borrow: u1 = 1; while (i < b.len) : (i += 1) { const ov = @subWithOverflow(b[i], b_borrow); b_borrow = ov[1]; r[i] = a[i] & ~ov[0]; } assert(b_borrow == 0); // b was 0 // With b = 0 and b_borrow = 0 we have a & ~(0 - 0) = a & ~0 = a, so // the upper bytes are the same as those of a. while (i < a.len) : (i += 1) { r[i] = a[i]; } return true; } else { // Result is negative. // r = (--a) & (--b) // = ~(-a - 1) & ~(-b - 1) // = ~((-a - 1) | (-b - 1)) // = -(((-a - 1) | (-b - 1)) + 1) var i: usize = 0; var a_borrow: u1 = 1; var b_borrow: u1 = 1; var r_carry: u1 = 1; while (i < b.len) : (i += 1) { const ov1 = @subWithOverflow(a[i], a_borrow); a_borrow = ov1[1]; const ov2 = @subWithOverflow(b[i], b_borrow); b_borrow = ov2[1]; const ov3 = @addWithOverflow(ov1[0] | ov2[0], r_carry); r[i] = ov3[0]; r_carry = ov3[1]; } // b is at least 1, so this should never underflow. assert(b_borrow == 0); // b was 0 // With b = 0 and b_borrow = 0 we get (-a - 1) | (0 - 0) = (-a - 1) | 0 = -a - 1. while (i < a.len) : (i += 1) { const ov1 = @subWithOverflow(a[i], a_borrow); a_borrow = ov1[1]; const ov2 = @addWithOverflow(ov1[0], r_carry); r[i] = ov2[0]; r_carry = ov2[1]; } assert(a_borrow == 0); // a was 0. // The final addition can overflow here, so we need to keep that in mind. r[i] = r_carry; return false; } } // r = a ^ b with 2s complement semantics. // r may alias. // a and b must not be -0. // Returns `true` when the result is positive. // If the sign of a and b is equal, then r requires at least `@max(a.len, b.len)` limbs are required. // Otherwise, r requires at least `@max(a.len, b.len) + 1` limbs. fn llsignedxor(r: []Limb, a: []const Limb, a_positive: bool, b: []const Limb, b_positive: bool) bool { @setRuntimeSafety(debug_safety); assert(a.len != 0 and b.len != 0); assert(r.len >= a.len); assert(a.len >= b.len); // If a and b are positive, the result is positive and r = a ^ b. // If a negative, b positive, result is negative and we have // r = --(--a ^ b) // = --(~(-a - 1) ^ b) // = -(~(~(-a - 1) ^ b) + 1) // = -(((-a - 1) ^ b) + 1) // Same if a is positive and b is negative, sides switched. // If both a and b are negative, the result is positive and we have // r = (--a) ^ (--b) // = ~(-a - 1) ^ ~(-b - 1) // = (-a - 1) ^ (-b - 1) // These operations can be made more generic as follows: // - If a is negative, subtract 1 from |a| before the xor. // - If b is negative, subtract 1 from |b| before the xor. // - if the result is supposed to be negative, add 1. var i: usize = 0; var a_borrow = @intFromBool(!a_positive); var b_borrow = @intFromBool(!b_positive); var r_carry = @intFromBool(a_positive != b_positive); while (i < b.len) : (i += 1) { const ov1 = @subWithOverflow(a[i], a_borrow); a_borrow = ov1[1]; const ov2 = @subWithOverflow(b[i], b_borrow); b_borrow = ov2[1]; const ov3 = @addWithOverflow(ov1[0] ^ ov2[0], r_carry); r[i] = ov3[0]; r_carry = ov3[1]; } while (i < a.len) : (i += 1) { const ov1 = @subWithOverflow(a[i], a_borrow); a_borrow = ov1[1]; const ov2 = @addWithOverflow(ov1[0], r_carry); r[i] = ov2[0]; r_carry = ov2[1]; } // If both inputs don't share the same sign, an extra limb is required. if (a_positive != b_positive) { r[i] = r_carry; } else { assert(r_carry == 0); } assert(a_borrow == 0); assert(b_borrow == 0); return a_positive == b_positive; } /// r MUST NOT alias x. fn llsquareBasecase(r: []Limb, x: []const Limb) void { @setRuntimeSafety(debug_safety); const x_norm = x; assert(r.len >= 2 * x_norm.len + 1); // Compute the square of a N-limb bigint with only (N^2 + N)/2 // multiplications by exploiting the symmetry of the coefficients around the // diagonal: // // a b c * // a b c = // ------------------- // ca cb cc + // ba bb bc + // aa ab ac // // Note that: // - Each mixed-product term appears twice for each column, // - Squares are always in the 2k (0 <= k < N) column for (x_norm, 0..) |v, i| { // Accumulate all the x[i]*x[j] (with x!=j) products const overflow = llmulLimb(.add, r[2 * i + 1 ..], x_norm[i + 1 ..], v); assert(!overflow); } // Each product appears twice, multiply by 2 llshl(r, r[0 .. 2 * x_norm.len], 1); for (x_norm, 0..) |v, i| { // Compute and add the squares const overflow = llmulLimb(.add, r[2 * i ..], x[i..][0..1], v); assert(!overflow); } } /// Knuth 4.6.3 fn llpow(r: []Limb, a: []const Limb, b: u32, tmp_limbs: []Limb) void { var tmp1: []Limb = undefined; var tmp2: []Limb = undefined; // Multiplication requires no aliasing between the operand and the result // variable, use the output limbs and another temporary set to overcome this // limitation. // The initial assignment makes the result end in `r` so an extra memory // copy is saved, each 1 flips the index twice so it's only the zeros that // matter. const b_leading_zeros = @clz(b); const exp_zeros = @popCount(~b) - b_leading_zeros; if (exp_zeros & 1 != 0) { tmp1 = tmp_limbs; tmp2 = r; } else { tmp1 = r; tmp2 = tmp_limbs; } @memcpy(tmp1[0..a.len], a); @memset(tmp1[a.len..], 0); // Scan the exponent as a binary number, from left to right, dropping the // most significant bit set. // Square the result if the current bit is zero, square and multiply by a if // it is one. const exp_bits = 32 - 1 - b_leading_zeros; var exp = b << @as(u5, @intCast(1 + b_leading_zeros)); var i: usize = 0; while (i < exp_bits) : (i += 1) { // Square @memset(tmp2, 0); llsquareBasecase(tmp2, tmp1[0..llnormalize(tmp1)]); mem.swap([]Limb, &tmp1, &tmp2); // Multiply by a const ov = @shlWithOverflow(exp, 1); exp = ov[0]; if (ov[1] != 0) { @memset(tmp2, 0); llmulacc(.add, null, tmp2, tmp1[0..llnormalize(tmp1)], a); mem.swap([]Limb, &tmp1, &tmp2); } } } // Storage must live for the lifetime of the returned value fn fixedIntFromSignedDoubleLimb(A: SignedDoubleLimb, storage: []Limb) Mutable { assert(storage.len >= 2); const A_is_positive = A >= 0; const Au = @as(DoubleLimb, @intCast(if (A < 0) -A else A)); storage[0] = @as(Limb, @truncate(Au)); storage[1] = @as(Limb, @truncate(Au >> limb_bits)); return .{ .limbs = storage[0..2], .positive = A_is_positive, .len = 2, }; } test { _ = @import("int_test.zig"); }