From 636308a17d8f8118ab34e9d4b217baa5878416c4 Mon Sep 17 00:00:00 2001 From: Frank Denis <124872+jedisct1@users.noreply.github.com> Date: Fri, 22 Nov 2024 10:00:49 +0100 Subject: [PATCH] std.crypto.aes: introduce AES block vectors (#22023) * std.crypto.aes: introduce AES block vectors Modern Intel CPUs with the VAES extension can handle more than a single AES block per instruction. So can some ARM and RISC-V CPUs. Software implementations with bitslicing can also greatly benefit from this. Implement low-level operations on AES block vectors, and the parallel AEGIS variants on top of them. AMD Zen4: aegis-128x4: 73225 MiB/s aegis-128x2: 51571 MiB/s aegis-128l: 25806 MiB/s aegis-256x4: 46742 MiB/s aegis-256x2: 30227 MiB/s aegis-256: 8436 MiB/s aes128-gcm: 5926 MiB/s aes256-gcm: 5085 MiB/s AES-GCM, and anything based on AES-CTR are also going to benefit from this later. * Make AEGIS-MAC twice as fast --- lib/std/crypto.zig | 41 +- lib/std/crypto/aegis.zig | 873 +++++++++++++++++++------------ lib/std/crypto/aes.zig | 1 + lib/std/crypto/aes/aesni.zig | 170 +++++- lib/std/crypto/aes/armcrypto.zig | 155 +++++- lib/std/crypto/aes/soft.zig | 151 +++++- lib/std/crypto/benchmark.zig | 8 + 7 files changed, 1012 insertions(+), 387 deletions(-) diff --git a/lib/std/crypto.zig b/lib/std/crypto.zig index aa524fa2c2..7b167a467a 100644 --- a/lib/std/crypto.zig +++ b/lib/std/crypto.zig @@ -7,10 +7,23 @@ pub const timing_safe = @import("crypto/timing_safe.zig"); /// Authenticated Encryption with Associated Data pub const aead = struct { pub const aegis = struct { - pub const Aegis128L = @import("crypto/aegis.zig").Aegis128L; - pub const Aegis128L_256 = @import("crypto/aegis.zig").Aegis128L_256; - pub const Aegis256 = @import("crypto/aegis.zig").Aegis256; - pub const Aegis256_256 = @import("crypto/aegis.zig").Aegis256_256; + const variants = @import("crypto/aegis.zig"); + + pub const Aegis128X4 = variants.Aegis128X4; + pub const Aegis128X2 = variants.Aegis128X2; + pub const Aegis128L = 
variants.Aegis128L; + + pub const Aegis256X4 = variants.Aegis256X4; + pub const Aegis256X2 = variants.Aegis256X2; + pub const Aegis256 = variants.Aegis256; + + pub const Aegis128X4_256 = variants.Aegis128X4_256; + pub const Aegis128X2_256 = variants.Aegis128X2_256; + pub const Aegis128L_256 = variants.Aegis128L_256; + + pub const Aegis256X4_256 = variants.Aegis256X4_256; + pub const Aegis256X2_256 = variants.Aegis256X2_256; + pub const Aegis256_256 = variants.Aegis256_256; }; pub const aes_gcm = struct { @@ -44,10 +57,22 @@ pub const auth = struct { pub const hmac = @import("crypto/hmac.zig"); pub const siphash = @import("crypto/siphash.zig"); pub const aegis = struct { - pub const Aegis128LMac = @import("crypto/aegis.zig").Aegis128LMac; - pub const Aegis128LMac_128 = @import("crypto/aegis.zig").Aegis128LMac_128; - pub const Aegis256Mac = @import("crypto/aegis.zig").Aegis256Mac; - pub const Aegis256Mac_128 = @import("crypto/aegis.zig").Aegis256Mac_128; + const variants = @import("crypto/aegis.zig"); + pub const Aegis128X4Mac = variants.Aegis128X4Mac; + pub const Aegis128X2Mac = variants.Aegis128X2Mac; + pub const Aegis128LMac = variants.Aegis128LMac; + + pub const Aegis256X4Mac = variants.Aegis256X4Mac; + pub const Aegis256X2Mac = variants.Aegis256X2Mac; + pub const Aegis256Mac = variants.Aegis256Mac; + + pub const Aegis128X4Mac_128 = variants.Aegis128X4Mac_128; + pub const Aegis128X2Mac_128 = variants.Aegis128X2Mac_128; + pub const Aegis128LMac_128 = variants.Aegis128LMac_128; + + pub const Aegis256X4Mac_128 = variants.Aegis256X4Mac_128; + pub const Aegis256X2Mac_128 = variants.Aegis256X2Mac_128; + pub const Aegis256Mac_128 = variants.Aegis256Mac_128; }; pub const cmac = @import("crypto/cmac.zig"); }; diff --git a/lib/std/crypto/aegis.zig b/lib/std/crypto/aegis.zig index 67cc13c8c0..be6a655850 100644 --- a/lib/std/crypto/aegis.zig +++ b/lib/std/crypto/aegis.zig @@ -1,16 +1,21 @@ //! 
AEGIS is a very fast authenticated encryption system built on top of the core AES function. //! -//! The AEGIS-128L variant has a 128 bit key, a 128 bit nonce, and processes 256 bit message blocks. -//! The AEGIS-256 variant has a 256 bit key, a 256 bit nonce, and processes 128 bit message blocks. +//! The AEGIS-128* variants have a 128 bit key and a 128 bit nonce. +//! The AEGIS-256* variants have a 256 bit key and a 256 bit nonce. +//! All of them can compute 128 and 256 bit authentication tags. //! //! The AEGIS cipher family offers performance that significantly exceeds that of AES-GCM with //! hardware support for parallelizable AES block encryption. //! -//! Unlike with AES-GCM, nonces can be safely chosen at random with no practical limit when using AEGIS-256. -//! AEGIS-128L also allows for more messages to be safely encrypted when using random nonces. +//! On high-end Intel CPUs with AVX-512 support, AEGIS-128X4 and AEGIS-256X4 are the fastest options. +//! On other modern server, desktop and mobile CPUs, AEGIS-128X2 and AEGIS-256X2 are usually the fastest options. +//! AEGIS-128L and AEGIS-256 perform well on a broad range of platforms, including WebAssembly. //! -//! AEGIS is believed to be key-committing, making it a safer choice than most other AEADs -//! when the key has low entropy, or can be controlled by an attacker. +//! Unlike with AES-GCM, nonces can be safely chosen at random with no practical limit when using AEGIS-256*. +//! AEGIS-128* also allows for more messages to be safely encrypted when using random nonces. +//! +//! Unless the associated data can be fully controlled by an adversary, AEGIS is believed to be key-committing, +//! making it a safer choice than most other AEADs when the key has low entropy, or can be controlled by an attacker. //! //! Finally, leaking the state does not leak the key. //! 
@@ -20,305 +25,202 @@ const std = @import("std"); const crypto = std.crypto; const mem = std.mem; const assert = std.debug.assert; -const AesBlock = crypto.core.aes.Block; const AuthenticationError = crypto.errors.AuthenticationError; -/// AEGIS-128L with a 128-bit authentication tag. -pub const Aegis128L = Aegis128LGeneric(128); +/// AEGIS-128X4 with a 128 bit tag +pub const Aegis128X4 = Aegis128XGeneric(4, 128); +/// AEGIS-128X2 with a 128 bit tag +pub const Aegis128X2 = Aegis128XGeneric(2, 128); +/// AEGIS-128L with a 128 bit tag +pub const Aegis128L = Aegis128XGeneric(1, 128); -/// AEGIS-128L with a 256-bit authentication tag. -pub const Aegis128L_256 = Aegis128LGeneric(256); +/// AEGIS-256X4 with a 128 bit tag +pub const Aegis256X4 = Aegis256XGeneric(4, 128); +/// AEGIS-256X2 with a 128 bit tag +pub const Aegis256X2 = Aegis256XGeneric(2, 128); +/// AEGIS-256 with a 128 bit tag +pub const Aegis256 = Aegis256XGeneric(1, 128); -/// AEGIS-256 with a 128-bit authentication tag. -pub const Aegis256 = Aegis256Generic(128); +/// AEGIS-128X4 with a 256 bit tag +pub const Aegis128X4_256 = Aegis128XGeneric(4, 256); +/// AEGIS-128X2 with a 256 bit tag +pub const Aegis128X2_256 = Aegis128XGeneric(2, 256); +/// AEGIS-128L with a 256 bit tag +pub const Aegis128L_256 = Aegis128XGeneric(1, 256); -/// AEGIS-256 with a 256-bit authentication tag. 
-pub const Aegis256_256 = Aegis256Generic(256); - -const State128L = struct { - blocks: [8]AesBlock, - - fn init(key: [16]u8, nonce: [16]u8) State128L { - const c1 = AesBlock.fromBytes(&[16]u8{ 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1, 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd }); - const c2 = AesBlock.fromBytes(&[16]u8{ 0x0, 0x1, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d, 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62 }); - const key_block = AesBlock.fromBytes(&key); - const nonce_block = AesBlock.fromBytes(&nonce); - const blocks = [8]AesBlock{ - key_block.xorBlocks(nonce_block), - c1, - c2, - c1, - key_block.xorBlocks(nonce_block), - key_block.xorBlocks(c2), - key_block.xorBlocks(c1), - key_block.xorBlocks(c2), - }; - var state = State128L{ .blocks = blocks }; - var i: usize = 0; - while (i < 10) : (i += 1) { - state.update(nonce_block, key_block); - } - return state; - } - - inline fn update(state: *State128L, d1: AesBlock, d2: AesBlock) void { - const blocks = &state.blocks; - const tmp = blocks[7]; - comptime var i: usize = 7; - inline while (i > 0) : (i -= 1) { - blocks[i] = blocks[i - 1].encrypt(blocks[i]); - } - blocks[0] = tmp.encrypt(blocks[0]); - blocks[0] = blocks[0].xorBlocks(d1); - blocks[4] = blocks[4].xorBlocks(d2); - } - - fn absorb(state: *State128L, src: *const [32]u8) void { - const msg0 = AesBlock.fromBytes(src[0..16]); - const msg1 = AesBlock.fromBytes(src[16..32]); - state.update(msg0, msg1); - } - - fn enc(state: *State128L, dst: *[32]u8, src: *const [32]u8) void { - const blocks = &state.blocks; - const msg0 = AesBlock.fromBytes(src[0..16]); - const msg1 = AesBlock.fromBytes(src[16..32]); - var tmp0 = msg0.xorBlocks(blocks[6]).xorBlocks(blocks[1]); - var tmp1 = msg1.xorBlocks(blocks[2]).xorBlocks(blocks[5]); - tmp0 = tmp0.xorBlocks(blocks[2].andBlocks(blocks[3])); - tmp1 = tmp1.xorBlocks(blocks[6].andBlocks(blocks[7])); - dst[0..16].* = tmp0.toBytes(); - dst[16..32].* = tmp1.toBytes(); - state.update(msg0, msg1); - } - - fn dec(state: 
*State128L, dst: *[32]u8, src: *const [32]u8) void { - const blocks = &state.blocks; - var msg0 = AesBlock.fromBytes(src[0..16]).xorBlocks(blocks[6]).xorBlocks(blocks[1]); - var msg1 = AesBlock.fromBytes(src[16..32]).xorBlocks(blocks[2]).xorBlocks(blocks[5]); - msg0 = msg0.xorBlocks(blocks[2].andBlocks(blocks[3])); - msg1 = msg1.xorBlocks(blocks[6].andBlocks(blocks[7])); - dst[0..16].* = msg0.toBytes(); - dst[16..32].* = msg1.toBytes(); - state.update(msg0, msg1); - } - - fn mac(state: *State128L, comptime tag_bits: u9, adlen: usize, mlen: usize) [tag_bits / 8]u8 { - const blocks = &state.blocks; - var sizes: [16]u8 = undefined; - mem.writeInt(u64, sizes[0..8], @as(u64, adlen) * 8, .little); - mem.writeInt(u64, sizes[8..16], @as(u64, mlen) * 8, .little); - const tmp = AesBlock.fromBytes(&sizes).xorBlocks(blocks[2]); - var i: usize = 0; - while (i < 7) : (i += 1) { - state.update(tmp, tmp); - } - return switch (tag_bits) { - 128 => blocks[0].xorBlocks(blocks[1]).xorBlocks(blocks[2]).xorBlocks(blocks[3]) - .xorBlocks(blocks[4]).xorBlocks(blocks[5]).xorBlocks(blocks[6]).toBytes(), - 256 => tag: { - const t1 = blocks[0].xorBlocks(blocks[1]).xorBlocks(blocks[2]).xorBlocks(blocks[3]); - const t2 = blocks[4].xorBlocks(blocks[5]).xorBlocks(blocks[6]).xorBlocks(blocks[7]); - break :tag t1.toBytes() ++ t2.toBytes(); - }, - else => unreachable, - }; - } -}; - -fn Aegis128LGeneric(comptime tag_bits: u9) type { - comptime assert(tag_bits == 128 or tag_bits == 256); // tag must be 128 or 256 bits +/// AEGIS-256X4 with a 256 bit tag +pub const Aegis256X4_256 = Aegis256XGeneric(4, 256); +/// AEGIS-256X2 with a 256 bit tag +pub const Aegis256X2_256 = Aegis256XGeneric(2, 256); +/// AEGIS-256 with a 256 bit tag +pub const Aegis256_256 = Aegis256XGeneric(1, 256); +fn State128X(comptime degree: u7) type { return struct { - pub const tag_length = tag_bits / 8; - pub const nonce_length = 16; - pub const key_length = 16; - pub const block_length = 32; + const AesBlockVec = 
crypto.core.aes.BlockVec(degree); + const State = @This(); - const State = State128L; + blocks: [8]AesBlockVec, - /// c: ciphertext: output buffer should be of size m.len - /// tag: authentication tag: output MAC - /// m: message - /// ad: Associated Data - /// npub: public nonce - /// k: private key - pub fn encrypt(c: []u8, tag: *[tag_length]u8, m: []const u8, ad: []const u8, npub: [nonce_length]u8, key: [key_length]u8) void { - assert(c.len == m.len); - var state = State128L.init(key, npub); - var src: [32]u8 align(16) = undefined; - var dst: [32]u8 align(16) = undefined; - var i: usize = 0; - while (i + 32 <= ad.len) : (i += 32) { - state.absorb(ad[i..][0..32]); + const aes_block_length = AesBlockVec.block_length; + const rate = aes_block_length * 2; + const alignment = AesBlockVec.native_word_size; + + fn init(key: [16]u8, nonce: [16]u8) State { + const c1 = AesBlockVec.fromBytes(&[16]u8{ 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1, 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd } ** degree); + const c2 = AesBlockVec.fromBytes(&[16]u8{ 0x0, 0x1, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d, 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62 } ** degree); + const key_block = AesBlockVec.fromBytes(&(key ** degree)); + const nonce_block = AesBlockVec.fromBytes(&(nonce ** degree)); + const blocks = [8]AesBlockVec{ + key_block.xorBlocks(nonce_block), + c1, + c2, + c1, + key_block.xorBlocks(nonce_block), + key_block.xorBlocks(c2), + key_block.xorBlocks(c1), + key_block.xorBlocks(c2), + }; + var state = State{ .blocks = blocks }; + if (degree > 1) { + const context_block = ctx: { + var contexts_bytes = [_]u8{0} ** aes_block_length; + for (0..degree) |i| { + contexts_bytes[i * 16] = @intCast(i); + contexts_bytes[i * 16 + 1] = @intCast(degree - 1); + } + break :ctx AesBlockVec.fromBytes(&contexts_bytes); + }; + for (0..10) |_| { + state.blocks[3] = state.blocks[3].xorBlocks(context_block); + state.blocks[7] = state.blocks[7].xorBlocks(context_block); + state.update(nonce_block, 
key_block); + } + } else { + for (0..10) |_| { + state.update(nonce_block, key_block); + } } - if (ad.len % 32 != 0) { - @memset(src[0..], 0); - @memcpy(src[0 .. ad.len % 32], ad[i..][0 .. ad.len % 32]); - state.absorb(&src); - } - i = 0; - while (i + 32 <= m.len) : (i += 32) { - state.enc(c[i..][0..32], m[i..][0..32]); - } - if (m.len % 32 != 0) { - @memset(src[0..], 0); - @memcpy(src[0 .. m.len % 32], m[i..][0 .. m.len % 32]); - state.enc(&dst, &src); - @memcpy(c[i..][0 .. m.len % 32], dst[0 .. m.len % 32]); - } - tag.* = state.mac(tag_bits, ad.len, m.len); + return state; } - /// `m`: Message - /// `c`: Ciphertext - /// `tag`: Authentication tag - /// `ad`: Associated data - /// `npub`: Public nonce - /// `k`: Private key - /// Asserts `c.len == m.len`. - /// - /// Contents of `m` are undefined if an error is returned. - pub fn decrypt(m: []u8, c: []const u8, tag: [tag_length]u8, ad: []const u8, npub: [nonce_length]u8, key: [key_length]u8) AuthenticationError!void { - assert(c.len == m.len); - var state = State128L.init(key, npub); - var src: [32]u8 align(16) = undefined; - var dst: [32]u8 align(16) = undefined; - var i: usize = 0; - while (i + 32 <= ad.len) : (i += 32) { - state.absorb(ad[i..][0..32]); + inline fn update(state: *State, d1: AesBlockVec, d2: AesBlockVec) void { + const blocks = &state.blocks; + const tmp = blocks[7]; + comptime var i: usize = 7; + inline while (i > 0) : (i -= 1) { + blocks[i] = blocks[i - 1].encrypt(blocks[i]); } - if (ad.len % 32 != 0) { - @memset(src[0..], 0); - @memcpy(src[0 .. ad.len % 32], ad[i..][0 .. 
ad.len % 32]); - state.absorb(&src); + blocks[0] = tmp.encrypt(blocks[0]); + blocks[0] = blocks[0].xorBlocks(d1); + blocks[4] = blocks[4].xorBlocks(d2); + } + + fn absorb(state: *State, src: *const [rate]u8) void { + const msg0 = AesBlockVec.fromBytes(src[0..aes_block_length]); + const msg1 = AesBlockVec.fromBytes(src[aes_block_length..rate]); + state.update(msg0, msg1); + } + + fn enc(state: *State, dst: *[rate]u8, src: *const [rate]u8) void { + const blocks = &state.blocks; + const msg0 = AesBlockVec.fromBytes(src[0..aes_block_length]); + const msg1 = AesBlockVec.fromBytes(src[aes_block_length..rate]); + var tmp0 = msg0.xorBlocks(blocks[6]).xorBlocks(blocks[1]); + var tmp1 = msg1.xorBlocks(blocks[2]).xorBlocks(blocks[5]); + tmp0 = tmp0.xorBlocks(blocks[2].andBlocks(blocks[3])); + tmp1 = tmp1.xorBlocks(blocks[6].andBlocks(blocks[7])); + dst[0..aes_block_length].* = tmp0.toBytes(); + dst[aes_block_length..rate].* = tmp1.toBytes(); + state.update(msg0, msg1); + } + + fn dec(state: *State, dst: *[rate]u8, src: *const [rate]u8) void { + const blocks = &state.blocks; + var msg0 = AesBlockVec.fromBytes(src[0..aes_block_length]).xorBlocks(blocks[6]).xorBlocks(blocks[1]); + var msg1 = AesBlockVec.fromBytes(src[aes_block_length..rate]).xorBlocks(blocks[2]).xorBlocks(blocks[5]); + msg0 = msg0.xorBlocks(blocks[2].andBlocks(blocks[3])); + msg1 = msg1.xorBlocks(blocks[6].andBlocks(blocks[7])); + dst[0..aes_block_length].* = msg0.toBytes(); + dst[aes_block_length..rate].* = msg1.toBytes(); + state.update(msg0, msg1); + } + + fn decLast(state: *State, dst: []u8, src: []const u8) void { + const blocks = &state.blocks; + const z0 = blocks[6].xorBlocks(blocks[1]).xorBlocks(blocks[2].andBlocks(blocks[3])); + const z1 = blocks[2].xorBlocks(blocks[5]).xorBlocks(blocks[6].andBlocks(blocks[7])); + var pad = [_]u8{0} ** rate; + pad[0..aes_block_length].* = z0.toBytes(); + pad[aes_block_length..].* = z1.toBytes(); + for (pad[0..src.len], src) |*p, x| p.* ^= x; + @memcpy(dst, 
pad[0..src.len]); + @memset(pad[src.len..], 0); + const msg0 = AesBlockVec.fromBytes(pad[0..aes_block_length]); + const msg1 = AesBlockVec.fromBytes(pad[aes_block_length..rate]); + state.update(msg0, msg1); + } + + fn mac(state: *State, comptime tag_bits: u9, adlen: usize, mlen: usize) [tag_bits / 8]u8 { + const blocks = &state.blocks; + var sizes: [aes_block_length]u8 = undefined; + mem.writeInt(u64, sizes[0..8], @as(u64, adlen) * 8, .little); + mem.writeInt(u64, sizes[8..16], @as(u64, mlen) * 8, .little); + for (1..degree) |i| { + @memcpy(sizes[i * 16 ..][0..16], sizes[0..16]); } - i = 0; - while (i + 32 <= m.len) : (i += 32) { - state.dec(m[i..][0..32], c[i..][0..32]); + const tmp = AesBlockVec.fromBytes(&sizes).xorBlocks(blocks[2]); + for (0..7) |_| { + state.update(tmp, tmp); } - if (m.len % 32 != 0) { - @memset(src[0..], 0); - @memcpy(src[0 .. m.len % 32], c[i..][0 .. m.len % 32]); - state.dec(&dst, &src); - @memcpy(m[i..][0 .. m.len % 32], dst[0 .. m.len % 32]); - @memset(dst[0 .. 
m.len % 32], 0); - const blocks = &state.blocks; - blocks[0] = blocks[0].xorBlocks(AesBlock.fromBytes(dst[0..16])); - blocks[4] = blocks[4].xorBlocks(AesBlock.fromBytes(dst[16..32])); - } - var computed_tag = state.mac(tag_bits, ad.len, m.len); - const verify = crypto.timing_safe.eql([tag_length]u8, computed_tag, tag); - if (!verify) { - crypto.secureZero(u8, &computed_tag); - @memset(m, undefined); - return error.AuthenticationFailed; + switch (tag_bits) { + 128 => { + var tag_multi = blocks[0].xorBlocks(blocks[1]).xorBlocks(blocks[2]).xorBlocks(blocks[3]).xorBlocks(blocks[4]).xorBlocks(blocks[5]).xorBlocks(blocks[6]).toBytes(); + var tag = tag_multi[0..16].*; + @memcpy(tag[0..], tag_multi[0..16]); + for (1..degree) |d| { + for (0..16) |i| { + tag[i] ^= tag_multi[d * 16 + i]; + } + } + return tag; + }, + 256 => { + const tag_multi_1 = blocks[0].xorBlocks(blocks[1]).xorBlocks(blocks[2]).xorBlocks(blocks[3]).toBytes(); + const tag_multi_2 = blocks[4].xorBlocks(blocks[5]).xorBlocks(blocks[6]).xorBlocks(blocks[7]).toBytes(); + var tag = tag_multi_1[0..16].* ++ tag_multi_2[0..16].*; + for (1..degree) |d| { + for (0..16) |i| { + tag[i] ^= tag_multi_1[d * 16 + i]; + tag[i + 16] ^= tag_multi_2[d * 16 + i]; + } + } + return tag; + }, + else => unreachable, } } }; } -const State256 = struct { - blocks: [6]AesBlock, - - fn init(key: [32]u8, nonce: [32]u8) State256 { - const c1 = AesBlock.fromBytes(&[16]u8{ 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1, 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd }); - const c2 = AesBlock.fromBytes(&[16]u8{ 0x0, 0x1, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d, 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62 }); - const key_block1 = AesBlock.fromBytes(key[0..16]); - const key_block2 = AesBlock.fromBytes(key[16..32]); - const nonce_block1 = AesBlock.fromBytes(nonce[0..16]); - const nonce_block2 = AesBlock.fromBytes(nonce[16..32]); - const kxn1 = key_block1.xorBlocks(nonce_block1); - const kxn2 = key_block2.xorBlocks(nonce_block2); - const blocks = 
[6]AesBlock{ - kxn1, - kxn2, - c1, - c2, - key_block1.xorBlocks(c2), - key_block2.xorBlocks(c1), - }; - var state = State256{ .blocks = blocks }; - var i: usize = 0; - while (i < 4) : (i += 1) { - state.update(key_block1); - state.update(key_block2); - state.update(kxn1); - state.update(kxn2); - } - return state; - } - - inline fn update(state: *State256, d: AesBlock) void { - const blocks = &state.blocks; - const tmp = blocks[5].encrypt(blocks[0]); - comptime var i: usize = 5; - inline while (i > 0) : (i -= 1) { - blocks[i] = blocks[i - 1].encrypt(blocks[i]); - } - blocks[0] = tmp.xorBlocks(d); - } - - fn absorb(state: *State256, src: *const [16]u8) void { - const msg = AesBlock.fromBytes(src); - state.update(msg); - } - - fn enc(state: *State256, dst: *[16]u8, src: *const [16]u8) void { - const blocks = &state.blocks; - const msg = AesBlock.fromBytes(src); - var tmp = msg.xorBlocks(blocks[5]).xorBlocks(blocks[4]).xorBlocks(blocks[1]); - tmp = tmp.xorBlocks(blocks[2].andBlocks(blocks[3])); - dst.* = tmp.toBytes(); - state.update(msg); - } - - fn dec(state: *State256, dst: *[16]u8, src: *const [16]u8) void { - const blocks = &state.blocks; - var msg = AesBlock.fromBytes(src).xorBlocks(blocks[5]).xorBlocks(blocks[4]).xorBlocks(blocks[1]); - msg = msg.xorBlocks(blocks[2].andBlocks(blocks[3])); - dst.* = msg.toBytes(); - state.update(msg); - } - - fn mac(state: *State256, comptime tag_bits: u9, adlen: usize, mlen: usize) [tag_bits / 8]u8 { - const blocks = &state.blocks; - var sizes: [16]u8 = undefined; - mem.writeInt(u64, sizes[0..8], @as(u64, adlen) * 8, .little); - mem.writeInt(u64, sizes[8..16], @as(u64, mlen) * 8, .little); - const tmp = AesBlock.fromBytes(&sizes).xorBlocks(blocks[3]); - var i: usize = 0; - while (i < 7) : (i += 1) { - state.update(tmp); - } - return switch (tag_bits) { - 128 => blocks[0].xorBlocks(blocks[1]).xorBlocks(blocks[2]).xorBlocks(blocks[3]) - .xorBlocks(blocks[4]).xorBlocks(blocks[5]).toBytes(), - 256 => tag: { - const t1 = 
blocks[0].xorBlocks(blocks[1]).xorBlocks(blocks[2]); - const t2 = blocks[3].xorBlocks(blocks[4]).xorBlocks(blocks[5]); - break :tag t1.toBytes() ++ t2.toBytes(); - }, - else => unreachable, - }; - } -}; - /// AEGIS is a very fast authenticated encryption system built on top of the core AES function. /// -/// The 256 bit variant of AEGIS has a 256 bit key, a 256 bit nonce, and processes 128 bit message blocks. +/// The 128 bit variants of AEGIS have a 128 bit key and a 128 bit nonce. /// /// https://datatracker.ietf.org/doc/draft-irtf-cfrg-aegis-aead/ -fn Aegis256Generic(comptime tag_bits: u9) type { +fn Aegis128XGeneric(comptime degree: u7, comptime tag_bits: u9) type { + comptime assert(degree > 0); // degree must be greater than 0 comptime assert(tag_bits == 128 or tag_bits == 256); // tag must be 128 or 256 bits return struct { - pub const tag_length = tag_bits / 8; - pub const nonce_length = 32; - pub const key_length = 32; - pub const block_length = 16; + const State = State128X(degree); - const State = State256; + pub const tag_length = tag_bits / 8; + pub const nonce_length = 16; + pub const key_length = 16; + pub const block_length = State.rate; + + const alignment = State.alignment; /// c: ciphertext: output buffer should be of size m.len /// tag: authentication tag: output MAC @@ -328,27 +230,27 @@ fn Aegis256Generic(comptime tag_bits: u9) type { /// k: private key pub fn encrypt(c: []u8, tag: *[tag_length]u8, m: []const u8, ad: []const u8, npub: [nonce_length]u8, key: [key_length]u8) void { assert(c.len == m.len); - var state = State256.init(key, npub); - var src: [16]u8 align(16) = undefined; - var dst: [16]u8 align(16) = undefined; + var state = State.init(key, npub); + var src: [block_length]u8 align(alignment) = undefined; + var dst: [block_length]u8 align(alignment) = undefined; var i: usize = 0; - while (i + 16 <= ad.len) : (i += 16) { - state.enc(&dst, ad[i..][0..16]); + while (i + block_length <= ad.len) : (i += block_length) { + 
state.absorb(ad[i..][0..block_length]); } - if (ad.len % 16 != 0) { + if (ad.len % block_length != 0) { @memset(src[0..], 0); - @memcpy(src[0 .. ad.len % 16], ad[i..][0 .. ad.len % 16]); - state.enc(&dst, &src); + @memcpy(src[0 .. ad.len % block_length], ad[i..][0 .. ad.len % block_length]); + state.absorb(&src); } i = 0; - while (i + 16 <= m.len) : (i += 16) { - state.enc(c[i..][0..16], m[i..][0..16]); + while (i + block_length <= m.len) : (i += block_length) { + state.enc(c[i..][0..block_length], m[i..][0..block_length]); } - if (m.len % 16 != 0) { + if (m.len % block_length != 0) { @memset(src[0..], 0); - @memcpy(src[0 .. m.len % 16], m[i..][0 .. m.len % 16]); + @memcpy(src[0 .. m.len % block_length], m[i..][0 .. m.len % block_length]); state.enc(&dst, &src); - @memcpy(c[i..][0 .. m.len % 16], dst[0 .. m.len % 16]); + @memcpy(c[i..][0 .. m.len % block_length], dst[0 .. m.len % block_length]); } tag.* = state.mac(tag_bits, ad.len, m.len); } @@ -364,30 +266,23 @@ fn Aegis256Generic(comptime tag_bits: u9) type { /// Contents of `m` are undefined if an error is returned. pub fn decrypt(m: []u8, c: []const u8, tag: [tag_length]u8, ad: []const u8, npub: [nonce_length]u8, key: [key_length]u8) AuthenticationError!void { assert(c.len == m.len); - var state = State256.init(key, npub); - var src: [16]u8 align(16) = undefined; - var dst: [16]u8 align(16) = undefined; + var state = State.init(key, npub); + var src: [block_length]u8 align(alignment) = undefined; var i: usize = 0; - while (i + 16 <= ad.len) : (i += 16) { - state.enc(&dst, ad[i..][0..16]); + while (i + block_length <= ad.len) : (i += block_length) { + state.absorb(ad[i..][0..block_length]); } - if (ad.len % 16 != 0) { + if (ad.len % block_length != 0) { @memset(src[0..], 0); - @memcpy(src[0 .. ad.len % 16], ad[i..][0 .. ad.len % 16]); - state.enc(&dst, &src); + @memcpy(src[0 .. ad.len % block_length], ad[i..][0 .. 
ad.len % block_length]); + state.absorb(&src); } i = 0; - while (i + 16 <= m.len) : (i += 16) { - state.dec(m[i..][0..16], c[i..][0..16]); + while (i + block_length <= m.len) : (i += block_length) { + state.dec(m[i..][0..block_length], c[i..][0..block_length]); } - if (m.len % 16 != 0) { - @memset(src[0..], 0); - @memcpy(src[0 .. m.len % 16], c[i..][0 .. m.len % 16]); - state.dec(&dst, &src); - @memcpy(m[i..][0 .. m.len % 16], dst[0 .. m.len % 16]); - @memset(dst[0 .. m.len % 16], 0); - const blocks = &state.blocks; - blocks[0] = blocks[0].xorBlocks(AesBlock.fromBytes(&dst)); + if (m.len % block_length != 0) { + state.decLast(m[i..], c[i..]); } var computed_tag = state.mac(tag_bits, ad.len, m.len); const verify = crypto.timing_safe.eql([tag_length]u8, computed_tag, tag); @@ -400,6 +295,264 @@ fn Aegis256Generic(comptime tag_bits: u9) type { }; } +fn State256X(comptime degree: u7) type { + return struct { + const AesBlockVec = crypto.core.aes.BlockVec(degree); + const State = @This(); + + blocks: [6]AesBlockVec, + + const aes_block_length = AesBlockVec.block_length; + const rate = aes_block_length; + const alignment = AesBlockVec.native_word_size; + + fn init(key: [32]u8, nonce: [32]u8) State { + const c1 = AesBlockVec.fromBytes(&[16]u8{ 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1, 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd } ** degree); + const c2 = AesBlockVec.fromBytes(&[16]u8{ 0x0, 0x1, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d, 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62 } ** degree); + const key_block1 = AesBlockVec.fromBytes(key[0..16] ** degree); + const key_block2 = AesBlockVec.fromBytes(key[16..32] ** degree); + const nonce_block1 = AesBlockVec.fromBytes(nonce[0..16] ** degree); + const nonce_block2 = AesBlockVec.fromBytes(nonce[16..32] ** degree); + const kxn1 = key_block1.xorBlocks(nonce_block1); + const kxn2 = key_block2.xorBlocks(nonce_block2); + const blocks = [6]AesBlockVec{ + kxn1, + kxn2, + c1, + c2, + key_block1.xorBlocks(c2), + 
key_block2.xorBlocks(c1), + }; + var state = State{ .blocks = blocks }; + if (degree > 1) { + const context_block = ctx: { + var contexts_bytes = [_]u8{0} ** aes_block_length; + for (0..degree) |i| { + contexts_bytes[i * 16] = @intCast(i); + contexts_bytes[i * 16 + 1] = @intCast(degree - 1); + } + break :ctx AesBlockVec.fromBytes(&contexts_bytes); + }; + for (0..4) |_| { + state.blocks[3] = state.blocks[3].xorBlocks(context_block); + state.blocks[5] = state.blocks[5].xorBlocks(context_block); + state.update(key_block1); + state.blocks[3] = state.blocks[3].xorBlocks(context_block); + state.blocks[5] = state.blocks[5].xorBlocks(context_block); + state.update(key_block2); + state.blocks[3] = state.blocks[3].xorBlocks(context_block); + state.blocks[5] = state.blocks[5].xorBlocks(context_block); + state.update(kxn1); + state.blocks[3] = state.blocks[3].xorBlocks(context_block); + state.blocks[5] = state.blocks[5].xorBlocks(context_block); + state.update(kxn2); + } + } else { + for (0..4) |_| { + state.update(key_block1); + state.update(key_block2); + state.update(kxn1); + state.update(kxn2); + } + } + return state; + } + + inline fn update(state: *State, d: AesBlockVec) void { + const blocks = &state.blocks; + const tmp = blocks[5].encrypt(blocks[0]); + comptime var i: usize = 5; + inline while (i > 0) : (i -= 1) { + blocks[i] = blocks[i - 1].encrypt(blocks[i]); + } + blocks[0] = tmp.xorBlocks(d); + } + + fn absorb(state: *State, src: *const [rate]u8) void { + const msg = AesBlockVec.fromBytes(src); + state.update(msg); + } + + fn enc(state: *State, dst: *[rate]u8, src: *const [rate]u8) void { + const blocks = &state.blocks; + const msg = AesBlockVec.fromBytes(src); + var tmp = msg.xorBlocks(blocks[5]).xorBlocks(blocks[4]).xorBlocks(blocks[1]); + tmp = tmp.xorBlocks(blocks[2].andBlocks(blocks[3])); + dst.* = tmp.toBytes(); + state.update(msg); + } + + fn dec(state: *State, dst: *[rate]u8, src: *const [rate]u8) void { + const blocks = &state.blocks; + var msg = 
AesBlockVec.fromBytes(src).xorBlocks(blocks[5]).xorBlocks(blocks[4]).xorBlocks(blocks[1]); + msg = msg.xorBlocks(blocks[2].andBlocks(blocks[3])); + dst.* = msg.toBytes(); + state.update(msg); + } + + fn decLast(state: *State, dst: []u8, src: []const u8) void { + const blocks = &state.blocks; + const z = blocks[5].xorBlocks(blocks[4]).xorBlocks(blocks[1]).xorBlocks(blocks[2].andBlocks(blocks[3])); + var pad = z.toBytes(); + for (pad[0..src.len], src) |*p, x| p.* ^= x; + @memcpy(dst, pad[0..src.len]); + @memset(pad[src.len..], 0); + const msg = AesBlockVec.fromBytes(pad[0..]); + state.update(msg); + } + + fn mac(state: *State, comptime tag_bits: u9, adlen: usize, mlen: usize) [tag_bits / 8]u8 { + const blocks = &state.blocks; + var sizes: [aes_block_length]u8 = undefined; + mem.writeInt(u64, sizes[0..8], @as(u64, adlen) * 8, .little); + mem.writeInt(u64, sizes[8..16], @as(u64, mlen) * 8, .little); + for (1..degree) |i| { + @memcpy(sizes[i * 16 ..][0..16], sizes[0..16]); + } + const tmp = AesBlockVec.fromBytes(&sizes).xorBlocks(blocks[3]); + for (0..7) |_| { + state.update(tmp); + } + switch (tag_bits) { + 128 => { + var tag_multi = blocks[0].xorBlocks(blocks[1]).xorBlocks(blocks[2]).xorBlocks(blocks[3]).xorBlocks(blocks[4]).xorBlocks(blocks[5]).toBytes(); + var tag = tag_multi[0..16].*; + @memcpy(tag[0..], tag_multi[0..16]); + for (1..degree) |d| { + for (0..16) |i| { + tag[i] ^= tag_multi[d * 16 + i]; + } + } + return tag; + }, + 256 => { + const tag_multi_1 = blocks[0].xorBlocks(blocks[1]).xorBlocks(blocks[2]).toBytes(); + const tag_multi_2 = blocks[3].xorBlocks(blocks[4]).xorBlocks(blocks[5]).toBytes(); + var tag = tag_multi_1[0..16].* ++ tag_multi_2[0..16].*; + for (1..degree) |d| { + for (0..16) |i| { + tag[i] ^= tag_multi_1[d * 16 + i]; + tag[i + 16] ^= tag_multi_2[d * 16 + i]; + } + } + return tag; + }, + else => unreachable, + } + } + }; +} + +/// AEGIS is a very fast authenticated encryption system built on top of the core AES function. 
+/// +/// The 256 bit variants of AEGIS have a 256 bit key and a 256 bit nonce. +/// +/// https://datatracker.ietf.org/doc/draft-irtf-cfrg-aegis-aead/ +fn Aegis256XGeneric(comptime degree: u7, comptime tag_bits: u9) type { + comptime assert(degree > 0); // degree must be greater than 0 + comptime assert(tag_bits == 128 or tag_bits == 256); // tag must be 128 or 256 bits + + return struct { + const State = State256X(degree); + + pub const tag_length = tag_bits / 8; + pub const nonce_length = 32; + pub const key_length = 32; + pub const block_length = State.rate; + + const alignment = State.alignment; + + /// c: ciphertext: output buffer should be of size m.len + /// tag: authentication tag: output MAC + /// m: message + /// ad: Associated Data + /// npub: public nonce + /// k: private key + pub fn encrypt(c: []u8, tag: *[tag_length]u8, m: []const u8, ad: []const u8, npub: [nonce_length]u8, key: [key_length]u8) void { + assert(c.len == m.len); + var state = State.init(key, npub); + var src: [block_length]u8 align(alignment) = undefined; + var dst: [block_length]u8 align(alignment) = undefined; + var i: usize = 0; + while (i + block_length <= ad.len) : (i += block_length) { + state.enc(&dst, ad[i..][0..block_length]); + } + if (ad.len % block_length != 0) { + @memset(src[0..], 0); + @memcpy(src[0 .. ad.len % block_length], ad[i..][0 .. ad.len % block_length]); + state.enc(&dst, &src); + } + i = 0; + while (i + block_length <= m.len) : (i += block_length) { + state.enc(c[i..][0..block_length], m[i..][0..block_length]); + } + if (m.len % block_length != 0) { + @memset(src[0..], 0); + @memcpy(src[0 .. m.len % block_length], m[i..][0 .. m.len % block_length]); + state.enc(&dst, &src); + @memcpy(c[i..][0 .. m.len % block_length], dst[0 .. 
m.len % block_length]); + } + tag.* = state.mac(tag_bits, ad.len, m.len); + } + + /// `m`: Message + /// `c`: Ciphertext + /// `tag`: Authentication tag + /// `ad`: Associated data + /// `npub`: Public nonce + /// `k`: Private key + /// Asserts `c.len == m.len`. + /// + /// Contents of `m` are undefined if an error is returned. + pub fn decrypt(m: []u8, c: []const u8, tag: [tag_length]u8, ad: []const u8, npub: [nonce_length]u8, key: [key_length]u8) AuthenticationError!void { + assert(c.len == m.len); + var state = State.init(key, npub); + var src: [block_length]u8 align(alignment) = undefined; + var i: usize = 0; + while (i + block_length <= ad.len) : (i += block_length) { + state.absorb(ad[i..][0..block_length]); + } + if (ad.len % block_length != 0) { + @memset(src[0..], 0); + @memcpy(src[0 .. ad.len % block_length], ad[i..][0 .. ad.len % block_length]); + state.absorb(&src); + } + i = 0; + while (i + block_length <= m.len) : (i += block_length) { + state.dec(m[i..][0..block_length], c[i..][0..block_length]); + } + if (m.len % block_length != 0) { + state.decLast(m[i..], c[i..]); + } + var computed_tag = state.mac(tag_bits, ad.len, m.len); + const verify = crypto.timing_safe.eql([tag_length]u8, computed_tag, tag); + if (!verify) { + crypto.secureZero(u8, &computed_tag); + @memset(m, undefined); + return error.AuthenticationFailed; + } + } + }; +} + +/// The `Aegis128X4Mac` message authentication function outputs 256 bit tags. +/// In addition to being extremely fast, its large state, non-linearity +/// and non-invertibility provides the following properties: +/// - 128 bit security, stronger than GHash/Polyval/Poly1305. +/// - Recovering the secret key from the state would require ~2^128 attempts, +/// which is infeasible for any practical adversary. +/// - It has a large security margin against internal collisions. +pub const Aegis128X4Mac = AegisMac(Aegis128X4_256); + +/// The `Aegis128X2Mac` message authentication function outputs 256 bit tags. 
+/// In addition to being extremely fast, its large state, non-linearity +/// and non-invertibility provides the following properties: +/// - 128 bit security, stronger than GHash/Polyval/Poly1305. +/// - Recovering the secret key from the state would require ~2^128 attempts, +/// which is infeasible for any practical adversary. +/// - It has a large security margin against internal collisions. +pub const Aegis128X2Mac = AegisMac(Aegis128X2_256); + /// The `Aegis128LMac` message authentication function outputs 256 bit tags. /// In addition to being extremely fast, its large state, non-linearity /// and non-invertibility provides the following properties: @@ -409,34 +562,60 @@ fn Aegis256Generic(comptime tag_bits: u9) type { /// - It has a large security margin against internal collisions. pub const Aegis128LMac = AegisMac(Aegis128L_256); +/// The `Aegis256X4Mac` message authentication function has a 256-bit key size, +/// and outputs 256 bit tags. Unless theoretical multi-target attacks are a +/// concern, the AEGIS-128L variant should be preferred. +/// AEGIS' large state, non-linearity and non-invertibility provides the +/// following properties: +/// - 256 bit security against forgery. +/// - Recovering the secret key from the state would require ~2^256 attempts, +/// which is infeasible for any practical adversary. +/// - It has a large security margin against internal collisions. +pub const Aegis256X4Mac = AegisMac(Aegis256X4_256); + +/// The `Aegis256X2Mac` message authentication function has a 256-bit key size, +/// and outputs 256 bit tags. Unless theoretical multi-target attacks are a +/// concern, the AEGIS-128L variant should be preferred. +/// AEGIS' large state, non-linearity and non-invertibility provides the +/// following properties: +/// - 256 bit security against forgery. +/// - Recovering the secret key from the state would require ~2^256 attempts, +/// which is infeasible for any practical adversary. 
+/// - It has a large security margin against internal collisions. +pub const Aegis256X2Mac = AegisMac(Aegis256X2_256); + /// The `Aegis256Mac` message authentication function has a 256-bit key size, /// and outputs 256 bit tags. Unless theoretical multi-target attacks are a /// concern, the AEGIS-128L variant should be preferred. /// AEGIS' large state, non-linearity and non-invertibility provides the /// following properties: -/// - More than 128 bit security against forgery. +/// - 256 bit security against forgery. /// - Recovering the secret key from the state would require ~2^256 attempts, /// which is infeasible for any practical adversary. /// - It has a large security margin against internal collisions. pub const Aegis256Mac = AegisMac(Aegis256_256); -/// Aegis128L MAC with a 128-bit output. -/// A MAC with a 128-bit output is not safe unless the number of messages -/// authenticated with the same key remains small. -/// After 2^48 messages, the probability of a collision is already ~ 2^-33. -/// If unsure, use the Aegis128LMac type, that has a 256 bit output. +/// AEGIS-128X4 MAC with 128-bit tags +pub const Aegis128X4Mac_128 = AegisMac(Aegis128X4); + +/// AEGIS-128X2 MAC with 128-bit tags +pub const Aegis128X2Mac_128 = AegisMac(Aegis128X2); + +/// AEGIS-128L MAC with 128-bit tags pub const Aegis128LMac_128 = AegisMac(Aegis128L); -/// Aegis256 MAC with a 128-bit output. -/// A MAC with a 128-bit output is not safe unless the number of messages -/// authenticated with the same key remains small. -/// After 2^48 messages, the probability of a collision is already ~ 2^-33. -/// If unsure, use the Aegis256Mac type, that has a 256 bit output. 
+/// AEGIS-256X4 MAC with 128-bit tags +pub const Aegis256X4Mac_128 = AegisMac(Aegis256X4); + +/// AEGIS-256X2 MAC with 128-bit tags +pub const Aegis256X2Mac_128 = AegisMac(Aegis256X2); + +/// AEGIS-256 MAC with 128-bit tags pub const Aegis256Mac_128 = AegisMac(Aegis256); fn AegisMac(comptime T: type) type { return struct { - const Self = @This(); + const Mac = @This(); pub const mac_length = T.tag_length; pub const key_length = T.key_length; @@ -448,15 +627,15 @@ fn AegisMac(comptime T: type) type { msg_len: usize = 0, /// Initialize a state for the MAC function - pub fn init(key: *const [key_length]u8) Self { + pub fn init(key: *const [key_length]u8) Mac { const nonce = [_]u8{0} ** T.nonce_length; - return Self{ + return Mac{ .state = T.State.init(key.*, nonce), }; } /// Add data to the state - pub fn update(self: *Self, b: []const u8) void { + pub fn update(self: *Mac, b: []const u8) void { self.msg_len += b.len; const len_partial = @min(b.len, block_length - self.off); @@ -469,6 +648,10 @@ fn AegisMac(comptime T: type) type { var i = len_partial; self.off = 0; + while (i + block_length * 2 <= b.len) : (i += block_length * 2) { + self.state.absorb(b[i..][0..block_length]); + self.state.absorb(b[i..][block_length .. 
block_length * 2]); + } while (i + block_length <= b.len) : (i += block_length) { self.state.absorb(b[i..][0..block_length]); } @@ -479,7 +662,7 @@ fn AegisMac(comptime T: type) type { } /// Return an authentication tag for the current state - pub fn final(self: *Self, out: *[mac_length]u8) void { + pub fn final(self: *Mac, out: *[mac_length]u8) void { if (self.off > 0) { var pad = [_]u8{0} ** block_length; @memcpy(pad[0..self.off], self.buf[0..self.off]); @@ -490,20 +673,20 @@ fn AegisMac(comptime T: type) type { /// Return an authentication tag for a message and a key pub fn create(out: *[mac_length]u8, msg: []const u8, key: *const [key_length]u8) void { - var ctx = Self.init(key); + var ctx = Mac.init(key); ctx.update(msg); ctx.final(out); } pub const Error = error{}; - pub const Writer = std.io.Writer(*Self, Error, write); + pub const Writer = std.io.Writer(*Mac, Error, write); - fn write(self: *Self, bytes: []const u8) Error!usize { + fn write(self: *Mac, bytes: []const u8) Error!usize { self.update(bytes); return bytes.len; } - pub fn writer(self: *Self) Writer { + pub fn writer(self: *Mac) Writer { return .{ .context = self }; } }; @@ -568,6 +751,23 @@ test "Aegis128L test vector 3" { try htest.assertEqual("83cc600dc4e3e7e62d4055826174f149", &tag); } +test "Aegis128X2 test vector 1" { + const key: [Aegis128X2.key_length]u8 = [_]u8{ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }; + const nonce: [Aegis128X2.nonce_length]u8 = [_]u8{ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f }; + var empty = [_]u8{}; + var tag: [Aegis128X2.tag_length]u8 = undefined; + var tag256: [Aegis128X2_256.tag_length]u8 = undefined; + + Aegis128X2.encrypt(&empty, &tag, &empty, &empty, nonce, key); + Aegis128X2_256.encrypt(&empty, &tag256, &empty, &empty, nonce, key); + try htest.assertEqual("63117dc57756e402819a82e13eca8379", &tag); + try 
htest.assertEqual("b92c71fdbd358b8a4de70b27631ace90cffd9b9cfba82028412bac41b4f53759", &tag256); + tag[0] +%= 1; + try testing.expectError(error.AuthenticationFailed, Aegis128X2.decrypt(&empty, &empty, tag, &empty, nonce, key)); + tag256[0] +%= 1; + try testing.expectError(error.AuthenticationFailed, Aegis128X2_256.decrypt(&empty, &empty, tag256, &empty, nonce, key)); +} + test "Aegis256 test vector 1" { const key: [Aegis256.key_length]u8 = [_]u8{ 0x10, 0x01 } ++ [_]u8{0x00} ** 30; const nonce: [Aegis256.nonce_length]u8 = [_]u8{ 0x10, 0x00, 0x02 } ++ [_]u8{0x00} ** 29; @@ -624,6 +824,23 @@ test "Aegis256 test vector 3" { try htest.assertEqual("f7a0878f68bd083e8065354071fc27c3", &tag); } +test "Aegis256X4 test vector 1" { + const key: [Aegis256X4.key_length]u8 = [_]u8{ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f }; + const nonce: [Aegis256X4.nonce_length]u8 = [_]u8{ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f }; + var empty = [_]u8{}; + var tag: [Aegis256X4.tag_length]u8 = undefined; + var tag256: [Aegis256X4_256.tag_length]u8 = undefined; + + Aegis256X4.encrypt(&empty, &tag, &empty, &empty, nonce, key); + Aegis256X4_256.encrypt(&empty, &tag256, &empty, &empty, nonce, key); + try htest.assertEqual("3b7fee6cee7bf17888ad11ed2397beb4", &tag); + try htest.assertEqual("6093a1a8aab20ec635dc1ca71745b01b5bec4fc444c9ffbebd710d4a34d20eaf", &tag256); + tag[0] +%= 1; + try testing.expectError(error.AuthenticationFailed, Aegis256X4.decrypt(&empty, &empty, tag, &empty, nonce, key)); + tag256[0] +%= 1; + try testing.expectError(error.AuthenticationFailed, Aegis256X4_256.decrypt(&empty, &empty, tag256, &empty, nonce, key)); +} + test "Aegis MAC" { const key = [_]u8{0x00} ** 
Aegis128LMac.key_length; var msg: [64]u8 = undefined; diff --git a/lib/std/crypto/aes.zig b/lib/std/crypto/aes.zig index 5e5ae04b58..d14b82c937 100644 --- a/lib/std/crypto/aes.zig +++ b/lib/std/crypto/aes.zig @@ -22,6 +22,7 @@ pub const has_hardware_support = (builtin.cpu.arch == .aarch64 and has_armaes); pub const Block = impl.Block; +pub const BlockVec = impl.BlockVec; pub const AesEncryptCtx = impl.AesEncryptCtx; pub const AesDecryptCtx = impl.AesDecryptCtx; pub const Aes128 = impl.Aes128; diff --git a/lib/std/crypto/aes/aesni.zig b/lib/std/crypto/aes/aesni.zig index e0893cfba8..2793ff4184 100644 --- a/lib/std/crypto/aes/aesni.zig +++ b/lib/std/crypto/aes/aesni.zig @@ -2,18 +2,23 @@ const std = @import("../../std.zig"); const builtin = @import("builtin"); const mem = std.mem; const debug = std.debug; -const BlockVec = @Vector(2, u64); + +const has_vaes = builtin.cpu.arch == .x86_64 and std.Target.x86.featureSetHas(builtin.cpu.features, .vaes); +const has_avx512f = builtin.cpu.arch == .x86_64 and std.Target.x86.featureSetHas(builtin.cpu.features, .avx512f); /// A single AES block. pub const Block = struct { + const Repr = @Vector(2, u64); + + /// The length of an AES block in bytes. pub const block_length: usize = 16; /// Internal representation of a block. - repr: BlockVec, + repr: Repr, /// Convert a byte sequence into an internal representation. 
pub inline fn fromBytes(bytes: *const [16]u8) Block {
-        const repr = mem.bytesToValue(BlockVec, bytes);
+        const repr = mem.bytesToValue(Repr, bytes);
         return Block{ .repr = repr };
     }
 
@@ -33,7 +38,7 @@ pub const Block = struct {
         return Block{
             .repr = asm (
                 \\ vaesenc %[rk], %[in], %[out]
-                : [out] "=x" (-> BlockVec),
+                : [out] "=x" (-> Repr),
                 : [in] "x" (block.repr),
                   [rk] "x" (round_key.repr),
             ),
@@ -45,7 +50,7 @@ pub const Block = struct {
         return Block{
             .repr = asm (
                 \\ vaesenclast %[rk], %[in], %[out]
-                : [out] "=x" (-> BlockVec),
+                : [out] "=x" (-> Repr),
                 : [in] "x" (block.repr),
                   [rk] "x" (round_key.repr),
             ),
@@ -57,7 +62,7 @@ pub const Block = struct {
         return Block{
             .repr = asm (
                 \\ vaesdec %[rk], %[in], %[out]
-                : [out] "=x" (-> BlockVec),
+                : [out] "=x" (-> Repr),
                 : [in] "x" (block.repr),
                   [rk] "x" (inv_round_key.repr),
             ),
@@ -69,7 +74,7 @@ pub const Block = struct {
         return Block{
             .repr = asm (
                 \\ vaesdeclast %[rk], %[in], %[out]
-                : [out] "=x" (-> BlockVec),
+                : [out] "=x" (-> Repr),
                 : [in] "x" (block.repr),
                   [rk] "x" (inv_round_key.repr),
             ),
@@ -168,17 +173,158 @@ pub const Block = struct {
     };
 };
 
+/// A fixed-size vector of AES blocks.
+/// All operations are performed in parallel, using SIMD instructions when available.
+pub fn BlockVec(comptime blocks_count: comptime_int) type {
+    return struct {
+        const Self = @This();
+
+        /// The number of AES blocks the target architecture can process with a single instruction.
+        pub const native_vector_size = w: {
+            if (has_vaes and has_avx512f and blocks_count % 4 == 0) break :w 4; // 512-bit forms need VAES as well as AVX512F
+            if (has_vaes and blocks_count % 2 == 0) break :w 2;
+            break :w 1;
+        };
+
+        /// The size of the AES block vector that the target architecture can process with a single instruction, in bytes.
+        pub const native_word_size = native_vector_size * 16;
+
+        const native_words = blocks_count / native_vector_size;
+
+        const Repr = @Vector(native_vector_size * 2, u64);
+
+        /// Internal representation of a block vector.
+        repr: [native_words]Repr,
+
+        /// Length of the block vector in bytes.
+        pub const block_length: usize = blocks_count * 16;
+
+        /// Convert a byte sequence into an internal representation.
+        pub inline fn fromBytes(bytes: *const [blocks_count * 16]u8) Self {
+            var out: Self = undefined;
+            inline for (0..native_words) |i| {
+                out.repr[i] = mem.bytesToValue(Repr, bytes[i * native_word_size ..][0..native_word_size]);
+            }
+            return out;
+        }
+
+        /// Convert the internal representation of a block vector into a byte sequence.
+        pub inline fn toBytes(block_vec: Self) [blocks_count * 16]u8 {
+            var out: [blocks_count * 16]u8 = undefined;
+            inline for (0..native_words) |i| {
+                out[i * native_word_size ..][0..native_word_size].* = mem.toBytes(block_vec.repr[i]);
+            }
+            return out;
+        }
+
+        /// XOR the block vector with a byte sequence and return the result as bytes.
+        pub inline fn xorBytes(block_vec: Self, bytes: *const [blocks_count * 16]u8) [blocks_count * 16]u8 {
+            var x: Self = undefined;
+            inline for (0..native_words) |i| {
+                x.repr[i] = block_vec.repr[i] ^ mem.bytesToValue(Repr, bytes[i * native_word_size ..][0..native_word_size]);
+            }
+            return x.toBytes();
+        }
+
+        /// Apply the forward AES operation to the block vector with a vector of round keys.
+        pub inline fn encrypt(block_vec: Self, round_key_vec: Self) Self {
+            var out: Self = undefined;
+            inline for (0..native_words) |i| {
+                out.repr[i] = asm (
+                    \\ vaesenc %[rk], %[in], %[out]
+                    : [out] "=x" (-> Repr),
+                    : [in] "x" (block_vec.repr[i]),
+                      [rk] "x" (round_key_vec.repr[i]),
+                );
+            }
+            return out;
+        }
+
+        /// Apply the forward AES operation to the block vector with a vector of last round keys.
+        pub inline fn encryptLast(block_vec: Self, round_key_vec: Self) Self {
+            var out: Self = undefined;
+            inline for (0..native_words) |i| {
+                out.repr[i] = asm (
+                    \\ vaesenclast %[rk], %[in], %[out]
+                    : [out] "=x" (-> Repr),
+                    : [in] "x" (block_vec.repr[i]),
+                      [rk] "x" (round_key_vec.repr[i]),
+                );
+            }
+            return out;
+        }
+
+        /// Apply the inverse AES operation to the block vector with a vector of round keys.
+        pub inline fn decrypt(block_vec: Self, inv_round_key_vec: Self) Self {
+            var out: Self = undefined;
+            inline for (0..native_words) |i| {
+                out.repr[i] = asm (
+                    \\ vaesdec %[rk], %[in], %[out]
+                    : [out] "=x" (-> Repr),
+                    : [in] "x" (block_vec.repr[i]),
+                      [rk] "x" (inv_round_key_vec.repr[i]),
+                );
+            }
+            return out;
+        }
+
+        /// Apply the inverse AES operation to the block vector with a vector of last round keys.
+        pub inline fn decryptLast(block_vec: Self, inv_round_key_vec: Self) Self {
+            var out: Self = undefined;
+            inline for (0..native_words) |i| {
+                out.repr[i] = asm (
+                    \\ vaesdeclast %[rk], %[in], %[out]
+                    : [out] "=x" (-> Repr),
+                    : [in] "x" (block_vec.repr[i]),
+                      [rk] "x" (inv_round_key_vec.repr[i]),
+                );
+            }
+            return out;
+        }
+
+        /// Apply the bitwise XOR operation to the content of two block vectors.
+        pub inline fn xorBlocks(block_vec1: Self, block_vec2: Self) Self {
+            var out: Self = undefined;
+            inline for (0..native_words) |i| {
+                out.repr[i] = block_vec1.repr[i] ^ block_vec2.repr[i];
+            }
+            return out;
+        }
+
+        /// Apply the bitwise AND operation to the content of two block vectors.
+        pub inline fn andBlocks(block_vec1: Self, block_vec2: Self) Self {
+            var out: Self = undefined;
+            inline for (0..native_words) |i| {
+                out.repr[i] = block_vec1.repr[i] & block_vec2.repr[i];
+            }
+            return out;
+        }
+
+        /// Apply the bitwise OR operation to the content of two block vectors.
+        pub inline fn orBlocks(block_vec1: Self, block_vec2: Self) Self {
+            var out: Self = undefined;
+            inline for (0..native_words) |i| {
+                out.repr[i] = block_vec1.repr[i] | block_vec2.repr[i];
+            }
+            return out;
+        }
+    };
+}
+
 fn KeySchedule(comptime Aes: type) type {
     std.debug.assert(Aes.rounds == 10 or Aes.rounds == 14);
     const rounds = Aes.rounds;
 
     return struct {
         const Self = @This();
+
+        const Repr = Aes.block.Repr;
+
         round_keys: [rounds + 1]Block,
 
-        fn drc(comptime second: bool, comptime rc: u8, t: BlockVec, tx: BlockVec) BlockVec {
-            var s: BlockVec = undefined;
-            var ts: BlockVec = undefined;
+        fn drc(comptime second: bool, comptime rc: u8, t: Repr, tx: Repr) Repr {
+            var s: Repr = undefined;
+            var ts: Repr = undefined;
             return asm (
                 \\ vaeskeygenassist %[rc], %[t], %[s]
                 \\ vpslldq $4, %[tx], %[ts]
@@ -187,7 +333,7 @@ fn KeySchedule(comptime Aes: type) type {
                 \\ vpxor %[ts], %[r], %[r]
                 \\ vpshufd %[mask], %[s], %[ts]
                 \\ vpxor %[ts], %[r], %[r]
-                : [r] "=&x" (-> BlockVec),
+                : [r] "=&x" (-> Repr),
                   [s] "=&x" (s),
                   [ts] "=&x" (ts),
                 : [rc] "n" (rc),
@@ -234,7 +380,7 @@ fn KeySchedule(comptime Aes: type) type {
                 inv_round_keys[i] = Block{
                     .repr = asm (
                         \\ vaesimc %[rk], %[inv_rk]
-                        : [inv_rk] "=x" (-> BlockVec),
+                        : [inv_rk] "=x" (-> Repr),
                         : [rk] "x" (round_keys[rounds - i].repr),
                     ),
                 };
diff --git a/lib/std/crypto/aes/armcrypto.zig b/lib/std/crypto/aes/armcrypto.zig
index a6574c372a..2487ab7e72 100644
--- a/lib/std/crypto/aes/armcrypto.zig
+++ b/lib/std/crypto/aes/armcrypto.zig
@@ -1,18 +1,19 @@
 const std = @import("../../std.zig");
 const mem = std.mem;
 const debug = std.debug;
-const BlockVec = @Vector(2, u64);
 
 /// A single AES block.
 pub const Block = struct {
+    const Repr = @Vector(2, u64);
+
     pub const block_length: usize = 16;
 
     /// Internal representation of a block.
-    repr: BlockVec,
+    repr: Repr,
 
     /// Convert a byte sequence into an internal representation.
pub inline fn fromBytes(bytes: *const [16]u8) Block {
-        const repr = mem.bytesToValue(BlockVec, bytes);
+        const repr = mem.bytesToValue(Repr, bytes);
         return Block{ .repr = repr };
     }
 
@@ -36,7 +37,7 @@ pub const Block = struct {
                 \\ mov %[out].16b, %[in].16b
                 \\ aese %[out].16b, %[zero].16b
                 \\ aesmc %[out].16b, %[out].16b
-                : [out] "=&x" (-> BlockVec),
+                : [out] "=&x" (-> Repr),
                 : [in] "x" (block.repr),
                   [zero] "x" (zero),
             )) ^ round_key.repr,
@@ -49,7 +50,7 @@ pub const Block = struct {
             .repr = (asm (
                 \\ mov %[out].16b, %[in].16b
                 \\ aese %[out].16b, %[zero].16b
-                : [out] "=&x" (-> BlockVec),
+                : [out] "=&x" (-> Repr),
                 : [in] "x" (block.repr),
                   [zero] "x" (zero),
             )) ^ round_key.repr,
@@ -63,7 +64,7 @@ pub const Block = struct {
                 \\ mov %[out].16b, %[in].16b
                 \\ aesd %[out].16b, %[zero].16b
                 \\ aesimc %[out].16b, %[out].16b
-                : [out] "=&x" (-> BlockVec),
+                : [out] "=&x" (-> Repr),
                 : [in] "x" (block.repr),
                   [zero] "x" (zero),
             )) ^ inv_round_key.repr,
@@ -76,7 +77,7 @@ pub const Block = struct {
             .repr = (asm (
                 \\ mov %[out].16b, %[in].16b
                 \\ aesd %[out].16b, %[zero].16b
-                : [out] "=&x" (-> BlockVec),
+                : [out] "=&x" (-> Repr),
                 : [in] "x" (block.repr),
                   [zero] "x" (zero),
             )) ^ inv_round_key.repr,
@@ -165,6 +166,118 @@ pub const Block = struct {
     };
 };
 
+/// A fixed-size vector of AES blocks.
+/// All operations are performed in parallel, using SIMD instructions when available.
+pub fn BlockVec(comptime blocks_count: comptime_int) type {
+    return struct {
+        const Self = @This();
+
+        /// The number of AES blocks the target architecture can process with a single instruction.
+        pub const native_vector_size = 1;
+
+        /// The size of the AES block vector that the target architecture can process with a single instruction, in bytes.
+        pub const native_word_size = native_vector_size * 16;
+
+        const native_words = blocks_count;
+
+        /// Internal representation of a block vector.
+        repr: [native_words]Block,
+
+        /// Length of the block vector in bytes.
+        pub const block_length: usize = blocks_count * 16;
+
+        /// Convert a byte sequence into an internal representation.
+        pub inline fn fromBytes(bytes: *const [blocks_count * 16]u8) Self {
+            var out: Self = undefined;
+            inline for (0..native_words) |i| {
+                out.repr[i] = Block.fromBytes(bytes[i * native_word_size ..][0..native_word_size]);
+            }
+            return out;
+        }
+
+        /// Convert the internal representation of a block vector into a byte sequence.
+        pub inline fn toBytes(block_vec: Self) [blocks_count * 16]u8 {
+            var out: [blocks_count * 16]u8 = undefined;
+            inline for (0..native_words) |i| {
+                out[i * native_word_size ..][0..native_word_size].* = block_vec.repr[i].toBytes();
+            }
+            return out;
+        }
+
+        /// XOR the block vector with a byte sequence and return the result as bytes.
+        pub inline fn xorBytes(block_vec: Self, bytes: *const [blocks_count * 16]u8) [blocks_count * 16]u8 {
+            var out: [blocks_count * 16]u8 = undefined;
+            inline for (0..native_words) |i| {
+                out[i * native_word_size ..][0..native_word_size].* = block_vec.repr[i].xorBytes(bytes[i * native_word_size ..][0..native_word_size]);
+            }
+            return out;
+        }
+
+        /// Apply the forward AES operation to the block vector with a vector of round keys.
+        pub inline fn encrypt(block_vec: Self, round_key_vec: Self) Self {
+            var out: Self = undefined;
+            inline for (0..native_words) |i| {
+                out.repr[i] = block_vec.repr[i].encrypt(round_key_vec.repr[i]);
+            }
+            return out;
+        }
+
+        /// Apply the forward AES operation to the block vector with a vector of last round keys.
+        pub inline fn encryptLast(block_vec: Self, round_key_vec: Self) Self {
+            var out: Self = undefined;
+            inline for (0..native_words) |i| {
+                out.repr[i] = block_vec.repr[i].encryptLast(round_key_vec.repr[i]);
+            }
+            return out;
+        }
+
+        /// Apply the inverse AES operation to the block vector with a vector of round keys.
+        pub inline fn decrypt(block_vec: Self, inv_round_key_vec: Self) Self {
+            var out: Self = undefined;
+            inline for (0..native_words) |i| {
+                out.repr[i] = block_vec.repr[i].decrypt(inv_round_key_vec.repr[i]);
+            }
+            return out;
+        }
+
+        /// Apply the inverse AES operation to the block vector with a vector of last round keys.
+        pub inline fn decryptLast(block_vec: Self, inv_round_key_vec: Self) Self {
+            var out: Self = undefined;
+            inline for (0..native_words) |i| {
+                out.repr[i] = block_vec.repr[i].decryptLast(inv_round_key_vec.repr[i]);
+            }
+            return out;
+        }
+
+        /// Apply the bitwise XOR operation to the content of two block vectors.
+        pub inline fn xorBlocks(block_vec1: Self, block_vec2: Self) Self {
+            var out: Self = undefined;
+            inline for (0..native_words) |i| {
+                out.repr[i] = block_vec1.repr[i].xorBlocks(block_vec2.repr[i]);
+            }
+            return out;
+        }
+
+        /// Apply the bitwise AND operation to the content of two block vectors.
+        pub inline fn andBlocks(block_vec1: Self, block_vec2: Self) Self {
+            var out: Self = undefined;
+            inline for (0..native_words) |i| {
+                out.repr[i] = block_vec1.repr[i].andBlocks(block_vec2.repr[i]);
+            }
+            return out;
+        }
+
+        /// Apply the bitwise OR operation to the content of two block vectors.
+        pub inline fn orBlocks(block_vec1: Self, block_vec2: Self) Self {
+            var out: Self = undefined;
+            inline for (0..native_words) |i| {
+                out.repr[i] = block_vec1.repr[i].orBlocks(block_vec2.repr[i]);
+            }
+            return out;
+        }
+    };
+}
+
 fn KeySchedule(comptime Aes: type) type {
     std.debug.assert(Aes.rounds == 10 or Aes.rounds == 14);
     const rounds = Aes.rounds;
@@ -172,17 +285,19 @@ fn KeySchedule(comptime Aes: type) type {
     return struct {
         const Self = @This();
 
+        const Repr = Aes.block.Repr;
+
         const zero = @Vector(2, u64){ 0, 0 };
         const mask1 = @Vector(16, u8){ 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15, 12 };
         const mask2 = @Vector(16, u8){ 12, 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15 };
 
         round_keys: [rounds + 1]Block,
 
-        fn drc128(comptime rc: u8, t: BlockVec) BlockVec {
-            var v1: BlockVec = undefined;
-            var v2: BlockVec = undefined;
-            var v3: BlockVec = undefined;
-            var v4: BlockVec = undefined;
+        fn drc128(comptime rc: u8, t: Repr) Repr {
+            var v1: Repr = undefined;
+            var v2: Repr = undefined;
+            var v3: Repr = undefined;
+            var v4: Repr = undefined;
 
             return asm (
                 \\ movi %[v2].4s, %[rc]
@@ -196,7 +311,7 @@ fn KeySchedule(comptime Aes: type) type {
                 \\ eor %[v1].16b, %[v1].16b, %[r].16b
                 \\ eor %[r].16b, %[v1].16b, %[v3].16b
                 \\ eor %[r].16b, %[r].16b, %[v4].16b
-                : [r] "=&x" (-> BlockVec),
+                : [r] "=&x" (-> Repr),
                   [v1] "=&x" (v1),
                   [v2] "=&x" (v2),
                   [v3] "=&x" (v3),
@@ -208,11 +323,11 @@ fn KeySchedule(comptime Aes: type) type {
             );
         }
 
-        fn drc256(comptime second: bool, comptime rc: u8, t: BlockVec, tx: BlockVec) BlockVec {
-            var v1: BlockVec = undefined;
-            var v2: BlockVec = undefined;
-            var v3: BlockVec = undefined;
-            var v4: BlockVec = undefined;
+        fn drc256(comptime second: bool, comptime rc: u8, t: Repr, tx: Repr) Repr {
+            var v1: Repr = undefined;
+            var v2: Repr = undefined;
+            var v3: Repr = undefined;
+            var v4: Repr = undefined;
 
             return asm (
                 \\ movi %[v2].4s, %[rc]
@@ -226,7 +341,7 @@ fn KeySchedule(comptime Aes: type) type {
                 \\ eor
%[v1].16b, %[v1].16b, %[v2].16b \\ eor %[v1].16b, %[v1].16b, %[v3].16b \\ eor %[r].16b, %[v1].16b, %[v4].16b - : [r] "=&x" (-> BlockVec), + : [r] "=&x" (-> Repr), [v1] "=&x" (v1), [v2] "=&x" (v2), [v3] "=&x" (v3), @@ -276,7 +391,7 @@ fn KeySchedule(comptime Aes: type) type { inv_round_keys[i] = Block{ .repr = asm ( \\ aesimc %[inv_rk].16b, %[rk].16b - : [inv_rk] "=x" (-> BlockVec), + : [inv_rk] "=x" (-> Repr), : [rk] "x" (round_keys[rounds - i].repr), ), }; diff --git a/lib/std/crypto/aes/soft.zig b/lib/std/crypto/aes/soft.zig index 8430a3af7e..7f3d298a3a 100644 --- a/lib/std/crypto/aes/soft.zig +++ b/lib/std/crypto/aes/soft.zig @@ -2,16 +2,16 @@ const std = @import("../../std.zig"); const math = std.math; const mem = std.mem; -const BlockVec = [4]u32; - const side_channels_mitigations = std.options.side_channels_mitigations; /// A single AES block. pub const Block = struct { + const Repr = [4]u32; + pub const block_length: usize = 16; /// Internal representation of a block. - repr: BlockVec align(16), + repr: Repr align(16), /// Convert a byte sequence into an internal representation. pub inline fn fromBytes(bytes: *const [16]u8) Block { @@ -19,7 +19,7 @@ pub const Block = struct { const s1 = mem.readInt(u32, bytes[4..8], .little); const s2 = mem.readInt(u32, bytes[8..12], .little); const s3 = mem.readInt(u32, bytes[12..16], .little); - return Block{ .repr = BlockVec{ s0, s1, s2, s3 } }; + return Block{ .repr = Repr{ s0, s1, s2, s3 } }; } /// Convert the internal representation of a block into a byte sequence. 
@@ -65,7 +65,7 @@ pub const Block = struct { t2 ^= round_key.repr[2]; t3 ^= round_key.repr[3]; - return Block{ .repr = BlockVec{ t0, t1, t2, t3 } }; + return Block{ .repr = Repr{ t0, t1, t2, t3 } }; } /// Encrypt a block with a round key *WITHOUT ANY PROTECTION AGAINST SIDE CHANNELS* @@ -110,7 +110,7 @@ pub const Block = struct { t2 ^= round_key.repr[2]; t3 ^= round_key.repr[3]; - return Block{ .repr = BlockVec{ t0, t1, t2, t3 } }; + return Block{ .repr = Repr{ t0, t1, t2, t3 } }; } /// Encrypt a block with the last round key. @@ -136,7 +136,7 @@ pub const Block = struct { t2 ^= round_key.repr[2]; t3 ^= round_key.repr[3]; - return Block{ .repr = BlockVec{ t0, t1, t2, t3 } }; + return Block{ .repr = Repr{ t0, t1, t2, t3 } }; } /// Decrypt a block with a round key. @@ -161,7 +161,7 @@ pub const Block = struct { t2 ^= round_key.repr[2]; t3 ^= round_key.repr[3]; - return Block{ .repr = BlockVec{ t0, t1, t2, t3 } }; + return Block{ .repr = Repr{ t0, t1, t2, t3 } }; } /// Decrypt a block with a round key *WITHOUT ANY PROTECTION AGAINST SIDE CHANNELS* @@ -206,7 +206,7 @@ pub const Block = struct { t2 ^= round_key.repr[2]; t3 ^= round_key.repr[3]; - return Block{ .repr = BlockVec{ t0, t1, t2, t3 } }; + return Block{ .repr = Repr{ t0, t1, t2, t3 } }; } /// Decrypt a block with the last round key. @@ -232,12 +232,12 @@ pub const Block = struct { t2 ^= round_key.repr[2]; t3 ^= round_key.repr[3]; - return Block{ .repr = BlockVec{ t0, t1, t2, t3 } }; + return Block{ .repr = Repr{ t0, t1, t2, t3 } }; } /// Apply the bitwise XOR operation to the content of two blocks. pub inline fn xorBlocks(block1: Block, block2: Block) Block { - var x: BlockVec = undefined; + var x: Repr = undefined; comptime var i = 0; inline while (i < 4) : (i += 1) { x[i] = block1.repr[i] ^ block2.repr[i]; @@ -247,7 +247,7 @@ pub const Block = struct { /// Apply the bitwise AND operation to the content of two blocks. 
pub inline fn andBlocks(block1: Block, block2: Block) Block {
-        var x: BlockVec = undefined;
+        var x: Repr = undefined;
         comptime var i = 0;
         inline while (i < 4) : (i += 1) {
             x[i] = block1.repr[i] & block2.repr[i];
@@ -257,7 +257,7 @@ pub const Block = struct {
 
     /// Apply the bitwise OR operation to the content of two blocks.
     pub inline fn orBlocks(block1: Block, block2: Block) Block {
-        var x: BlockVec = undefined;
+        var x: Repr = undefined;
         comptime var i = 0;
         inline while (i < 4) : (i += 1) {
             x[i] = block1.repr[i] | block2.repr[i];
@@ -332,6 +332,118 @@ pub const Block = struct {
     };
 };
 
+/// A fixed-size vector of AES blocks.
+/// All operations are performed in parallel, using SIMD instructions when available.
+pub fn BlockVec(comptime blocks_count: comptime_int) type {
+    return struct {
+        const Self = @This();
+
+        /// The number of AES blocks the target architecture can process with a single instruction.
+        pub const native_vector_size = 1;
+
+        /// The size of the AES block vector that the target architecture can process with a single instruction, in bytes.
+        pub const native_word_size = native_vector_size * 16;
+
+        const native_words = blocks_count;
+
+        /// Internal representation of a block vector.
+        repr: [native_words]Block,
+
+        /// Length of the block vector in bytes.
+        pub const block_length: usize = blocks_count * 16;
+
+        /// Convert a byte sequence into an internal representation.
+        pub inline fn fromBytes(bytes: *const [blocks_count * 16]u8) Self {
+            var out: Self = undefined;
+            for (0..native_words) |i| {
+                out.repr[i] = Block.fromBytes(bytes[i * native_word_size ..][0..native_word_size]);
+            }
+            return out;
+        }
+
+        /// Convert the internal representation of a block vector into a byte sequence.
+        pub inline fn toBytes(block_vec: Self) [blocks_count * 16]u8 {
+            var out: [blocks_count * 16]u8 = undefined;
+            for (0..native_words) |i| {
+                out[i * native_word_size ..][0..native_word_size].* = block_vec.repr[i].toBytes();
+            }
+            return out;
+        }
+
+        /// XOR the block vector with a byte sequence and return the result as bytes.
+        pub inline fn xorBytes(block_vec: Self, bytes: *const [blocks_count * 16]u8) [blocks_count * 16]u8 {
+            var out: [blocks_count * 16]u8 = undefined;
+            for (0..native_words) |i| {
+                out[i * native_word_size ..][0..native_word_size].* = block_vec.repr[i].xorBytes(bytes[i * native_word_size ..][0..native_word_size]);
+            }
+            return out;
+        }
+
+        /// Apply the forward AES operation to the block vector with a vector of round keys.
+        pub inline fn encrypt(block_vec: Self, round_key_vec: Self) Self {
+            var out: Self = undefined;
+            for (0..native_words) |i| {
+                out.repr[i] = block_vec.repr[i].encrypt(round_key_vec.repr[i]);
+            }
+            return out;
+        }
+
+        /// Apply the forward AES operation to the block vector with a vector of last round keys.
+        pub inline fn encryptLast(block_vec: Self, round_key_vec: Self) Self {
+            var out: Self = undefined;
+            for (0..native_words) |i| {
+                out.repr[i] = block_vec.repr[i].encryptLast(round_key_vec.repr[i]);
+            }
+            return out;
+        }
+
+        /// Apply the inverse AES operation to the block vector with a vector of round keys.
+        pub inline fn decrypt(block_vec: Self, inv_round_key_vec: Self) Self {
+            var out: Self = undefined;
+            for (0..native_words) |i| {
+                out.repr[i] = block_vec.repr[i].decrypt(inv_round_key_vec.repr[i]);
+            }
+            return out;
+        }
+
+        /// Apply the inverse AES operation to the block vector with a vector of last round keys.
+        pub inline fn decryptLast(block_vec: Self, inv_round_key_vec: Self) Self {
+            var out: Self = undefined;
+            for (0..native_words) |i| {
+                out.repr[i] = block_vec.repr[i].decryptLast(inv_round_key_vec.repr[i]);
+            }
+            return out;
+        }
+
+        /// Apply the bitwise XOR operation to the content of two block vectors.
+        pub inline fn xorBlocks(block_vec1: Self, block_vec2: Self) Self {
+            var out: Self = undefined;
+            for (0..native_words) |i| {
+                out.repr[i] = block_vec1.repr[i].xorBlocks(block_vec2.repr[i]);
+            }
+            return out;
+        }
+
+        /// Apply the bitwise AND operation to the content of two block vectors.
+        pub inline fn andBlocks(block_vec1: Self, block_vec2: Self) Self {
+            var out: Self = undefined;
+            for (0..native_words) |i| {
+                out.repr[i] = block_vec1.repr[i].andBlocks(block_vec2.repr[i]);
+            }
+            return out;
+        }
+
+        /// Apply the bitwise OR operation to the content of two block vectors.
+        pub inline fn orBlocks(block_vec1: Self, block_vec2: Self) Self {
+            var out: Self = undefined;
+            for (0..native_words) |i| {
+                out.repr[i] = block_vec1.repr[i].orBlocks(block_vec2.repr[i]);
+            }
+            return out;
+        }
+    };
+}
+
 fn KeySchedule(comptime Aes: type) type {
     std.debug.assert(Aes.rounds == 10 or Aes.rounds == 14);
     const key_length = Aes.key_bits / 8;
@@ -671,7 +783,7 @@ fn mul(a: u8, b: u8) u8 {
 
 const cache_line_bytes = std.atomic.cache_line;
 
-inline fn sbox_lookup(sbox: *align(64) const [256]u8, idx0: u8, idx1: u8, idx2: u8, idx3: u8) [4]u8 {
+fn sbox_lookup(sbox: *align(64) const [256]u8, idx0: u8, idx1: u8, idx2: u8, idx3: u8) [4]u8 {
     if (side_channels_mitigations == .none) {
         return [4]u8{
             sbox[idx0],
@@ -709,7 +821,7 @@ inline fn sbox_lookup(sbox: *align(64) const [256]u8, idx0: u8, idx1: u8, idx2:
     }
 }
 
-inline fn table_lookup(table: *align(64) const [4][256]u32, idx0: u8, idx1: u8, idx2: u8, idx3: u8) [4]u32 {
+fn table_lookup(table: *align(64) const [4][256]u32, idx0: u8, idx1: u8, idx2: u8, idx3: u8) [4]u32 {
     if (side_channels_mitigations == .none) {
         return [4]u32{
             table[0][idx0],
@@ -718,17 +830,18 @@ inline fn table_lookup(table: *align(64) const [4][256]u32, idx0: u8, idx1: u8,
             table[3][idx3],
         };
     } else {
+        const table_len: usize = 256;
         const stride = switch (side_channels_mitigations) {
             .none => unreachable,
-            .basic => table[0].len / 4,
-            .medium => @max(1,
@min(table[0].len, 2 * cache_line_bytes / 4)), - .full => @max(1, @min(table[0].len, cache_line_bytes / 4)), + .basic => table_len / 4, + .medium => @max(1, @min(table_len, 2 * cache_line_bytes / 4)), + .full => @max(1, @min(table_len, cache_line_bytes / 4)), }; const of0 = idx0 % stride; const of1 = idx1 % stride; const of2 = idx2 % stride; const of3 = idx3 % stride; - var t: [4][table[0].len / stride]u32 align(64) = undefined; + var t: [4][table_len / stride]u32 align(64) = undefined; var i: usize = 0; while (i < t[0].len) : (i += 1) { const tx = table[0][i * stride ..]; diff --git a/lib/std/crypto/benchmark.zig b/lib/std/crypto/benchmark.zig index 8bb651f73b..c3dcd9b8cb 100644 --- a/lib/std/crypto/benchmark.zig +++ b/lib/std/crypto/benchmark.zig @@ -72,6 +72,10 @@ const macs = [_]Crypto{ Crypto{ .ty = crypto.auth.siphash.SipHash64(1, 3), .name = "siphash-1-3" }, Crypto{ .ty = crypto.auth.siphash.SipHash128(2, 4), .name = "siphash128-2-4" }, Crypto{ .ty = crypto.auth.siphash.SipHash128(1, 3), .name = "siphash128-1-3" }, + Crypto{ .ty = crypto.auth.aegis.Aegis128X4Mac, .name = "aegis-128x4 mac" }, + Crypto{ .ty = crypto.auth.aegis.Aegis256X4Mac, .name = "aegis-256x4 mac" }, + Crypto{ .ty = crypto.auth.aegis.Aegis128X2Mac, .name = "aegis-128x2 mac" }, + Crypto{ .ty = crypto.auth.aegis.Aegis256X2Mac, .name = "aegis-256x2 mac" }, Crypto{ .ty = crypto.auth.aegis.Aegis128LMac, .name = "aegis-128l mac" }, Crypto{ .ty = crypto.auth.aegis.Aegis256Mac, .name = "aegis-256 mac" }, Crypto{ .ty = crypto.auth.cmac.CmacAes128, .name = "aes-cmac" }, @@ -283,7 +287,11 @@ const aeads = [_]Crypto{ Crypto{ .ty = crypto.aead.chacha_poly.XChaCha20Poly1305, .name = "xchacha20Poly1305" }, Crypto{ .ty = crypto.aead.chacha_poly.XChaCha8Poly1305, .name = "xchacha8Poly1305" }, Crypto{ .ty = crypto.aead.salsa_poly.XSalsa20Poly1305, .name = "xsalsa20Poly1305" }, + Crypto{ .ty = crypto.aead.aegis.Aegis128X4, .name = "aegis-128x4" }, + Crypto{ .ty = crypto.aead.aegis.Aegis128X2, .name = 
"aegis-128x2" }, Crypto{ .ty = crypto.aead.aegis.Aegis128L, .name = "aegis-128l" }, + Crypto{ .ty = crypto.aead.aegis.Aegis256X4, .name = "aegis-256x4" }, + Crypto{ .ty = crypto.aead.aegis.Aegis256X2, .name = "aegis-256x2" }, Crypto{ .ty = crypto.aead.aegis.Aegis256, .name = "aegis-256" }, Crypto{ .ty = crypto.aead.aes_gcm.Aes128Gcm, .name = "aes128-gcm" }, Crypto{ .ty = crypto.aead.aes_gcm.Aes256Gcm, .name = "aes256-gcm" },