std.crypto.aes: introduce AES block vectors (#22023)

* std.crypto.aes: introduce AES block vectors

Modern Intel CPUs with the VAES extension can process more than a
single AES block per instruction.

So can some ARM and RISC-V CPUs. Bitsliced software implementations
can also benefit greatly from this.

Implement low-level operations on AES block vectors, and the
parallel AEGIS variants on top of them.
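
As a quick illustration of the new API (a minimal sketch, assuming the
BlockVec type introduced in this commit is re-exported as
std.crypto.core.aes.BlockVec per the crypto/aes.zig hunk below, and using
placeholder key and data values):

    const std = @import("std");
    const aes = std.crypto.core.aes;

    test "operate on two AES blocks at once (sketch)" {
        const V = aes.BlockVec(2);

        // 32 bytes of input = two 16-byte AES blocks (placeholder data).
        const input = [_]u8{0x42} ** 32;
        // A placeholder round-key vector; real round keys come from a key schedule.
        const round_keys = V.fromBytes(&([_]u8{0x01} ** 32));

        var v = V.fromBytes(&input);
        v = v.encrypt(round_keys); // one AES round applied to both blocks in parallel
        v = v.xorBlocks(round_keys); // block-wise XOR, also applied in parallel
        try std.testing.expect(v.toBytes().len == V.block_length);
    }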

AMD Zen4:

      aegis-128x4:      73225 MiB/s
      aegis-128x2:      51571 MiB/s
       aegis-128l:      25806 MiB/s
      aegis-256x4:      46742 MiB/s
      aegis-256x2:      30227 MiB/s
        aegis-256:       8436 MiB/s
       aes128-gcm:       5926 MiB/s
       aes256-gcm:       5085 MiB/s

AES-GCM, and anything else based on AES-CTR, will also benefit from
this later.

* Make AEGIS-MAC twice as fast
Frank Denis authored on 2024-11-22 10:00:49 +01:00, committed by GitHub
parent f845fa04a0
commit 636308a17d
7 changed files with 1012 additions and 387 deletions


@ -7,10 +7,23 @@ pub const timing_safe = @import("crypto/timing_safe.zig");
/// Authenticated Encryption with Associated Data
pub const aead = struct {
pub const aegis = struct {
pub const Aegis128L = @import("crypto/aegis.zig").Aegis128L;
pub const Aegis128L_256 = @import("crypto/aegis.zig").Aegis128L_256;
pub const Aegis256 = @import("crypto/aegis.zig").Aegis256;
pub const Aegis256_256 = @import("crypto/aegis.zig").Aegis256_256;
const variants = @import("crypto/aegis.zig");
pub const Aegis128X4 = variants.Aegis128X4;
pub const Aegis128X2 = variants.Aegis128X2;
pub const Aegis128L = variants.Aegis128L;
pub const Aegis256X4 = variants.Aegis256X4;
pub const Aegis256X2 = variants.Aegis256X2;
pub const Aegis256 = variants.Aegis256;
pub const Aegis128X4_256 = variants.Aegis128X4_256;
pub const Aegis128X2_256 = variants.Aegis128X2_256;
pub const Aegis128L_256 = variants.Aegis128L_256;
pub const Aegis256X4_256 = variants.Aegis256X4_256;
pub const Aegis256X2_256 = variants.Aegis256X2_256;
pub const Aegis256_256 = variants.Aegis256_256;
};
pub const aes_gcm = struct {
@ -44,10 +57,22 @@ pub const auth = struct {
pub const hmac = @import("crypto/hmac.zig");
pub const siphash = @import("crypto/siphash.zig");
pub const aegis = struct {
pub const Aegis128LMac = @import("crypto/aegis.zig").Aegis128LMac;
pub const Aegis128LMac_128 = @import("crypto/aegis.zig").Aegis128LMac_128;
pub const Aegis256Mac = @import("crypto/aegis.zig").Aegis256Mac;
pub const Aegis256Mac_128 = @import("crypto/aegis.zig").Aegis256Mac_128;
const variants = @import("crypto/aegis.zig");
pub const Aegis128X4Mac = variants.Aegis128X4Mac;
pub const Aegis128X2Mac = variants.Aegis128X2Mac;
pub const Aegis128LMac = variants.Aegis128LMac;
pub const Aegis256X4Mac = variants.Aegis256X4Mac;
pub const Aegis256X2Mac = variants.Aegis256X2Mac;
pub const Aegis256Mac = variants.Aegis256Mac;
pub const Aegis128X4Mac_128 = variants.Aegis128X4Mac_128;
pub const Aegis128X2Mac_128 = variants.Aegis128X2Mac_128;
pub const Aegis128LMac_128 = variants.Aegis128LMac_128;
pub const Aegis256X4Mac_128 = variants.Aegis256X4Mac_128;
pub const Aegis256X2Mac_128 = variants.Aegis256X2Mac_128;
pub const Aegis256Mac_128 = variants.Aegis256Mac_128;
};
pub const cmac = @import("crypto/cmac.zig");
};
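
Assuming the new X-variants expose the same AEAD interface
(encrypt/decrypt, key_length, nonce_length, tag_length) as the existing
Aegis128L, a hedged round-trip usage sketch with placeholder values:

    const std = @import("std");
    const Aegis128X2 = std.crypto.aead.aegis.Aegis128X2;

    test "AEGIS-128X2 round trip (sketch)" {
        const key = [_]u8{0x01} ** Aegis128X2.key_length;
        const nonce = [_]u8{0x02} ** Aegis128X2.nonce_length;
        const msg = "two AES block lanes per update";
        const ad = "associated data";

        var ct: [msg.len]u8 = undefined;
        var tag: [Aegis128X2.tag_length]u8 = undefined;
        Aegis128X2.encrypt(&ct, &tag, msg, ad, nonce, key);

        var pt: [msg.len]u8 = undefined;
        try Aegis128X2.decrypt(&pt, &ct, tag, ad, nonce, key);
        try std.testing.expectEqualSlices(u8, msg, &pt);
    }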

File diff suppressed because it is too large.


@ -22,6 +22,7 @@ pub const has_hardware_support =
(builtin.cpu.arch == .aarch64 and has_armaes);
pub const Block = impl.Block;
pub const BlockVec = impl.BlockVec;
pub const AesEncryptCtx = impl.AesEncryptCtx;
pub const AesDecryptCtx = impl.AesDecryptCtx;
pub const Aes128 = impl.Aes128;


@ -2,18 +2,23 @@ const std = @import("../../std.zig");
const builtin = @import("builtin");
const mem = std.mem;
const debug = std.debug;
const BlockVec = @Vector(2, u64);
const has_vaes = builtin.cpu.arch == .x86_64 and std.Target.x86.featureSetHas(builtin.cpu.features, .vaes);
const has_avx512f = builtin.cpu.arch == .x86_64 and std.Target.x86.featureSetHas(builtin.cpu.features, .avx512f);
/// A single AES block.
pub const Block = struct {
const Repr = @Vector(2, u64);
/// The length of an AES block in bytes.
pub const block_length: usize = 16;
/// Internal representation of a block.
repr: BlockVec,
repr: Repr,
/// Convert a byte sequence into an internal representation.
pub inline fn fromBytes(bytes: *const [16]u8) Block {
const repr = mem.bytesToValue(BlockVec, bytes);
const repr = mem.bytesToValue(Repr, bytes);
return Block{ .repr = repr };
}
@ -33,7 +38,7 @@ pub const Block = struct {
return Block{
.repr = asm (
\\ vaesenc %[rk], %[in], %[out]
: [out] "=x" (-> BlockVec),
: [out] "=x" (-> Repr),
: [in] "x" (block.repr),
[rk] "x" (round_key.repr),
),
@ -45,7 +50,7 @@ pub const Block = struct {
return Block{
.repr = asm (
\\ vaesenclast %[rk], %[in], %[out]
: [out] "=x" (-> BlockVec),
: [out] "=x" (-> Repr),
: [in] "x" (block.repr),
[rk] "x" (round_key.repr),
),
@ -57,7 +62,7 @@ pub const Block = struct {
return Block{
.repr = asm (
\\ vaesdec %[rk], %[in], %[out]
: [out] "=x" (-> BlockVec),
: [out] "=x" (-> Repr),
: [in] "x" (block.repr),
[rk] "x" (inv_round_key.repr),
),
@ -69,7 +74,7 @@ pub const Block = struct {
return Block{
.repr = asm (
\\ vaesdeclast %[rk], %[in], %[out]
: [out] "=x" (-> BlockVec),
: [out] "=x" (-> Repr),
: [in] "x" (block.repr),
[rk] "x" (inv_round_key.repr),
),
@ -168,17 +173,158 @@ pub const Block = struct {
};
};
/// A fixed-size vector of AES blocks.
/// All operations are performed in parallel, using SIMD instructions when available.
pub fn BlockVec(comptime blocks_count: comptime_int) type {
return struct {
const Self = @This();
/// The number of AES blocks the target architecture can process with a single instruction.
pub const native_vector_size = w: {
if (has_avx512f and blocks_count % 4 == 0) break :w 4;
if (has_vaes and blocks_count % 2 == 0) break :w 2;
break :w 1;
};
/// The size of the AES block vector that the target architecture can process with a single instruction, in bytes.
pub const native_word_size = native_vector_size * 16;
const native_words = blocks_count / native_vector_size;
const Repr = @Vector(native_vector_size * 2, u64);
/// Internal representation of a block vector.
repr: [native_words]Repr,
/// Length of the block vector in bytes.
pub const block_length: usize = blocks_count * 16;
/// Convert a byte sequence into an internal representation.
pub inline fn fromBytes(bytes: *const [blocks_count * 16]u8) Self {
var out: Self = undefined;
inline for (0..native_words) |i| {
out.repr[i] = mem.bytesToValue(Repr, bytes[i * native_word_size ..][0..native_word_size]);
}
return out;
}
/// Convert the internal representation of a block vector into a byte sequence.
pub inline fn toBytes(block_vec: Self) [blocks_count * 16]u8 {
var out: [blocks_count * 16]u8 = undefined;
inline for (0..native_words) |i| {
out[i * native_word_size ..][0..native_word_size].* = mem.toBytes(block_vec.repr[i]);
}
return out;
}
/// XOR the block vector with a byte sequence.
pub inline fn xorBytes(block_vec: Self, bytes: *const [blocks_count * 16]u8) [blocks_count * 16]u8 {
var x: Self = undefined;
inline for (0..native_words) |i| {
x.repr[i] = block_vec.repr[i] ^ mem.bytesToValue(Repr, bytes[i * native_word_size ..][0..native_word_size]);
}
return x.toBytes();
}
/// Apply the forward AES operation to the block vector with a vector of round keys.
pub inline fn encrypt(block_vec: Self, round_key_vec: Self) Self {
var out: Self = undefined;
inline for (0..native_words) |i| {
out.repr[i] = asm (
\\ vaesenc %[rk], %[in], %[out]
: [out] "=x" (-> Repr),
: [in] "x" (block_vec.repr[i]),
[rk] "x" (round_key_vec.repr[i]),
);
}
return out;
}
/// Apply the forward AES operation to the block vector with a vector of last round keys.
pub inline fn encryptLast(block_vec: Self, round_key_vec: Self) Self {
var out: Self = undefined;
inline for (0..native_words) |i| {
out.repr[i] = asm (
\\ vaesenclast %[rk], %[in], %[out]
: [out] "=x" (-> Repr),
: [in] "x" (block_vec.repr[i]),
[rk] "x" (round_key_vec.repr[i]),
);
}
return out;
}
/// Apply the inverse AES operation to the block vector with a vector of round keys.
pub inline fn decrypt(block_vec: Self, inv_round_key_vec: Self) Self {
var out: Self = undefined;
inline for (0..native_words) |i| {
out.repr[i] = asm (
\\ vaesdec %[rk], %[in], %[out]
: [out] "=x" (-> Repr),
: [in] "x" (block_vec.repr[i]),
[rk] "x" (inv_round_key_vec.repr[i]),
);
}
return out;
}
/// Apply the inverse AES operation to the block vector with a vector of last round keys.
pub inline fn decryptLast(block_vec: Self, inv_round_key_vec: Self) Self {
var out: Self = undefined;
inline for (0..native_words) |i| {
out.repr[i] = asm (
\\ vaesdeclast %[rk], %[in], %[out]
: [out] "=x" (-> Repr),
: [in] "x" (block_vec.repr[i]),
[rk] "x" (inv_round_key_vec.repr[i]),
);
}
return out;
}
/// Apply the bitwise XOR operation to the content of two block vectors.
pub inline fn xorBlocks(block_vec1: Self, block_vec2: Self) Self {
var out: Self = undefined;
inline for (0..native_words) |i| {
out.repr[i] = block_vec1.repr[i] ^ block_vec2.repr[i];
}
return out;
}
/// Apply the bitwise AND operation to the content of two block vectors.
pub inline fn andBlocks(block_vec1: Self, block_vec2: Self) Self {
var out: Self = undefined;
inline for (0..native_words) |i| {
out.repr[i] = block_vec1.repr[i] & block_vec2.repr[i];
}
return out;
}
/// Apply the bitwise OR operation to the content of two block vectors.
pub inline fn orBlocks(block_vec1: Self, block_vec2: Self) Self {
var out: Self = undefined;
inline for (0..native_words) |i| {
out.repr[i] = block_vec1.repr[i] | block_vec2.repr[i];
}
return out;
}
};
}
fn KeySchedule(comptime Aes: type) type {
std.debug.assert(Aes.rounds == 10 or Aes.rounds == 14);
const rounds = Aes.rounds;
return struct {
const Self = @This();
const Repr = Aes.block.Repr;
round_keys: [rounds + 1]Block,
fn drc(comptime second: bool, comptime rc: u8, t: BlockVec, tx: BlockVec) BlockVec {
var s: BlockVec = undefined;
var ts: BlockVec = undefined;
fn drc(comptime second: bool, comptime rc: u8, t: Repr, tx: Repr) Repr {
var s: Repr = undefined;
var ts: Repr = undefined;
return asm (
\\ vaeskeygenassist %[rc], %[t], %[s]
\\ vpslldq $4, %[tx], %[ts]
@ -187,7 +333,7 @@ fn KeySchedule(comptime Aes: type) type {
\\ vpxor %[ts], %[r], %[r]
\\ vpshufd %[mask], %[s], %[ts]
\\ vpxor %[ts], %[r], %[r]
: [r] "=&x" (-> BlockVec),
: [r] "=&x" (-> Repr),
[s] "=&x" (s),
[ts] "=&x" (ts),
: [rc] "n" (rc),
@ -234,7 +380,7 @@ fn KeySchedule(comptime Aes: type) type {
inv_round_keys[i] = Block{
.repr = asm (
\\ vaesimc %[rk], %[inv_rk]
: [inv_rk] "=x" (-> BlockVec),
: [inv_rk] "=x" (-> Repr),
: [rk] "x" (round_keys[rounds - i].repr),
),
};
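Note that across backends, BlockVec(n) is semantically just n independent
Block operations; the x86 code above merely groups them into wider
registers. A property-check sketch (assuming both types are reachable
through std.crypto.core.aes, as in the earlier hunk):

    const std = @import("std");
    const aes = std.crypto.core.aes;

    test "BlockVec(2) matches two independent Block operations (sketch)" {
        const data = [_]u8{0xaa} ** 32;
        const rk_bytes = [_]u8{0x55} ** 16;

        // Scalar path: one AES round on each block separately.
        const rk = aes.Block.fromBytes(&rk_bytes);
        const e0 = aes.Block.fromBytes(data[0..16]).encrypt(rk).toBytes();
        const e1 = aes.Block.fromBytes(data[16..32]).encrypt(rk).toBytes();

        // Vector path: the same round on both blocks at once.
        const V = aes.BlockVec(2);
        const vrk = V.fromBytes(&(rk_bytes ++ rk_bytes));
        const out = V.fromBytes(&data).encrypt(vrk).toBytes();

        try std.testing.expectEqualSlices(u8, &e0, out[0..16]);
        try std.testing.expectEqualSlices(u8, &e1, out[16..32]);
    }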


@ -1,18 +1,19 @@
const std = @import("../../std.zig");
const mem = std.mem;
const debug = std.debug;
const BlockVec = @Vector(2, u64);
/// A single AES block.
pub const Block = struct {
const Repr = @Vector(2, u64);
pub const block_length: usize = 16;
/// Internal representation of a block.
repr: BlockVec,
repr: Repr,
/// Convert a byte sequence into an internal representation.
pub inline fn fromBytes(bytes: *const [16]u8) Block {
const repr = mem.bytesToValue(BlockVec, bytes);
const repr = mem.bytesToValue(Repr, bytes);
return Block{ .repr = repr };
}
@ -36,7 +37,7 @@ pub const Block = struct {
\\ mov %[out].16b, %[in].16b
\\ aese %[out].16b, %[zero].16b
\\ aesmc %[out].16b, %[out].16b
: [out] "=&x" (-> BlockVec),
: [out] "=&x" (-> Repr),
: [in] "x" (block.repr),
[zero] "x" (zero),
)) ^ round_key.repr,
@ -49,7 +50,7 @@ pub const Block = struct {
.repr = (asm (
\\ mov %[out].16b, %[in].16b
\\ aese %[out].16b, %[zero].16b
: [out] "=&x" (-> BlockVec),
: [out] "=&x" (-> Repr),
: [in] "x" (block.repr),
[zero] "x" (zero),
)) ^ round_key.repr,
@ -63,7 +64,7 @@ pub const Block = struct {
\\ mov %[out].16b, %[in].16b
\\ aesd %[out].16b, %[zero].16b
\\ aesimc %[out].16b, %[out].16b
: [out] "=&x" (-> BlockVec),
: [out] "=&x" (-> Repr),
: [in] "x" (block.repr),
[zero] "x" (zero),
)) ^ inv_round_key.repr,
@ -76,7 +77,7 @@ pub const Block = struct {
.repr = (asm (
\\ mov %[out].16b, %[in].16b
\\ aesd %[out].16b, %[zero].16b
: [out] "=&x" (-> BlockVec),
: [out] "=&x" (-> Repr),
: [in] "x" (block.repr),
[zero] "x" (zero),
)) ^ inv_round_key.repr,
@ -165,6 +166,118 @@ pub const Block = struct {
};
};
/// A fixed-size vector of AES blocks.
/// All operations are performed in parallel, using SIMD instructions when available.
pub fn BlockVec(comptime blocks_count: comptime_int) type {
return struct {
const Self = @This();
/// The number of AES blocks the target architecture can process with a single instruction.
pub const native_vector_size = 1;
/// The size of the AES block vector that the target architecture can process with a single instruction, in bytes.
pub const native_word_size = native_vector_size * 16;
const native_words = blocks_count;
/// Internal representation of a block vector.
repr: [native_words]Block,
/// Length of the block vector in bytes.
pub const block_length: usize = blocks_count * 16;
/// Convert a byte sequence into an internal representation.
pub inline fn fromBytes(bytes: *const [blocks_count * 16]u8) Self {
var out: Self = undefined;
inline for (0..native_words) |i| {
out.repr[i] = Block.fromBytes(bytes[i * native_word_size ..][0..native_word_size]);
}
return out;
}
/// Convert the internal representation of a block vector into a byte sequence.
pub inline fn toBytes(block_vec: Self) [blocks_count * 16]u8 {
var out: [blocks_count * 16]u8 = undefined;
inline for (0..native_words) |i| {
out[i * native_word_size ..][0..native_word_size].* = block_vec.repr[i].toBytes();
}
return out;
}
/// XOR the block vector with a byte sequence.
pub inline fn xorBytes(block_vec: Self, bytes: *const [blocks_count * 16]u8) [blocks_count * 16]u8 {
var out: [blocks_count * 16]u8 = undefined;
inline for (0..native_words) |i| {
out[i * native_word_size ..][0..native_word_size].* = block_vec.repr[i].xorBytes(bytes[i * native_word_size ..][0..native_word_size]);
}
return out;
}
/// Apply the forward AES operation to the block vector with a vector of round keys.
pub inline fn encrypt(block_vec: Self, round_key_vec: Self) Self {
var out: Self = undefined;
inline for (0..native_words) |i| {
out.repr[i] = block_vec.repr[i].encrypt(round_key_vec.repr[i]);
}
return out;
}
/// Apply the forward AES operation to the block vector with a vector of last round keys.
pub inline fn encryptLast(block_vec: Self, round_key_vec: Self) Self {
var out: Self = undefined;
inline for (0..native_words) |i| {
out.repr[i] = block_vec.repr[i].encryptLast(round_key_vec.repr[i]);
}
return out;
}
/// Apply the inverse AES operation to the block vector with a vector of round keys.
pub inline fn decrypt(block_vec: Self, inv_round_key_vec: Self) Self {
var out: Self = undefined;
inline for (0..native_words) |i| {
out.repr[i] = block_vec.repr[i].decrypt(inv_round_key_vec.repr[i]);
}
return out;
}
/// Apply the inverse AES operation to the block vector with a vector of last round keys.
pub inline fn decryptLast(block_vec: Self, inv_round_key_vec: Self) Self {
var out: Self = undefined;
inline for (0..native_words) |i| {
out.repr[i] = block_vec.repr[i].decryptLast(inv_round_key_vec.repr[i]);
}
return out;
}
/// Apply the bitwise XOR operation to the content of two block vectors.
pub inline fn xorBlocks(block_vec1: Self, block_vec2: Self) Self {
var out: Self = undefined;
inline for (0..native_words) |i| {
out.repr[i] = block_vec1.repr[i].xorBlocks(block_vec2.repr[i]);
}
return out;
}
/// Apply the bitwise AND operation to the content of two block vectors.
pub inline fn andBlocks(block_vec1: Self, block_vec2: Self) Self {
var out: Self = undefined;
inline for (0..native_words) |i| {
out.repr[i] = block_vec1.repr[i].andBlocks(block_vec2.repr[i]);
}
return out;
}
/// Apply the bitwise OR operation to the content of two block vectors.
pub inline fn orBlocks(block_vec1: Self, block_vec2: Self) Self {
var out: Self = undefined;
inline for (0..native_words) |i| {
out.repr[i] = block_vec1.repr[i].orBlocks(block_vec2.repr[i]);
}
return out;
}
};
}
fn KeySchedule(comptime Aes: type) type {
std.debug.assert(Aes.rounds == 10 or Aes.rounds == 14);
const rounds = Aes.rounds;
@ -172,17 +285,19 @@ fn KeySchedule(comptime Aes: type) type {
return struct {
const Self = @This();
const Repr = Aes.block.Repr;
const zero = @Vector(2, u64){ 0, 0 };
const mask1 = @Vector(16, u8){ 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15, 12 };
const mask2 = @Vector(16, u8){ 12, 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15 };
round_keys: [rounds + 1]Block,
fn drc128(comptime rc: u8, t: BlockVec) BlockVec {
var v1: BlockVec = undefined;
var v2: BlockVec = undefined;
var v3: BlockVec = undefined;
var v4: BlockVec = undefined;
fn drc128(comptime rc: u8, t: Repr) Repr {
var v1: Repr = undefined;
var v2: Repr = undefined;
var v3: Repr = undefined;
var v4: Repr = undefined;
return asm (
\\ movi %[v2].4s, %[rc]
@ -196,7 +311,7 @@ fn KeySchedule(comptime Aes: type) type {
\\ eor %[v1].16b, %[v1].16b, %[r].16b
\\ eor %[r].16b, %[v1].16b, %[v3].16b
\\ eor %[r].16b, %[r].16b, %[v4].16b
: [r] "=&x" (-> BlockVec),
: [r] "=&x" (-> Repr),
[v1] "=&x" (v1),
[v2] "=&x" (v2),
[v3] "=&x" (v3),
@ -208,11 +323,11 @@ fn KeySchedule(comptime Aes: type) type {
);
}
fn drc256(comptime second: bool, comptime rc: u8, t: BlockVec, tx: BlockVec) BlockVec {
var v1: BlockVec = undefined;
var v2: BlockVec = undefined;
var v3: BlockVec = undefined;
var v4: BlockVec = undefined;
fn drc256(comptime second: bool, comptime rc: u8, t: Repr, tx: Repr) Repr {
var v1: Repr = undefined;
var v2: Repr = undefined;
var v3: Repr = undefined;
var v4: Repr = undefined;
return asm (
\\ movi %[v2].4s, %[rc]
@ -226,7 +341,7 @@ fn KeySchedule(comptime Aes: type) type {
\\ eor %[v1].16b, %[v1].16b, %[v2].16b
\\ eor %[v1].16b, %[v1].16b, %[v3].16b
\\ eor %[r].16b, %[v1].16b, %[v4].16b
: [r] "=&x" (-> BlockVec),
: [r] "=&x" (-> Repr),
[v1] "=&x" (v1),
[v2] "=&x" (v2),
[v3] "=&x" (v3),
@ -276,7 +391,7 @@ fn KeySchedule(comptime Aes: type) type {
inv_round_keys[i] = Block{
.repr = asm (
\\ aesimc %[inv_rk].16b, %[rk].16b
: [inv_rk] "=x" (-> BlockVec),
: [inv_rk] "=x" (-> Repr),
: [rk] "x" (round_keys[rounds - i].repr),
),
};


@ -2,16 +2,16 @@ const std = @import("../../std.zig");
const math = std.math;
const mem = std.mem;
const BlockVec = [4]u32;
const side_channels_mitigations = std.options.side_channels_mitigations;
/// A single AES block.
pub const Block = struct {
const Repr = [4]u32;
pub const block_length: usize = 16;
/// Internal representation of a block.
repr: BlockVec align(16),
repr: Repr align(16),
/// Convert a byte sequence into an internal representation.
pub inline fn fromBytes(bytes: *const [16]u8) Block {
@ -19,7 +19,7 @@ pub const Block = struct {
const s1 = mem.readInt(u32, bytes[4..8], .little);
const s2 = mem.readInt(u32, bytes[8..12], .little);
const s3 = mem.readInt(u32, bytes[12..16], .little);
return Block{ .repr = BlockVec{ s0, s1, s2, s3 } };
return Block{ .repr = Repr{ s0, s1, s2, s3 } };
}
/// Convert the internal representation of a block into a byte sequence.
@ -65,7 +65,7 @@ pub const Block = struct {
t2 ^= round_key.repr[2];
t3 ^= round_key.repr[3];
return Block{ .repr = BlockVec{ t0, t1, t2, t3 } };
return Block{ .repr = Repr{ t0, t1, t2, t3 } };
}
/// Encrypt a block with a round key *WITHOUT ANY PROTECTION AGAINST SIDE CHANNELS*
@ -110,7 +110,7 @@ pub const Block = struct {
t2 ^= round_key.repr[2];
t3 ^= round_key.repr[3];
return Block{ .repr = BlockVec{ t0, t1, t2, t3 } };
return Block{ .repr = Repr{ t0, t1, t2, t3 } };
}
/// Encrypt a block with the last round key.
@ -136,7 +136,7 @@ pub const Block = struct {
t2 ^= round_key.repr[2];
t3 ^= round_key.repr[3];
return Block{ .repr = BlockVec{ t0, t1, t2, t3 } };
return Block{ .repr = Repr{ t0, t1, t2, t3 } };
}
/// Decrypt a block with a round key.
@ -161,7 +161,7 @@ pub const Block = struct {
t2 ^= round_key.repr[2];
t3 ^= round_key.repr[3];
return Block{ .repr = BlockVec{ t0, t1, t2, t3 } };
return Block{ .repr = Repr{ t0, t1, t2, t3 } };
}
/// Decrypt a block with a round key *WITHOUT ANY PROTECTION AGAINST SIDE CHANNELS*
@ -206,7 +206,7 @@ pub const Block = struct {
t2 ^= round_key.repr[2];
t3 ^= round_key.repr[3];
return Block{ .repr = BlockVec{ t0, t1, t2, t3 } };
return Block{ .repr = Repr{ t0, t1, t2, t3 } };
}
/// Decrypt a block with the last round key.
@ -232,12 +232,12 @@ pub const Block = struct {
t2 ^= round_key.repr[2];
t3 ^= round_key.repr[3];
return Block{ .repr = BlockVec{ t0, t1, t2, t3 } };
return Block{ .repr = Repr{ t0, t1, t2, t3 } };
}
/// Apply the bitwise XOR operation to the content of two blocks.
pub inline fn xorBlocks(block1: Block, block2: Block) Block {
var x: BlockVec = undefined;
var x: Repr = undefined;
comptime var i = 0;
inline while (i < 4) : (i += 1) {
x[i] = block1.repr[i] ^ block2.repr[i];
@ -247,7 +247,7 @@ pub const Block = struct {
/// Apply the bitwise AND operation to the content of two blocks.
pub inline fn andBlocks(block1: Block, block2: Block) Block {
var x: BlockVec = undefined;
var x: Repr = undefined;
comptime var i = 0;
inline while (i < 4) : (i += 1) {
x[i] = block1.repr[i] & block2.repr[i];
@ -257,7 +257,7 @@ pub const Block = struct {
/// Apply the bitwise OR operation to the content of two blocks.
pub inline fn orBlocks(block1: Block, block2: Block) Block {
var x: BlockVec = undefined;
var x: Repr = undefined;
comptime var i = 0;
inline while (i < 4) : (i += 1) {
x[i] = block1.repr[i] | block2.repr[i];
@ -332,6 +332,118 @@ pub const Block = struct {
};
};
/// A fixed-size vector of AES blocks.
/// All operations are performed in parallel, using SIMD instructions when available.
pub fn BlockVec(comptime blocks_count: comptime_int) type {
return struct {
const Self = @This();
/// The number of AES blocks the target architecture can process with a single instruction.
pub const native_vector_size = 1;
/// The size of the AES block vector that the target architecture can process with a single instruction, in bytes.
pub const native_word_size = native_vector_size * 16;
const native_words = blocks_count;
/// Internal representation of a block vector.
repr: [native_words]Block,
/// Length of the block vector in bytes.
pub const block_length: usize = blocks_count * 16;
/// Convert a byte sequence into an internal representation.
pub inline fn fromBytes(bytes: *const [blocks_count * 16]u8) Self {
var out: Self = undefined;
for (0..native_words) |i| {
out.repr[i] = Block.fromBytes(bytes[i * native_word_size ..][0..native_word_size]);
}
return out;
}
/// Convert the internal representation of a block vector into a byte sequence.
pub inline fn toBytes(block_vec: Self) [blocks_count * 16]u8 {
var out: [blocks_count * 16]u8 = undefined;
for (0..native_words) |i| {
out[i * native_word_size ..][0..native_word_size].* = block_vec.repr[i].toBytes();
}
return out;
}
/// XOR the block vector with a byte sequence.
pub inline fn xorBytes(block_vec: Self, bytes: *const [blocks_count * 16]u8) [blocks_count * 16]u8 {
var out: [blocks_count * 16]u8 = undefined;
for (0..native_words) |i| {
out[i * native_word_size ..][0..native_word_size].* = block_vec.repr[i].xorBytes(bytes[i * native_word_size ..][0..native_word_size]);
}
return out;
}
/// Apply the forward AES operation to the block vector with a vector of round keys.
pub inline fn encrypt(block_vec: Self, round_key_vec: Self) Self {
var out: Self = undefined;
for (0..native_words) |i| {
out.repr[i] = block_vec.repr[i].encrypt(round_key_vec.repr[i]);
}
return out;
}
/// Apply the forward AES operation to the block vector with a vector of last round keys.
pub inline fn encryptLast(block_vec: Self, round_key_vec: Self) Self {
var out: Self = undefined;
for (0..native_words) |i| {
out.repr[i] = block_vec.repr[i].encryptLast(round_key_vec.repr[i]);
}
return out;
}
/// Apply the inverse AES operation to the block vector with a vector of round keys.
pub inline fn decrypt(block_vec: Self, inv_round_key_vec: Self) Self {
var out: Self = undefined;
for (0..native_words) |i| {
out.repr[i] = block_vec.repr[i].decrypt(inv_round_key_vec.repr[i]);
}
return out;
}
/// Apply the inverse AES operation to the block vector with a vector of last round keys.
pub inline fn decryptLast(block_vec: Self, inv_round_key_vec: Self) Self {
var out: Self = undefined;
for (0..native_words) |i| {
out.repr[i] = block_vec.repr[i].decryptLast(inv_round_key_vec.repr[i]);
}
return out;
}
/// Apply the bitwise XOR operation to the content of two block vectors.
pub inline fn xorBlocks(block_vec1: Self, block_vec2: Self) Self {
var out: Self = undefined;
for (0..native_words) |i| {
out.repr[i] = block_vec1.repr[i].xorBlocks(block_vec2.repr[i]);
}
return out;
}
/// Apply the bitwise AND operation to the content of two block vectors.
pub inline fn andBlocks(block_vec1: Self, block_vec2: Self) Self {
var out: Self = undefined;
for (0..native_words) |i| {
out.repr[i] = block_vec1.repr[i].andBlocks(block_vec2.repr[i]);
}
return out;
}
/// Apply the bitwise OR operation to the content of two block vectors.
pub inline fn orBlocks(block_vec1: Self, block_vec2: Self) Self {
var out: Self = undefined;
for (0..native_words) |i| {
out.repr[i] = block_vec1.repr[i].orBlocks(block_vec2.repr[i]);
}
return out;
}
};
}
fn KeySchedule(comptime Aes: type) type {
std.debug.assert(Aes.rounds == 10 or Aes.rounds == 14);
const key_length = Aes.key_bits / 8;
@ -671,7 +783,7 @@ fn mul(a: u8, b: u8) u8 {
const cache_line_bytes = std.atomic.cache_line;
inline fn sbox_lookup(sbox: *align(64) const [256]u8, idx0: u8, idx1: u8, idx2: u8, idx3: u8) [4]u8 {
fn sbox_lookup(sbox: *align(64) const [256]u8, idx0: u8, idx1: u8, idx2: u8, idx3: u8) [4]u8 {
if (side_channels_mitigations == .none) {
return [4]u8{
sbox[idx0],
@ -709,7 +821,7 @@ inline fn sbox_lookup(sbox: *align(64) const [256]u8, idx0: u8, idx1: u8, idx2:
}
}
inline fn table_lookup(table: *align(64) const [4][256]u32, idx0: u8, idx1: u8, idx2: u8, idx3: u8) [4]u32 {
fn table_lookup(table: *align(64) const [4][256]u32, idx0: u8, idx1: u8, idx2: u8, idx3: u8) [4]u32 {
if (side_channels_mitigations == .none) {
return [4]u32{
table[0][idx0],
@ -718,17 +830,18 @@ inline fn table_lookup(table: *align(64) const [4][256]u32, idx0: u8, idx1: u8,
table[3][idx3],
};
} else {
const table_len: usize = 256;
const stride = switch (side_channels_mitigations) {
.none => unreachable,
.basic => table[0].len / 4,
.medium => @max(1, @min(table[0].len, 2 * cache_line_bytes / 4)),
.full => @max(1, @min(table[0].len, cache_line_bytes / 4)),
.basic => table_len / 4,
.medium => @max(1, @min(table_len, 2 * cache_line_bytes / 4)),
.full => @max(1, @min(table_len, cache_line_bytes / 4)),
};
const of0 = idx0 % stride;
const of1 = idx1 % stride;
const of2 = idx2 % stride;
const of3 = idx3 % stride;
var t: [4][table[0].len / stride]u32 align(64) = undefined;
var t: [4][table_len / stride]u32 align(64) = undefined;
var i: usize = 0;
while (i < t[0].len) : (i += 1) {
const tx = table[0][i * stride ..];
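
The stride logic above keeps the table access pattern independent of the
secret indices: one entry in every stride-wide chunk is read, and the final
selection happens inside a small gathered buffer. A hypothetical miniature
of the same pattern for a single 256-byte table (illustration only, not the
std.crypto code):

    const std = @import("std");

    fn strideLookup(table: *const [256]u8, idx: u8, comptime stride: usize) u8 {
        const offset = idx % stride;
        var gathered: [256 / stride]u8 = undefined;
        // Touch every stride-wide chunk, so the set of cache lines accessed
        // does not depend on the secret index.
        for (&gathered, 0..) |*g, i| {
            g.* = table[i * stride + offset];
        }
        // Resolve the final choice within the small gathered buffer.
        return gathered[idx / stride];
    }

    test "strided lookup agrees with a direct index" {
        var table: [256]u8 = undefined;
        for (&table, 0..) |*v, i| v.* = @truncate(i *% 31);
        try std.testing.expectEqual(table[0x7f], strideLookup(&table, 0x7f, 16));
    }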


@ -72,6 +72,10 @@ const macs = [_]Crypto{
Crypto{ .ty = crypto.auth.siphash.SipHash64(1, 3), .name = "siphash-1-3" },
Crypto{ .ty = crypto.auth.siphash.SipHash128(2, 4), .name = "siphash128-2-4" },
Crypto{ .ty = crypto.auth.siphash.SipHash128(1, 3), .name = "siphash128-1-3" },
Crypto{ .ty = crypto.auth.aegis.Aegis128X4Mac, .name = "aegis-128x4 mac" },
Crypto{ .ty = crypto.auth.aegis.Aegis256X4Mac, .name = "aegis-256x4 mac" },
Crypto{ .ty = crypto.auth.aegis.Aegis128X2Mac, .name = "aegis-128x2 mac" },
Crypto{ .ty = crypto.auth.aegis.Aegis256X2Mac, .name = "aegis-256x2 mac" },
Crypto{ .ty = crypto.auth.aegis.Aegis128LMac, .name = "aegis-128l mac" },
Crypto{ .ty = crypto.auth.aegis.Aegis256Mac, .name = "aegis-256 mac" },
Crypto{ .ty = crypto.auth.cmac.CmacAes128, .name = "aes-cmac" },
@ -283,7 +287,11 @@ const aeads = [_]Crypto{
Crypto{ .ty = crypto.aead.chacha_poly.XChaCha20Poly1305, .name = "xchacha20Poly1305" },
Crypto{ .ty = crypto.aead.chacha_poly.XChaCha8Poly1305, .name = "xchacha8Poly1305" },
Crypto{ .ty = crypto.aead.salsa_poly.XSalsa20Poly1305, .name = "xsalsa20Poly1305" },
Crypto{ .ty = crypto.aead.aegis.Aegis128X4, .name = "aegis-128x4" },
Crypto{ .ty = crypto.aead.aegis.Aegis128X2, .name = "aegis-128x2" },
Crypto{ .ty = crypto.aead.aegis.Aegis128L, .name = "aegis-128l" },
Crypto{ .ty = crypto.aead.aegis.Aegis256X4, .name = "aegis-256x4" },
Crypto{ .ty = crypto.aead.aegis.Aegis256X2, .name = "aegis-256x2" },
Crypto{ .ty = crypto.aead.aegis.Aegis256, .name = "aegis-256" },
Crypto{ .ty = crypto.aead.aes_gcm.Aes128Gcm, .name = "aes128-gcm" },
Crypto{ .ty = crypto.aead.aes_gcm.Aes256Gcm, .name = "aes256-gcm" },
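
For completeness, a usage sketch for the parallel MACs benchmarked above,
assuming they expose the same one-shot create interface (mac_length,
key_length) as the existing Aegis128LMac:

    const std = @import("std");
    const Aegis128X2Mac = std.crypto.auth.aegis.Aegis128X2Mac;

    test "AEGIS-128X2 MAC one-shot (sketch)" {
        const key = [_]u8{0x03} ** Aegis128X2Mac.key_length;
        const msg = "message to authenticate";

        var tag1: [Aegis128X2Mac.mac_length]u8 = undefined;
        var tag2: [Aegis128X2Mac.mac_length]u8 = undefined;
        Aegis128X2Mac.create(&tag1, msg, &key);
        Aegis128X2Mac.create(&tag2, msg, &key);
        // The MAC is deterministic for a fixed key and message.
        try std.testing.expectEqualSlices(u8, &tag1, &tag2);
    }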