mirror of
https://github.com/ziglang/zig.git
synced 2024-11-30 00:52:52 +00:00
std.crypto.aes: introduce AES block vectors (#22023)
* std.crypto.aes: introduce AES block vectors. Modern Intel CPUs with the VAES extension can handle more than a single AES block per instruction. So can some ARM and RISC-V CPUs. Software implementations with bitslicing can also greatly benefit from this. Implement low-level operations on AES block vectors, and the parallel AEGIS variants on top of them. Benchmarks on AMD Zen4: aegis-128x4: 73225 MiB/s, aegis-128x2: 51571 MiB/s, aegis-128l: 25806 MiB/s, aegis-256x4: 46742 MiB/s, aegis-256x2: 30227 MiB/s, aegis-256: 8436 MiB/s, aes128-gcm: 5926 MiB/s, aes256-gcm: 5085 MiB/s. AES-GCM, and anything based on AES-CTR, are also going to benefit from this later. * Make AEGIS-MAC twice as fast
This commit is contained in:
parent
f845fa04a0
commit
636308a17d
@ -7,10 +7,23 @@ pub const timing_safe = @import("crypto/timing_safe.zig");
|
||||
/// Authenticated Encryption with Associated Data
|
||||
pub const aead = struct {
|
||||
pub const aegis = struct {
|
||||
pub const Aegis128L = @import("crypto/aegis.zig").Aegis128L;
|
||||
pub const Aegis128L_256 = @import("crypto/aegis.zig").Aegis128L_256;
|
||||
pub const Aegis256 = @import("crypto/aegis.zig").Aegis256;
|
||||
pub const Aegis256_256 = @import("crypto/aegis.zig").Aegis256_256;
|
||||
const variants = @import("crypto/aegis.zig");
|
||||
|
||||
pub const Aegis128X4 = variants.Aegis128X4;
|
||||
pub const Aegis128X2 = variants.Aegis128X2;
|
||||
pub const Aegis128L = variants.Aegis128L;
|
||||
|
||||
pub const Aegis256X4 = variants.Aegis256X4;
|
||||
pub const Aegis256X2 = variants.Aegis256X2;
|
||||
pub const Aegis256 = variants.Aegis256;
|
||||
|
||||
pub const Aegis128X4_256 = variants.Aegis128X4_256;
|
||||
pub const Aegis128X2_256 = variants.Aegis128X2_256;
|
||||
pub const Aegis128L_256 = variants.Aegis128L_256;
|
||||
|
||||
pub const Aegis256X4_256 = variants.Aegis256X4_256;
|
||||
pub const Aegis256X2_256 = variants.Aegis256X2_256;
|
||||
pub const Aegis256_256 = variants.Aegis256_256;
|
||||
};
|
||||
|
||||
pub const aes_gcm = struct {
|
||||
@ -44,10 +57,22 @@ pub const auth = struct {
|
||||
pub const hmac = @import("crypto/hmac.zig");
|
||||
pub const siphash = @import("crypto/siphash.zig");
|
||||
pub const aegis = struct {
|
||||
pub const Aegis128LMac = @import("crypto/aegis.zig").Aegis128LMac;
|
||||
pub const Aegis128LMac_128 = @import("crypto/aegis.zig").Aegis128LMac_128;
|
||||
pub const Aegis256Mac = @import("crypto/aegis.zig").Aegis256Mac;
|
||||
pub const Aegis256Mac_128 = @import("crypto/aegis.zig").Aegis256Mac_128;
|
||||
const variants = @import("crypto/aegis.zig");
|
||||
pub const Aegis128X4Mac = variants.Aegis128X4Mac;
|
||||
pub const Aegis128X2Mac = variants.Aegis128X2Mac;
|
||||
pub const Aegis128LMac = variants.Aegis128LMac;
|
||||
|
||||
pub const Aegis256X4Mac = variants.Aegis256X4Mac;
|
||||
pub const Aegis256X2Mac = variants.Aegis256X2Mac;
|
||||
pub const Aegis256Mac = variants.Aegis256Mac;
|
||||
|
||||
pub const Aegis128X4Mac_128 = variants.Aegis128X4Mac_128;
|
||||
pub const Aegis128X2Mac_128 = variants.Aegis128X2Mac_128;
|
||||
pub const Aegis128LMac_128 = variants.Aegis128LMac_128;
|
||||
|
||||
pub const Aegis256X4Mac_128 = variants.Aegis256X4Mac_128;
|
||||
pub const Aegis256X2Mac_128 = variants.Aegis256X2Mac_128;
|
||||
pub const Aegis256Mac_128 = variants.Aegis256Mac_128;
|
||||
};
|
||||
pub const cmac = @import("crypto/cmac.zig");
|
||||
};
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -22,6 +22,7 @@ pub const has_hardware_support =
|
||||
(builtin.cpu.arch == .aarch64 and has_armaes);
|
||||
|
||||
pub const Block = impl.Block;
|
||||
pub const BlockVec = impl.BlockVec;
|
||||
pub const AesEncryptCtx = impl.AesEncryptCtx;
|
||||
pub const AesDecryptCtx = impl.AesDecryptCtx;
|
||||
pub const Aes128 = impl.Aes128;
|
||||
|
@ -2,18 +2,23 @@ const std = @import("../../std.zig");
|
||||
const builtin = @import("builtin");
|
||||
const mem = std.mem;
|
||||
const debug = std.debug;
|
||||
const BlockVec = @Vector(2, u64);
|
||||
|
||||
const has_vaes = builtin.cpu.arch == .x86_64 and std.Target.x86.featureSetHas(builtin.cpu.features, .vaes);
|
||||
const has_avx512f = builtin.cpu.arch == .x86_64 and std.Target.x86.featureSetHas(builtin.cpu.features, .avx512f);
|
||||
|
||||
/// A single AES block.
|
||||
pub const Block = struct {
|
||||
const Repr = @Vector(2, u64);
|
||||
|
||||
/// The length of an AES block in bytes.
|
||||
pub const block_length: usize = 16;
|
||||
|
||||
/// Internal representation of a block.
|
||||
repr: BlockVec,
|
||||
repr: Repr,
|
||||
|
||||
/// Convert a byte sequence into an internal representation.
|
||||
pub inline fn fromBytes(bytes: *const [16]u8) Block {
|
||||
const repr = mem.bytesToValue(BlockVec, bytes);
|
||||
const repr = mem.bytesToValue(Repr, bytes);
|
||||
return Block{ .repr = repr };
|
||||
}
|
||||
|
||||
@ -33,7 +38,7 @@ pub const Block = struct {
|
||||
return Block{
|
||||
.repr = asm (
|
||||
\\ vaesenc %[rk], %[in], %[out]
|
||||
: [out] "=x" (-> BlockVec),
|
||||
: [out] "=x" (-> Repr),
|
||||
: [in] "x" (block.repr),
|
||||
[rk] "x" (round_key.repr),
|
||||
),
|
||||
@ -45,7 +50,7 @@ pub const Block = struct {
|
||||
return Block{
|
||||
.repr = asm (
|
||||
\\ vaesenclast %[rk], %[in], %[out]
|
||||
: [out] "=x" (-> BlockVec),
|
||||
: [out] "=x" (-> Repr),
|
||||
: [in] "x" (block.repr),
|
||||
[rk] "x" (round_key.repr),
|
||||
),
|
||||
@ -57,7 +62,7 @@ pub const Block = struct {
|
||||
return Block{
|
||||
.repr = asm (
|
||||
\\ vaesdec %[rk], %[in], %[out]
|
||||
: [out] "=x" (-> BlockVec),
|
||||
: [out] "=x" (-> Repr),
|
||||
: [in] "x" (block.repr),
|
||||
[rk] "x" (inv_round_key.repr),
|
||||
),
|
||||
@ -69,7 +74,7 @@ pub const Block = struct {
|
||||
return Block{
|
||||
.repr = asm (
|
||||
\\ vaesdeclast %[rk], %[in], %[out]
|
||||
: [out] "=x" (-> BlockVec),
|
||||
: [out] "=x" (-> Repr),
|
||||
: [in] "x" (block.repr),
|
||||
[rk] "x" (inv_round_key.repr),
|
||||
),
|
||||
@ -168,17 +173,158 @@ pub const Block = struct {
|
||||
};
|
||||
};
|
||||
|
||||
/// A fixed-size vector of AES blocks.
/// All operations are performed in parallel, using SIMD instructions when available.
///
/// `blocks_count` is the number of 16-byte AES blocks held by the vector.
/// The blocks are grouped into "native words" of `native_vector_size` blocks,
/// and every operation is applied once per native word.
pub fn BlockVec(comptime blocks_count: comptime_int) type {
    return struct {
        const Self = @This();

        /// The number of AES blocks the target architecture can process with a single instruction.
        pub const native_vector_size = w: {
            // Widest grouping first: four blocks per word when AVX-512F is enabled
            // and the block count is a multiple of 4, two blocks per word when VAES
            // is enabled and the block count is even, otherwise one block per word.
            if (has_avx512f and blocks_count % 4 == 0) break :w 4;
            if (has_vaes and blocks_count % 2 == 0) break :w 2;
            break :w 1;
        };

        /// The size of the AES block vector that the target architecture can process with a single instruction, in bytes.
        pub const native_word_size = native_vector_size * 16;

        /// Number of native words needed to hold `blocks_count` blocks.
        const native_words = blocks_count / native_vector_size;

        /// One native word: `native_vector_size` blocks, two `u64` lanes per block.
        const Repr = @Vector(native_vector_size * 2, u64);

        /// Internal representation of a block vector.
        repr: [native_words]Repr,

        /// Length of the block vector in bytes.
        pub const block_length: usize = blocks_count * 16;

        /// Convert a byte sequence into an internal representation.
        pub inline fn fromBytes(bytes: *const [blocks_count * 16]u8) Self {
            var out: Self = undefined;
            inline for (0..native_words) |i| {
                out.repr[i] = mem.bytesToValue(Repr, bytes[i * native_word_size ..][0..native_word_size]);
            }
            return out;
        }

        /// Convert the internal representation of a block vector into a byte sequence.
        pub inline fn toBytes(block_vec: Self) [blocks_count * 16]u8 {
            var out: [blocks_count * 16]u8 = undefined;
            inline for (0..native_words) |i| {
                out[i * native_word_size ..][0..native_word_size].* = mem.toBytes(block_vec.repr[i]);
            }
            return out;
        }

        /// XOR the block vector with a byte sequence.
        /// Returns the result as bytes rather than as a block vector.
        pub inline fn xorBytes(block_vec: Self, bytes: *const [blocks_count * 16]u8) [blocks_count * 16]u8 {
            var x: Self = undefined;
            inline for (0..native_words) |i| {
                x.repr[i] = block_vec.repr[i] ^ mem.bytesToValue(Repr, bytes[i * native_word_size ..][0..native_word_size]);
            }
            return x.toBytes();
        }

        /// Apply the forward AES operation to the block vector with a vector of round keys.
        /// One `vaesenc` is issued per native word.
        pub inline fn encrypt(block_vec: Self, round_key_vec: Self) Self {
            var out: Self = undefined;
            inline for (0..native_words) |i| {
                out.repr[i] = asm (
                    \\ vaesenc %[rk], %[in], %[out]
                    : [out] "=x" (-> Repr),
                    : [in] "x" (block_vec.repr[i]),
                      [rk] "x" (round_key_vec.repr[i]),
                );
            }
            return out;
        }

        /// Apply the forward AES operation to the block vector with a vector of last round keys.
        /// One `vaesenclast` is issued per native word.
        pub inline fn encryptLast(block_vec: Self, round_key_vec: Self) Self {
            var out: Self = undefined;
            inline for (0..native_words) |i| {
                out.repr[i] = asm (
                    \\ vaesenclast %[rk], %[in], %[out]
                    : [out] "=x" (-> Repr),
                    : [in] "x" (block_vec.repr[i]),
                      [rk] "x" (round_key_vec.repr[i]),
                );
            }
            return out;
        }

        /// Apply the inverse AES operation to the block vector with a vector of round keys.
        /// One `vaesdec` is issued per native word.
        pub inline fn decrypt(block_vec: Self, inv_round_key_vec: Self) Self {
            var out: Self = undefined;
            inline for (0..native_words) |i| {
                out.repr[i] = asm (
                    \\ vaesdec %[rk], %[in], %[out]
                    : [out] "=x" (-> Repr),
                    : [in] "x" (block_vec.repr[i]),
                      [rk] "x" (inv_round_key_vec.repr[i]),
                );
            }
            return out;
        }

        /// Apply the inverse AES operation to the block vector with a vector of last round keys.
        /// One `vaesdeclast` is issued per native word.
        pub inline fn decryptLast(block_vec: Self, inv_round_key_vec: Self) Self {
            var out: Self = undefined;
            inline for (0..native_words) |i| {
                out.repr[i] = asm (
                    \\ vaesdeclast %[rk], %[in], %[out]
                    : [out] "=x" (-> Repr),
                    : [in] "x" (block_vec.repr[i]),
                      [rk] "x" (inv_round_key_vec.repr[i]),
                );
            }
            return out;
        }

        /// Apply the bitwise XOR operation to the content of two block vectors.
        pub inline fn xorBlocks(block_vec1: Self, block_vec2: Self) Self {
            var out: Self = undefined;
            inline for (0..native_words) |i| {
                out.repr[i] = block_vec1.repr[i] ^ block_vec2.repr[i];
            }
            return out;
        }

        /// Apply the bitwise AND operation to the content of two block vectors.
        pub inline fn andBlocks(block_vec1: Self, block_vec2: Self) Self {
            var out: Self = undefined;
            inline for (0..native_words) |i| {
                out.repr[i] = block_vec1.repr[i] & block_vec2.repr[i];
            }
            return out;
        }

        /// Apply the bitwise OR operation to the content of two block vectors.
        // Both operands are block vectors: each native word of `block_vec2` is
        // ORed with the matching native word of `block_vec1`, mirroring
        // `xorBlocks` and `andBlocks`.
        pub inline fn orBlocks(block_vec1: Self, block_vec2: Self) Self {
            var out: Self = undefined;
            inline for (0..native_words) |i| {
                out.repr[i] = block_vec1.repr[i] | block_vec2.repr[i];
            }
            return out;
        }
    };
}
|
||||
|
||||
fn KeySchedule(comptime Aes: type) type {
|
||||
std.debug.assert(Aes.rounds == 10 or Aes.rounds == 14);
|
||||
const rounds = Aes.rounds;
|
||||
|
||||
return struct {
|
||||
const Self = @This();
|
||||
|
||||
const Repr = Aes.block.Repr;
|
||||
|
||||
round_keys: [rounds + 1]Block,
|
||||
|
||||
fn drc(comptime second: bool, comptime rc: u8, t: BlockVec, tx: BlockVec) BlockVec {
|
||||
var s: BlockVec = undefined;
|
||||
var ts: BlockVec = undefined;
|
||||
fn drc(comptime second: bool, comptime rc: u8, t: Repr, tx: Repr) Repr {
|
||||
var s: Repr = undefined;
|
||||
var ts: Repr = undefined;
|
||||
return asm (
|
||||
\\ vaeskeygenassist %[rc], %[t], %[s]
|
||||
\\ vpslldq $4, %[tx], %[ts]
|
||||
@ -187,7 +333,7 @@ fn KeySchedule(comptime Aes: type) type {
|
||||
\\ vpxor %[ts], %[r], %[r]
|
||||
\\ vpshufd %[mask], %[s], %[ts]
|
||||
\\ vpxor %[ts], %[r], %[r]
|
||||
: [r] "=&x" (-> BlockVec),
|
||||
: [r] "=&x" (-> Repr),
|
||||
[s] "=&x" (s),
|
||||
[ts] "=&x" (ts),
|
||||
: [rc] "n" (rc),
|
||||
@ -234,7 +380,7 @@ fn KeySchedule(comptime Aes: type) type {
|
||||
inv_round_keys[i] = Block{
|
||||
.repr = asm (
|
||||
\\ vaesimc %[rk], %[inv_rk]
|
||||
: [inv_rk] "=x" (-> BlockVec),
|
||||
: [inv_rk] "=x" (-> Repr),
|
||||
: [rk] "x" (round_keys[rounds - i].repr),
|
||||
),
|
||||
};
|
||||
|
@ -1,18 +1,19 @@
|
||||
const std = @import("../../std.zig");
|
||||
const mem = std.mem;
|
||||
const debug = std.debug;
|
||||
const BlockVec = @Vector(2, u64);
|
||||
|
||||
/// A single AES block.
|
||||
pub const Block = struct {
|
||||
const Repr = @Vector(2, u64);
|
||||
|
||||
pub const block_length: usize = 16;
|
||||
|
||||
/// Internal representation of a block.
|
||||
repr: BlockVec,
|
||||
repr: Repr,
|
||||
|
||||
/// Convert a byte sequence into an internal representation.
|
||||
pub inline fn fromBytes(bytes: *const [16]u8) Block {
|
||||
const repr = mem.bytesToValue(BlockVec, bytes);
|
||||
const repr = mem.bytesToValue(Repr, bytes);
|
||||
return Block{ .repr = repr };
|
||||
}
|
||||
|
||||
@ -36,7 +37,7 @@ pub const Block = struct {
|
||||
\\ mov %[out].16b, %[in].16b
|
||||
\\ aese %[out].16b, %[zero].16b
|
||||
\\ aesmc %[out].16b, %[out].16b
|
||||
: [out] "=&x" (-> BlockVec),
|
||||
: [out] "=&x" (-> Repr),
|
||||
: [in] "x" (block.repr),
|
||||
[zero] "x" (zero),
|
||||
)) ^ round_key.repr,
|
||||
@ -49,7 +50,7 @@ pub const Block = struct {
|
||||
.repr = (asm (
|
||||
\\ mov %[out].16b, %[in].16b
|
||||
\\ aese %[out].16b, %[zero].16b
|
||||
: [out] "=&x" (-> BlockVec),
|
||||
: [out] "=&x" (-> Repr),
|
||||
: [in] "x" (block.repr),
|
||||
[zero] "x" (zero),
|
||||
)) ^ round_key.repr,
|
||||
@ -63,7 +64,7 @@ pub const Block = struct {
|
||||
\\ mov %[out].16b, %[in].16b
|
||||
\\ aesd %[out].16b, %[zero].16b
|
||||
\\ aesimc %[out].16b, %[out].16b
|
||||
: [out] "=&x" (-> BlockVec),
|
||||
: [out] "=&x" (-> Repr),
|
||||
: [in] "x" (block.repr),
|
||||
[zero] "x" (zero),
|
||||
)) ^ inv_round_key.repr,
|
||||
@ -76,7 +77,7 @@ pub const Block = struct {
|
||||
.repr = (asm (
|
||||
\\ mov %[out].16b, %[in].16b
|
||||
\\ aesd %[out].16b, %[zero].16b
|
||||
: [out] "=&x" (-> BlockVec),
|
||||
: [out] "=&x" (-> Repr),
|
||||
: [in] "x" (block.repr),
|
||||
[zero] "x" (zero),
|
||||
)) ^ inv_round_key.repr,
|
||||
@ -165,6 +166,118 @@ pub const Block = struct {
|
||||
};
|
||||
};
|
||||
|
||||
/// A fixed-size vector of AES blocks.
/// All operations are performed in parallel, using SIMD instructions when available.
///
/// `blocks_count` is the number of 16-byte AES blocks held by the vector.
/// In this implementation every native word is a single `Block`, and each
/// operation forwards to the corresponding `Block` method, one block at a time.
pub fn BlockVec(comptime blocks_count: comptime_int) type {
    return struct {
        const Self = @This();

        /// The number of AES blocks the target architecture can process with a single instruction.
        pub const native_vector_size = 1;

        /// The size of the AES block vector that the target architecture can process with a single instruction, in bytes.
        pub const native_word_size = native_vector_size * 16;

        /// Number of native words; equal to the block count since a word is one block.
        const native_words = blocks_count;

        /// Internal representation of a block vector.
        repr: [native_words]Block,

        /// Length of the block vector in bytes.
        pub const block_length: usize = blocks_count * 16;

        /// Convert a byte sequence into an internal representation.
        pub inline fn fromBytes(bytes: *const [blocks_count * 16]u8) Self {
            var out: Self = undefined;
            inline for (0..native_words) |i| {
                out.repr[i] = Block.fromBytes(bytes[i * native_word_size ..][0..native_word_size]);
            }
            return out;
        }

        /// Convert the internal representation of a block vector into a byte sequence.
        pub inline fn toBytes(block_vec: Self) [blocks_count * 16]u8 {
            var out: [blocks_count * 16]u8 = undefined;
            inline for (0..native_words) |i| {
                out[i * native_word_size ..][0..native_word_size].* = block_vec.repr[i].toBytes();
            }
            return out;
        }

        /// XOR the block vector with a byte sequence.
        /// Returns the result as bytes, one 16-byte chunk per block.
        pub inline fn xorBytes(block_vec: Self, bytes: *const [blocks_count * 16]u8) [blocks_count * 16]u8 {
            // The result is a byte array sized by `blocks_count` (not a fixed 32 bytes),
            // so the per-block XOR output is stored straight into the output bytes.
            var out: [blocks_count * 16]u8 = undefined;
            inline for (0..native_words) |i| {
                out[i * native_word_size ..][0..native_word_size].* = block_vec.repr[i].xorBytes(bytes[i * native_word_size ..][0..native_word_size]);
            }
            return out;
        }

        /// Apply the forward AES operation to the block vector with a vector of round keys.
        pub inline fn encrypt(block_vec: Self, round_key_vec: Self) Self {
            var out: Self = undefined;
            inline for (0..native_words) |i| {
                out.repr[i] = block_vec.repr[i].encrypt(round_key_vec.repr[i]);
            }
            return out;
        }

        /// Apply the forward AES operation to the block vector with a vector of last round keys.
        pub inline fn encryptLast(block_vec: Self, round_key_vec: Self) Self {
            var out: Self = undefined;
            inline for (0..native_words) |i| {
                out.repr[i] = block_vec.repr[i].encryptLast(round_key_vec.repr[i]);
            }
            return out;
        }

        /// Apply the inverse AES operation to the block vector with a vector of round keys.
        pub inline fn decrypt(block_vec: Self, inv_round_key_vec: Self) Self {
            var out: Self = undefined;
            inline for (0..native_words) |i| {
                out.repr[i] = block_vec.repr[i].decrypt(inv_round_key_vec.repr[i]);
            }
            return out;
        }

        /// Apply the inverse AES operation to the block vector with a vector of last round keys.
        pub inline fn decryptLast(block_vec: Self, inv_round_key_vec: Self) Self {
            var out: Self = undefined;
            inline for (0..native_words) |i| {
                out.repr[i] = block_vec.repr[i].decryptLast(inv_round_key_vec.repr[i]);
            }
            return out;
        }

        /// Apply the bitwise XOR operation to the content of two block vectors.
        pub inline fn xorBlocks(block_vec1: Self, block_vec2: Self) Self {
            var out: Self = undefined;
            inline for (0..native_words) |i| {
                out.repr[i] = block_vec1.repr[i].xorBlocks(block_vec2.repr[i]);
            }
            return out;
        }

        /// Apply the bitwise AND operation to the content of two block vectors.
        pub inline fn andBlocks(block_vec1: Self, block_vec2: Self) Self {
            var out: Self = undefined;
            inline for (0..native_words) |i| {
                out.repr[i] = block_vec1.repr[i].andBlocks(block_vec2.repr[i]);
            }
            return out;
        }

        /// Apply the bitwise OR operation to the content of two block vectors.
        // Both operands are block vectors: block `i` of `block_vec2` is ORed with
        // block `i` of `block_vec1`, mirroring `xorBlocks` and `andBlocks`.
        pub inline fn orBlocks(block_vec1: Self, block_vec2: Self) Self {
            var out: Self = undefined;
            inline for (0..native_words) |i| {
                out.repr[i] = block_vec1.repr[i].orBlocks(block_vec2.repr[i]);
            }
            return out;
        }
    };
}
|
||||
|
||||
fn KeySchedule(comptime Aes: type) type {
|
||||
std.debug.assert(Aes.rounds == 10 or Aes.rounds == 14);
|
||||
const rounds = Aes.rounds;
|
||||
@ -172,17 +285,19 @@ fn KeySchedule(comptime Aes: type) type {
|
||||
return struct {
|
||||
const Self = @This();
|
||||
|
||||
const Repr = Aes.block.Repr;
|
||||
|
||||
const zero = @Vector(2, u64){ 0, 0 };
|
||||
const mask1 = @Vector(16, u8){ 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15, 12 };
|
||||
const mask2 = @Vector(16, u8){ 12, 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15 };
|
||||
|
||||
round_keys: [rounds + 1]Block,
|
||||
|
||||
fn drc128(comptime rc: u8, t: BlockVec) BlockVec {
|
||||
var v1: BlockVec = undefined;
|
||||
var v2: BlockVec = undefined;
|
||||
var v3: BlockVec = undefined;
|
||||
var v4: BlockVec = undefined;
|
||||
fn drc128(comptime rc: u8, t: Repr) Repr {
|
||||
var v1: Repr = undefined;
|
||||
var v2: Repr = undefined;
|
||||
var v3: Repr = undefined;
|
||||
var v4: Repr = undefined;
|
||||
|
||||
return asm (
|
||||
\\ movi %[v2].4s, %[rc]
|
||||
@ -196,7 +311,7 @@ fn KeySchedule(comptime Aes: type) type {
|
||||
\\ eor %[v1].16b, %[v1].16b, %[r].16b
|
||||
\\ eor %[r].16b, %[v1].16b, %[v3].16b
|
||||
\\ eor %[r].16b, %[r].16b, %[v4].16b
|
||||
: [r] "=&x" (-> BlockVec),
|
||||
: [r] "=&x" (-> Repr),
|
||||
[v1] "=&x" (v1),
|
||||
[v2] "=&x" (v2),
|
||||
[v3] "=&x" (v3),
|
||||
@ -208,11 +323,11 @@ fn KeySchedule(comptime Aes: type) type {
|
||||
);
|
||||
}
|
||||
|
||||
fn drc256(comptime second: bool, comptime rc: u8, t: BlockVec, tx: BlockVec) BlockVec {
|
||||
var v1: BlockVec = undefined;
|
||||
var v2: BlockVec = undefined;
|
||||
var v3: BlockVec = undefined;
|
||||
var v4: BlockVec = undefined;
|
||||
fn drc256(comptime second: bool, comptime rc: u8, t: Repr, tx: Repr) Repr {
|
||||
var v1: Repr = undefined;
|
||||
var v2: Repr = undefined;
|
||||
var v3: Repr = undefined;
|
||||
var v4: Repr = undefined;
|
||||
|
||||
return asm (
|
||||
\\ movi %[v2].4s, %[rc]
|
||||
@ -226,7 +341,7 @@ fn KeySchedule(comptime Aes: type) type {
|
||||
\\ eor %[v1].16b, %[v1].16b, %[v2].16b
|
||||
\\ eor %[v1].16b, %[v1].16b, %[v3].16b
|
||||
\\ eor %[r].16b, %[v1].16b, %[v4].16b
|
||||
: [r] "=&x" (-> BlockVec),
|
||||
: [r] "=&x" (-> Repr),
|
||||
[v1] "=&x" (v1),
|
||||
[v2] "=&x" (v2),
|
||||
[v3] "=&x" (v3),
|
||||
@ -276,7 +391,7 @@ fn KeySchedule(comptime Aes: type) type {
|
||||
inv_round_keys[i] = Block{
|
||||
.repr = asm (
|
||||
\\ aesimc %[inv_rk].16b, %[rk].16b
|
||||
: [inv_rk] "=x" (-> BlockVec),
|
||||
: [inv_rk] "=x" (-> Repr),
|
||||
: [rk] "x" (round_keys[rounds - i].repr),
|
||||
),
|
||||
};
|
||||
|
@ -2,16 +2,16 @@ const std = @import("../../std.zig");
|
||||
const math = std.math;
|
||||
const mem = std.mem;
|
||||
|
||||
const BlockVec = [4]u32;
|
||||
|
||||
const side_channels_mitigations = std.options.side_channels_mitigations;
|
||||
|
||||
/// A single AES block.
|
||||
pub const Block = struct {
|
||||
const Repr = [4]u32;
|
||||
|
||||
pub const block_length: usize = 16;
|
||||
|
||||
/// Internal representation of a block.
|
||||
repr: BlockVec align(16),
|
||||
repr: Repr align(16),
|
||||
|
||||
/// Convert a byte sequence into an internal representation.
|
||||
pub inline fn fromBytes(bytes: *const [16]u8) Block {
|
||||
@ -19,7 +19,7 @@ pub const Block = struct {
|
||||
const s1 = mem.readInt(u32, bytes[4..8], .little);
|
||||
const s2 = mem.readInt(u32, bytes[8..12], .little);
|
||||
const s3 = mem.readInt(u32, bytes[12..16], .little);
|
||||
return Block{ .repr = BlockVec{ s0, s1, s2, s3 } };
|
||||
return Block{ .repr = Repr{ s0, s1, s2, s3 } };
|
||||
}
|
||||
|
||||
/// Convert the internal representation of a block into a byte sequence.
|
||||
@ -65,7 +65,7 @@ pub const Block = struct {
|
||||
t2 ^= round_key.repr[2];
|
||||
t3 ^= round_key.repr[3];
|
||||
|
||||
return Block{ .repr = BlockVec{ t0, t1, t2, t3 } };
|
||||
return Block{ .repr = Repr{ t0, t1, t2, t3 } };
|
||||
}
|
||||
|
||||
/// Encrypt a block with a round key *WITHOUT ANY PROTECTION AGAINST SIDE CHANNELS*
|
||||
@ -110,7 +110,7 @@ pub const Block = struct {
|
||||
t2 ^= round_key.repr[2];
|
||||
t3 ^= round_key.repr[3];
|
||||
|
||||
return Block{ .repr = BlockVec{ t0, t1, t2, t3 } };
|
||||
return Block{ .repr = Repr{ t0, t1, t2, t3 } };
|
||||
}
|
||||
|
||||
/// Encrypt a block with the last round key.
|
||||
@ -136,7 +136,7 @@ pub const Block = struct {
|
||||
t2 ^= round_key.repr[2];
|
||||
t3 ^= round_key.repr[3];
|
||||
|
||||
return Block{ .repr = BlockVec{ t0, t1, t2, t3 } };
|
||||
return Block{ .repr = Repr{ t0, t1, t2, t3 } };
|
||||
}
|
||||
|
||||
/// Decrypt a block with a round key.
|
||||
@ -161,7 +161,7 @@ pub const Block = struct {
|
||||
t2 ^= round_key.repr[2];
|
||||
t3 ^= round_key.repr[3];
|
||||
|
||||
return Block{ .repr = BlockVec{ t0, t1, t2, t3 } };
|
||||
return Block{ .repr = Repr{ t0, t1, t2, t3 } };
|
||||
}
|
||||
|
||||
/// Decrypt a block with a round key *WITHOUT ANY PROTECTION AGAINST SIDE CHANNELS*
|
||||
@ -206,7 +206,7 @@ pub const Block = struct {
|
||||
t2 ^= round_key.repr[2];
|
||||
t3 ^= round_key.repr[3];
|
||||
|
||||
return Block{ .repr = BlockVec{ t0, t1, t2, t3 } };
|
||||
return Block{ .repr = Repr{ t0, t1, t2, t3 } };
|
||||
}
|
||||
|
||||
/// Decrypt a block with the last round key.
|
||||
@ -232,12 +232,12 @@ pub const Block = struct {
|
||||
t2 ^= round_key.repr[2];
|
||||
t3 ^= round_key.repr[3];
|
||||
|
||||
return Block{ .repr = BlockVec{ t0, t1, t2, t3 } };
|
||||
return Block{ .repr = Repr{ t0, t1, t2, t3 } };
|
||||
}
|
||||
|
||||
/// Apply the bitwise XOR operation to the content of two blocks.
|
||||
pub inline fn xorBlocks(block1: Block, block2: Block) Block {
|
||||
var x: BlockVec = undefined;
|
||||
var x: Repr = undefined;
|
||||
comptime var i = 0;
|
||||
inline while (i < 4) : (i += 1) {
|
||||
x[i] = block1.repr[i] ^ block2.repr[i];
|
||||
@ -247,7 +247,7 @@ pub const Block = struct {
|
||||
|
||||
/// Apply the bitwise AND operation to the content of two blocks.
|
||||
pub inline fn andBlocks(block1: Block, block2: Block) Block {
|
||||
var x: BlockVec = undefined;
|
||||
var x: Repr = undefined;
|
||||
comptime var i = 0;
|
||||
inline while (i < 4) : (i += 1) {
|
||||
x[i] = block1.repr[i] & block2.repr[i];
|
||||
@ -257,7 +257,7 @@ pub const Block = struct {
|
||||
|
||||
/// Apply the bitwise OR operation to the content of two blocks.
|
||||
pub inline fn orBlocks(block1: Block, block2: Block) Block {
|
||||
var x: BlockVec = undefined;
|
||||
var x: Repr = undefined;
|
||||
comptime var i = 0;
|
||||
inline while (i < 4) : (i += 1) {
|
||||
x[i] = block1.repr[i] | block2.repr[i];
|
||||
@ -332,6 +332,118 @@ pub const Block = struct {
|
||||
};
|
||||
};
|
||||
|
||||
/// A fixed-size vector of AES blocks.
/// All operations are performed in parallel, using SIMD instructions when available.
///
/// `blocks_count` is the number of 16-byte AES blocks held by the vector.
/// In this implementation every native word is a single `Block`, and each
/// operation forwards to the corresponding `Block` method, one block at a time.
pub fn BlockVec(comptime blocks_count: comptime_int) type {
    return struct {
        const Self = @This();

        /// The number of AES blocks the target architecture can process with a single instruction.
        pub const native_vector_size = 1;

        /// The size of the AES block vector that the target architecture can process with a single instruction, in bytes.
        pub const native_word_size = native_vector_size * 16;

        /// Number of native words; equal to the block count since a word is one block.
        const native_words = blocks_count;

        /// Internal representation of a block vector.
        repr: [native_words]Block,

        /// Length of the block vector in bytes.
        pub const block_length: usize = blocks_count * 16;

        /// Convert a byte sequence into an internal representation.
        pub inline fn fromBytes(bytes: *const [blocks_count * 16]u8) Self {
            var out: Self = undefined;
            for (0..native_words) |i| {
                out.repr[i] = Block.fromBytes(bytes[i * native_word_size ..][0..native_word_size]);
            }
            return out;
        }

        /// Convert the internal representation of a block vector into a byte sequence.
        pub inline fn toBytes(block_vec: Self) [blocks_count * 16]u8 {
            var out: [blocks_count * 16]u8 = undefined;
            for (0..native_words) |i| {
                out[i * native_word_size ..][0..native_word_size].* = block_vec.repr[i].toBytes();
            }
            return out;
        }

        /// XOR the block vector with a byte sequence.
        /// Returns the result as bytes, one 16-byte chunk per block.
        pub inline fn xorBytes(block_vec: Self, bytes: *const [blocks_count * 16]u8) [blocks_count * 16]u8 {
            // The result is a byte array sized by `blocks_count` (not a fixed 32 bytes),
            // so the per-block XOR output is stored straight into the output bytes.
            var out: [blocks_count * 16]u8 = undefined;
            for (0..native_words) |i| {
                out[i * native_word_size ..][0..native_word_size].* = block_vec.repr[i].xorBytes(bytes[i * native_word_size ..][0..native_word_size]);
            }
            return out;
        }

        /// Apply the forward AES operation to the block vector with a vector of round keys.
        pub inline fn encrypt(block_vec: Self, round_key_vec: Self) Self {
            var out: Self = undefined;
            for (0..native_words) |i| {
                out.repr[i] = block_vec.repr[i].encrypt(round_key_vec.repr[i]);
            }
            return out;
        }

        /// Apply the forward AES operation to the block vector with a vector of last round keys.
        pub inline fn encryptLast(block_vec: Self, round_key_vec: Self) Self {
            var out: Self = undefined;
            for (0..native_words) |i| {
                out.repr[i] = block_vec.repr[i].encryptLast(round_key_vec.repr[i]);
            }
            return out;
        }

        /// Apply the inverse AES operation to the block vector with a vector of round keys.
        pub inline fn decrypt(block_vec: Self, inv_round_key_vec: Self) Self {
            var out: Self = undefined;
            for (0..native_words) |i| {
                out.repr[i] = block_vec.repr[i].decrypt(inv_round_key_vec.repr[i]);
            }
            return out;
        }

        /// Apply the inverse AES operation to the block vector with a vector of last round keys.
        pub inline fn decryptLast(block_vec: Self, inv_round_key_vec: Self) Self {
            var out: Self = undefined;
            for (0..native_words) |i| {
                out.repr[i] = block_vec.repr[i].decryptLast(inv_round_key_vec.repr[i]);
            }
            return out;
        }

        /// Apply the bitwise XOR operation to the content of two block vectors.
        pub inline fn xorBlocks(block_vec1: Self, block_vec2: Self) Self {
            var out: Self = undefined;
            for (0..native_words) |i| {
                out.repr[i] = block_vec1.repr[i].xorBlocks(block_vec2.repr[i]);
            }
            return out;
        }

        /// Apply the bitwise AND operation to the content of two block vectors.
        pub inline fn andBlocks(block_vec1: Self, block_vec2: Self) Self {
            var out: Self = undefined;
            for (0..native_words) |i| {
                out.repr[i] = block_vec1.repr[i].andBlocks(block_vec2.repr[i]);
            }
            return out;
        }

        /// Apply the bitwise OR operation to the content of two block vectors.
        // Both operands are block vectors: block `i` of `block_vec2` is ORed with
        // block `i` of `block_vec1`, mirroring `xorBlocks` and `andBlocks`.
        pub inline fn orBlocks(block_vec1: Self, block_vec2: Self) Self {
            var out: Self = undefined;
            for (0..native_words) |i| {
                out.repr[i] = block_vec1.repr[i].orBlocks(block_vec2.repr[i]);
            }
            return out;
        }
    };
}
|
||||
|
||||
fn KeySchedule(comptime Aes: type) type {
|
||||
std.debug.assert(Aes.rounds == 10 or Aes.rounds == 14);
|
||||
const key_length = Aes.key_bits / 8;
|
||||
@ -671,7 +783,7 @@ fn mul(a: u8, b: u8) u8 {
|
||||
|
||||
const cache_line_bytes = std.atomic.cache_line;
|
||||
|
||||
inline fn sbox_lookup(sbox: *align(64) const [256]u8, idx0: u8, idx1: u8, idx2: u8, idx3: u8) [4]u8 {
|
||||
fn sbox_lookup(sbox: *align(64) const [256]u8, idx0: u8, idx1: u8, idx2: u8, idx3: u8) [4]u8 {
|
||||
if (side_channels_mitigations == .none) {
|
||||
return [4]u8{
|
||||
sbox[idx0],
|
||||
@ -709,7 +821,7 @@ inline fn sbox_lookup(sbox: *align(64) const [256]u8, idx0: u8, idx1: u8, idx2:
|
||||
}
|
||||
}
|
||||
|
||||
inline fn table_lookup(table: *align(64) const [4][256]u32, idx0: u8, idx1: u8, idx2: u8, idx3: u8) [4]u32 {
|
||||
fn table_lookup(table: *align(64) const [4][256]u32, idx0: u8, idx1: u8, idx2: u8, idx3: u8) [4]u32 {
|
||||
if (side_channels_mitigations == .none) {
|
||||
return [4]u32{
|
||||
table[0][idx0],
|
||||
@ -718,17 +830,18 @@ inline fn table_lookup(table: *align(64) const [4][256]u32, idx0: u8, idx1: u8,
|
||||
table[3][idx3],
|
||||
};
|
||||
} else {
|
||||
const table_len: usize = 256;
|
||||
const stride = switch (side_channels_mitigations) {
|
||||
.none => unreachable,
|
||||
.basic => table[0].len / 4,
|
||||
.medium => @max(1, @min(table[0].len, 2 * cache_line_bytes / 4)),
|
||||
.full => @max(1, @min(table[0].len, cache_line_bytes / 4)),
|
||||
.basic => table_len / 4,
|
||||
.medium => @max(1, @min(table_len, 2 * cache_line_bytes / 4)),
|
||||
.full => @max(1, @min(table_len, cache_line_bytes / 4)),
|
||||
};
|
||||
const of0 = idx0 % stride;
|
||||
const of1 = idx1 % stride;
|
||||
const of2 = idx2 % stride;
|
||||
const of3 = idx3 % stride;
|
||||
var t: [4][table[0].len / stride]u32 align(64) = undefined;
|
||||
var t: [4][table_len / stride]u32 align(64) = undefined;
|
||||
var i: usize = 0;
|
||||
while (i < t[0].len) : (i += 1) {
|
||||
const tx = table[0][i * stride ..];
|
||||
|
@ -72,6 +72,10 @@ const macs = [_]Crypto{
|
||||
Crypto{ .ty = crypto.auth.siphash.SipHash64(1, 3), .name = "siphash-1-3" },
|
||||
Crypto{ .ty = crypto.auth.siphash.SipHash128(2, 4), .name = "siphash128-2-4" },
|
||||
Crypto{ .ty = crypto.auth.siphash.SipHash128(1, 3), .name = "siphash128-1-3" },
|
||||
Crypto{ .ty = crypto.auth.aegis.Aegis128X4Mac, .name = "aegis-128x4 mac" },
|
||||
Crypto{ .ty = crypto.auth.aegis.Aegis256X4Mac, .name = "aegis-256x4 mac" },
|
||||
Crypto{ .ty = crypto.auth.aegis.Aegis128X2Mac, .name = "aegis-128x2 mac" },
|
||||
Crypto{ .ty = crypto.auth.aegis.Aegis256X2Mac, .name = "aegis-256x2 mac" },
|
||||
Crypto{ .ty = crypto.auth.aegis.Aegis128LMac, .name = "aegis-128l mac" },
|
||||
Crypto{ .ty = crypto.auth.aegis.Aegis256Mac, .name = "aegis-256 mac" },
|
||||
Crypto{ .ty = crypto.auth.cmac.CmacAes128, .name = "aes-cmac" },
|
||||
@ -283,7 +287,11 @@ const aeads = [_]Crypto{
|
||||
Crypto{ .ty = crypto.aead.chacha_poly.XChaCha20Poly1305, .name = "xchacha20Poly1305" },
|
||||
Crypto{ .ty = crypto.aead.chacha_poly.XChaCha8Poly1305, .name = "xchacha8Poly1305" },
|
||||
Crypto{ .ty = crypto.aead.salsa_poly.XSalsa20Poly1305, .name = "xsalsa20Poly1305" },
|
||||
Crypto{ .ty = crypto.aead.aegis.Aegis128X4, .name = "aegis-128x4" },
|
||||
Crypto{ .ty = crypto.aead.aegis.Aegis128X2, .name = "aegis-128x2" },
|
||||
Crypto{ .ty = crypto.aead.aegis.Aegis128L, .name = "aegis-128l" },
|
||||
Crypto{ .ty = crypto.aead.aegis.Aegis256X4, .name = "aegis-256x4" },
|
||||
Crypto{ .ty = crypto.aead.aegis.Aegis256X2, .name = "aegis-256x2" },
|
||||
Crypto{ .ty = crypto.aead.aegis.Aegis256, .name = "aegis-256" },
|
||||
Crypto{ .ty = crypto.aead.aes_gcm.Aes128Gcm, .name = "aes128-gcm" },
|
||||
Crypto{ .ty = crypto.aead.aes_gcm.Aes256Gcm, .name = "aes256-gcm" },
|
||||
|
Loading…
Reference in New Issue
Block a user