^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0 OR MIT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) #include <crypto/algapi.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) #include <crypto/internal/hash.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) #include <crypto/internal/poly1305.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) #include <crypto/internal/simd.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) #include <linux/crypto.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) #include <linux/jump_label.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) #include <linux/kernel.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #include <linux/module.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #include <linux/sizes.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) #include <asm/intel-family.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) #include <asm/simd.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17)
/*
 * Low-level Poly1305 primitives defined outside this file (declared
 * asmlinkage, so presumably the assembly implementation -- confirm against
 * the build).  They all take an opaque state pointer @ctx, which the C code
 * in this file views as struct poly1305_arch_internal.
 *
 * The "blocks" variants take an input pointer, a byte length and a pad bit;
 * the "emit" variants write a POLY1305_DIGEST_SIZE-byte MAC and take a
 * four-word nonce.
 */
asmlinkage void poly1305_init_x86_64(void *ctx,
				     const u8 key[POLY1305_BLOCK_SIZE]);
asmlinkage void poly1305_blocks_x86_64(void *ctx, const u8 *inp,
				       const size_t len, const u32 padbit);
asmlinkage void poly1305_emit_x86_64(void *ctx, u8 mac[POLY1305_DIGEST_SIZE],
				     const u32 nonce[4]);
asmlinkage void poly1305_emit_avx(void *ctx, u8 mac[POLY1305_DIGEST_SIZE],
				  const u32 nonce[4]);
asmlinkage void poly1305_blocks_avx(void *ctx, const u8 *inp, const size_t len,
				    const u32 padbit);
asmlinkage void poly1305_blocks_avx2(void *ctx, const u8 *inp, const size_t len,
				     const u32 padbit);
asmlinkage void poly1305_blocks_avx512(void *ctx, const u8 *inp,
				       const size_t len, const u32 padbit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32)
/*
 * Static keys selecting which vector implementation may be used.  They are
 * defined false and are only enabled from poly1305_simd_mod_init(), based on
 * the CPU features detected there.
 */
static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx);
static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx2);
static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx512);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36)
/*
 * C view of the opaque Poly1305 state handed to the low-level routines.
 *
 * The accumulator is kept either as five limbs in base 2^26 (h[], flagged by
 * is_base2_26) or as three 64-bit words in base 2^64 (hs[]); both views share
 * the same storage through the union.
 *
 * NOTE(review): the field layout presumably has to match what the external
 * block/emit routines expect -- confirm before changing it.
 */
struct poly1305_arch_internal {
	union {
		struct {
			u32 h[5];		/* accumulator limbs, base 2^26 */
			u32 is_base2_26;	/* non-zero while h[] is the live view */
		};
		u64 hs[3];			/* accumulator words, base 2^64 */
	};
	/* The remaining fields are not touched by the C code visible here. */
	u64 r[2];
	u64 pad;
	struct { u32 r2, r1, r4, r3; } rn[9];
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49)
/*
 * The AVX code uses base 2^26, while the scalar code uses base 2^64. If we hit
 * the unfortunate situation of using AVX and then having to go back to scalar
 * -- because the user is silly and has called the update function from two
 * separate contexts -- then we need to convert back to the original base before
 * proceeding. It is possible to reason that the initial reduction below is
 * sufficient given the implementation invariants. However, for the avoidance of
 * doubt and because this is not performance critical, we do the full reduction
 * anyway. Z3 proof of below function: https://xn--4db.cc/ltPtHCKN/py
 *
 * @ctx: opaque state, interpreted as struct poly1305_arch_internal.
 *
 * Returns immediately when the state is not flagged as base 2^26. Otherwise
 * the accumulator is rewritten in place and is_base2_26 is cleared.
 */
static void convert_to_base2_64(void *ctx)
{
	struct poly1305_arch_internal *state = ctx;
	u32 cy;

	if (!state->is_base2_26)
		return;

	/* Propagate carries so that h[0]..h[3] each keep only their low 26 bits. */
	cy = state->h[0] >> 26; state->h[0] &= 0x3ffffff; state->h[1] += cy;
	cy = state->h[1] >> 26; state->h[1] &= 0x3ffffff; state->h[2] += cy;
	cy = state->h[2] >> 26; state->h[2] &= 0x3ffffff; state->h[3] += cy;
	cy = state->h[3] >> 26; state->h[3] &= 0x3ffffff; state->h[4] += cy;
	/*
	 * Pack the limbs into 64-bit words. hs[] aliases h[] through the union,
	 * so the statement order matters: each right-hand side is read before
	 * its store, and each store only overwrites limbs that are not read by
	 * any later statement.
	 */
	state->hs[0] = ((u64)state->h[2] << 52) | ((u64)state->h[1] << 26) | state->h[0];
	state->hs[1] = ((u64)state->h[4] << 40) | ((u64)state->h[3] << 14) | (state->h[2] >> 12);
	state->hs[2] = state->h[4] >> 24;
	/* Branch-free unsigned compare: evaluates to 1 if a < b, 0 otherwise. */
#define ULT(a, b) ((a ^ ((a ^ b) | ((a - b) ^ b))) >> (sizeof(a) * 8 - 1))
	/*
	 * Fold everything in hs[2] above its two low bits back into the low
	 * word: (x >> 2) + (x & ~3) equals 5 * (x >> 2).
	 */
	cy = (state->hs[2] >> 2) + (state->hs[2] & ~3ULL);
	state->hs[2] &= 3;
	state->hs[0] += cy;
	/* Ripple the carry out of each addition into the next word. */
	state->hs[1] += (cy = ULT(state->hs[0], cy));
	state->hs[2] += ULT(state->hs[1], cy);
#undef ULT
	state->is_base2_26 = 0;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83)
/*
 * Initialise the opaque state @ctx from the first POLY1305_BLOCK_SIZE bytes of
 * key material. This simply forwards to poly1305_init_x86_64().
 */
static void poly1305_simd_init(void *ctx, const u8 key[POLY1305_BLOCK_SIZE])
{
	poly1305_init_x86_64(ctx, key);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) const u32 padbit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) struct poly1305_arch_internal *state = ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) /* SIMD disables preemption, so relax after processing each page. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) BUILD_BUG_ON(SZ_4K < POLY1305_BLOCK_SIZE ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) SZ_4K % POLY1305_BLOCK_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) if (!static_branch_likely(&poly1305_use_avx) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) (len < (POLY1305_BLOCK_SIZE * 18) && !state->is_base2_26) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) !crypto_simd_usable()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) convert_to_base2_64(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) poly1305_blocks_x86_64(ctx, inp, len, padbit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) const size_t bytes = min_t(size_t, len, SZ_4K);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) kernel_fpu_begin();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) if (IS_ENABLED(CONFIG_AS_AVX512) && static_branch_likely(&poly1305_use_avx512))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) poly1305_blocks_avx512(ctx, inp, bytes, padbit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) else if (static_branch_likely(&poly1305_use_avx2))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) poly1305_blocks_avx2(ctx, inp, bytes, padbit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) poly1305_blocks_avx(ctx, inp, bytes, padbit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) kernel_fpu_end();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) len -= bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) inp += bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) } while (len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) static void poly1305_simd_emit(void *ctx, u8 mac[POLY1305_DIGEST_SIZE],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) const u32 nonce[4])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) if (!static_branch_likely(&poly1305_use_avx))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) poly1305_emit_x86_64(ctx, mac, nonce);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) poly1305_emit_avx(ctx, mac, nonce);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) poly1305_simd_init(&dctx->h, key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) dctx->s[0] = get_unaligned_le32(&key[16]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) dctx->s[1] = get_unaligned_le32(&key[20]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) dctx->s[2] = get_unaligned_le32(&key[24]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) dctx->s[3] = get_unaligned_le32(&key[28]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) dctx->buflen = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) dctx->sset = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) EXPORT_SYMBOL(poly1305_init_arch);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) static unsigned int crypto_poly1305_setdctxkey(struct poly1305_desc_ctx *dctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) const u8 *inp, unsigned int len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) unsigned int acc = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) if (unlikely(!dctx->sset)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) if (!dctx->rset && len >= POLY1305_BLOCK_SIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) poly1305_simd_init(&dctx->h, inp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) inp += POLY1305_BLOCK_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) len -= POLY1305_BLOCK_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) acc += POLY1305_BLOCK_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) dctx->rset = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) if (len >= POLY1305_BLOCK_SIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) dctx->s[0] = get_unaligned_le32(&inp[0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) dctx->s[1] = get_unaligned_le32(&inp[4]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) dctx->s[2] = get_unaligned_le32(&inp[8]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) dctx->s[3] = get_unaligned_le32(&inp[12]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) acc += POLY1305_BLOCK_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) dctx->sset = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) return acc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) unsigned int srclen)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) unsigned int bytes, used;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) if (unlikely(dctx->buflen)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) bytes = min(srclen, POLY1305_BLOCK_SIZE - dctx->buflen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) memcpy(dctx->buf + dctx->buflen, src, bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) src += bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) srclen -= bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) dctx->buflen += bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) if (dctx->buflen == POLY1305_BLOCK_SIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) if (likely(!crypto_poly1305_setdctxkey(dctx, dctx->buf, POLY1305_BLOCK_SIZE)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) poly1305_simd_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) dctx->buflen = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) if (likely(srclen >= POLY1305_BLOCK_SIZE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) bytes = round_down(srclen, POLY1305_BLOCK_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) srclen -= bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) used = crypto_poly1305_setdctxkey(dctx, src, bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) if (likely(bytes - used))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) poly1305_simd_blocks(&dctx->h, src + used, bytes - used, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) src += bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) if (unlikely(srclen)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) dctx->buflen = srclen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) memcpy(dctx->buf, src, srclen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) EXPORT_SYMBOL(poly1305_update_arch);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) if (unlikely(dctx->buflen)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) dctx->buf[dctx->buflen++] = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) memset(dctx->buf + dctx->buflen, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) POLY1305_BLOCK_SIZE - dctx->buflen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) poly1305_simd_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) poly1305_simd_emit(&dctx->h, dst, dctx->s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) *dctx = (struct poly1305_desc_ctx){};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) EXPORT_SYMBOL(poly1305_final_arch);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216)
/*
 * shash .init callback: reset the per-request context to all-zero fields, so
 * no key is considered set yet. Always returns 0.
 */
static int crypto_poly1305_init(struct shash_desc *desc)
{
	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);

	*dctx = (struct poly1305_desc_ctx){};
	return 0;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224)
/*
 * shash .update callback: forward @srclen bytes at @src to
 * poly1305_update_arch(). Always returns 0.
 */
static int crypto_poly1305_update(struct shash_desc *desc,
				  const u8 *src, unsigned int srclen)
{
	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);

	poly1305_update_arch(dctx, src, srclen);
	return 0;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233)
/*
 * shash .final callback: write the tag to @dst via poly1305_final_arch().
 *
 * Returns -ENOKEY if the context never had its "s" key part set (dctx->sset
 * is false), 0 otherwise.
 */
static int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
{
	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);

	if (unlikely(!dctx->sset))
		return -ENOKEY;

	poly1305_final_arch(dctx, dst);
	return 0;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244)
/*
 * shash algorithm descriptor, registered from poly1305_simd_mod_init() under
 * the algorithm name "poly1305" with driver name "poly1305-simd". The
 * per-request descriptor context is a struct poly1305_desc_ctx.
 */
static struct shash_alg alg = {
	.digestsize	= POLY1305_DIGEST_SIZE,
	.init		= crypto_poly1305_init,
	.update		= crypto_poly1305_update,
	.final		= crypto_poly1305_final,
	.descsize	= sizeof(struct poly1305_desc_ctx),
	.base		= {
		.cra_name		= "poly1305",
		.cra_driver_name	= "poly1305-simd",
		.cra_priority		= 300,
		.cra_blocksize		= POLY1305_BLOCK_SIZE,
		.cra_module		= THIS_MODULE,
	},
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) static int __init poly1305_simd_mod_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) if (boot_cpu_has(X86_FEATURE_AVX) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) static_branch_enable(&poly1305_use_avx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) if (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_AVX2) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) static_branch_enable(&poly1305_use_avx2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) if (IS_ENABLED(CONFIG_AS_AVX512) && boot_cpu_has(X86_FEATURE_AVX) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_AVX512F) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | XFEATURE_MASK_AVX512, NULL) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) /* Skylake downclocks unacceptably much when using zmm, but later generations are fast. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) boot_cpu_data.x86_model != INTEL_FAM6_SKYLAKE_X)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) static_branch_enable(&poly1305_use_avx512);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) return IS_REACHABLE(CONFIG_CRYPTO_HASH) ? crypto_register_shash(&alg) : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276)
/*
 * Module exit: unregister the shash algorithm, mirroring the conditional
 * registration done in poly1305_simd_mod_init().
 */
static void __exit poly1305_simd_mod_exit(void)
{
	if (IS_REACHABLE(CONFIG_CRYPTO_HASH))
		crypto_unregister_shash(&alg);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282)
/* Module entry and exit points. */
module_init(poly1305_simd_mod_init);
module_exit(poly1305_simd_mod_exit);

/* Module metadata and the crypto aliases for the registered algorithm names. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
MODULE_DESCRIPTION("Poly1305 authenticator");
MODULE_ALIAS_CRYPTO("poly1305");
MODULE_ALIAS_CRYPTO("poly1305-simd");