From 3928787aead66aeb084f9f84b91fddab234b30a4 Mon Sep 17 00:00:00 2001
From: Martin Willi
Date: Sun, 11 Nov 2018 10:36:29 +0100
Subject: [PATCH] UPSTREAM: crypto: x86/chacha20 - Add a 2-block AVX2 variant

This variant uses the same principle as the single block SSSE3 variant
by shuffling the state matrix after each round. With the wider AVX
registers, we can do two blocks in parallel, though.

This function can increase performance and efficiency significantly for
lengths that would otherwise require a 4-block function.

Signed-off-by: Martin Willi
Signed-off-by: Herbert Xu
(cherry picked from commit a5dd97f86211e91219807db607d740f9896b8e0b)
Bug: 152722841
Signed-off-by: Jason A. Donenfeld
Signed-off-by: Greg Kroah-Hartman
Change-Id: Ie0011f26fd59257ace84299f7a65a6c11da13b47
---
 arch/x86/crypto/chacha20_glue.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c
index 396ab6141e96..c5259723c196 100644
--- a/arch/x86/crypto/chacha20_glue.c
+++ b/arch/x86/crypto/chacha20_glue.c
@@ -24,6 +24,8 @@ asmlinkage void chacha20_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
 asmlinkage void chacha20_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
 					  unsigned int len);
 #ifdef CONFIG_AS_AVX2
+asmlinkage void chacha20_2block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
+					 unsigned int len);
 asmlinkage void chacha20_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
 					 unsigned int len);
 static bool chacha20_use_avx2;
@@ -52,6 +54,11 @@ static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src,
 			state[12] += chacha20_advance(bytes, 8);
 			return;
 		}
+		if (bytes > CHACHA_BLOCK_SIZE) {
+			chacha20_2block_xor_avx2(state, dst, src, bytes);
+			state[12] += chacha20_advance(bytes, 2);
+			return;
+		}
 	}
 #endif
 	while (bytes >= CHACHA_BLOCK_SIZE * 4) {