mirror of
https://github.com/hardkernel/linux.git
synced 2026-06-09 12:17:12 +09:00
crypto: p10-aes-gcm - Revert implementation
Revert the changes that added p10-aes-gcm:

0781bbd7ea ("crypto: p10-aes-gcm - A perl script to process PowerPC assembler source")
41a6437ab4 ("crypto: p10-aes-gcm - Supporting functions for ghash")
3b47eccaaf ("crypto: p10-aes-gcm - Supporting functions for AES")
ca68a96c37 ("crypto: p10-aes-gcm - An accelerated AES/GCM stitched implementation")
cc40379b6e ("crypto: p10-aes-gcm - Glue code for AES/GCM stitched implementation")
3c657e8689 ("crypto: p10-aes-gcm - Update Kconfig and Makefile")

These changes fail to build in many configurations and are not ready for prime time.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
This commit is contained in:
@@ -94,15 +94,4 @@ config CRYPTO_AES_PPC_SPE
|
||||
architecture specific assembler implementations that work on 1KB
|
||||
tables or 256 bytes S-boxes.
|
||||
|
||||
config CRYPTO_P10_AES_GCM
|
||||
tristate "Stitched AES/GCM acceleration support on P10+ CPU (PPC)"
|
||||
depends on PPC64
|
||||
select CRYPTO_LIB_AES
|
||||
select CRYPTO_ALGAPI
|
||||
select CRYPTO_AEAD
|
||||
default m
|
||||
help
|
||||
Support for cryptographic acceleration instructions on Power10+ CPU.
|
||||
This module supports stitched acceleration for AES/GCM in hardware.
|
||||
|
||||
endmenu
|
||||
|
||||
@@ -13,7 +13,6 @@ obj-$(CONFIG_CRYPTO_SHA256_PPC_SPE) += sha256-ppc-spe.o
|
||||
obj-$(CONFIG_CRYPTO_CRC32C_VPMSUM) += crc32c-vpmsum.o
|
||||
obj-$(CONFIG_CRYPTO_CRCT10DIF_VPMSUM) += crct10dif-vpmsum.o
|
||||
obj-$(CONFIG_CRYPTO_VPMSUM_TESTER) += crc-vpmsum_test.o
|
||||
obj-$(CONFIG_CRYPTO_P10_AES_GCM) += p10-aes-gcm-crypto.o
|
||||
|
||||
aes-ppc-spe-y := aes-spe-core.o aes-spe-keys.o aes-tab-4k.o aes-spe-modes.o aes-spe-glue.o
|
||||
md5-ppc-y := md5-asm.o md5-glue.o
|
||||
@@ -22,12 +21,3 @@ sha1-ppc-spe-y := sha1-spe-asm.o sha1-spe-glue.o
|
||||
sha256-ppc-spe-y := sha256-spe-asm.o sha256-spe-glue.o
|
||||
crc32c-vpmsum-y := crc32c-vpmsum_asm.o crc32c-vpmsum_glue.o
|
||||
crct10dif-vpmsum-y := crct10dif-vpmsum_asm.o crct10dif-vpmsum_glue.o
|
||||
p10-aes-gcm-crypto-y := p10-aes-gcm-glue.o p10_aes_gcm.o ghashp8-ppc.o aesp8-ppc.o
|
||||
|
||||
quiet_cmd_perl = PERL $@
|
||||
cmd_perl = $(PERL) $< $(if $(CONFIG_CPU_LITTLE_ENDIAN), linux-ppc64le, linux-ppc64) > $@
|
||||
|
||||
targets += aesp8-ppc.S ghashp8-ppc.S
|
||||
|
||||
$(obj)/aesp8-ppc.S $(obj)/ghashp8-ppc.S: $(obj)/%.S: $(src)/%.pl FORCE
|
||||
$(call if_changed,perl)
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,370 +0,0 @@
|
||||
#!/usr/bin/env perl
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
# This code is taken from the OpenSSL project but the author (Andy Polyakov)
|
||||
# has relicensed it under the GPLv2. Therefore this program is free software;
|
||||
# you can redistribute it and/or modify it under the terms of the GNU General
|
||||
# Public License version 2 as published by the Free Software Foundation.
|
||||
#
|
||||
# The original headers, including the original license headers, are
|
||||
# included below for completeness.
|
||||
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
# project. The module is, however, dual licensed under OpenSSL and
|
||||
# CRYPTOGAMS licenses depending on where you obtain it. For further
|
||||
# details see https://www.openssl.org/~appro/cryptogams/.
|
||||
# ====================================================================
|
||||
#
|
||||
# GHASH for PowerISA v2.07.
|
||||
#
|
||||
# July 2014
|
||||
#
|
||||
# Accurate performance measurements are problematic, because it's
|
||||
# always virtualized setup with possibly throttled processor.
|
||||
# Relative comparison is therefore more informative. This initial
|
||||
# version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x
|
||||
# faster than "4-bit" integer-only compiler-generated 64-bit code.
|
||||
# "Initial version" means that there is room for further improvement.
|
||||
|
||||
$flavour=shift;
|
||||
$output =shift;
|
||||
|
||||
if ($flavour =~ /64/) {
|
||||
$SIZE_T=8;
|
||||
$LRSAVE=2*$SIZE_T;
|
||||
$STU="stdu";
|
||||
$POP="ld";
|
||||
$PUSH="std";
|
||||
} elsif ($flavour =~ /32/) {
|
||||
$SIZE_T=4;
|
||||
$LRSAVE=$SIZE_T;
|
||||
$STU="stwu";
|
||||
$POP="lwz";
|
||||
$PUSH="stw";
|
||||
} else { die "nonsense $flavour"; }
|
||||
|
||||
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
||||
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
|
||||
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
|
||||
die "can't locate ppc-xlate.pl";
|
||||
|
||||
# Send everything printed on STDOUT through the ppc-xlate.pl translator.
# The low-precedence "or" makes die() guard the open() call itself; with
# "||" the test applied to the (always true) command string, so a failed
# open was never reported.
open STDOUT,"| $^X $xlate $flavour $output" or die "can't call $xlate: $!";
|
||||
|
||||
my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6)); # argument block
|
||||
|
||||
my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3));
|
||||
my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12));
|
||||
my ($Xl1,$Xm1,$Xh1,$IN1,$H2,$H2h,$H2l)=map("v$_",(13..19));
|
||||
my $vrsave="r12";
|
||||
my ($t4,$t5,$t6) = ($Hl,$H,$Hh);
|
||||
|
||||
$code=<<___;
|
||||
.machine "any"
|
||||
|
||||
.text
|
||||
|
||||
.globl .gcm_init_p8
|
||||
lis r0,0xfff0
|
||||
li r8,0x10
|
||||
mfspr $vrsave,256
|
||||
li r9,0x20
|
||||
mtspr 256,r0
|
||||
li r10,0x30
|
||||
lvx_u $H,0,r4 # load H
|
||||
le?xor r7,r7,r7
|
||||
le?addi r7,r7,0x8 # need a vperm start with 08
|
||||
le?lvsr 5,0,r7
|
||||
le?vspltisb 6,0x0f
|
||||
le?vxor 5,5,6 # set a b-endian mask
|
||||
le?vperm $H,$H,$H,5
|
||||
|
||||
vspltisb $xC2,-16 # 0xf0
|
||||
vspltisb $t0,1 # one
|
||||
vaddubm $xC2,$xC2,$xC2 # 0xe0
|
||||
vxor $zero,$zero,$zero
|
||||
vor $xC2,$xC2,$t0 # 0xe1
|
||||
vsldoi $xC2,$xC2,$zero,15 # 0xe1...
|
||||
vsldoi $t1,$zero,$t0,1 # ...1
|
||||
vaddubm $xC2,$xC2,$xC2 # 0xc2...
|
||||
vspltisb $t2,7
|
||||
vor $xC2,$xC2,$t1 # 0xc2....01
|
||||
vspltb $t1,$H,0 # most significant byte
|
||||
vsl $H,$H,$t0 # H<<=1
|
||||
vsrab $t1,$t1,$t2 # broadcast carry bit
|
||||
vand $t1,$t1,$xC2
|
||||
vxor $H,$H,$t1 # twisted H
|
||||
|
||||
vsldoi $H,$H,$H,8 # twist even more ...
|
||||
vsldoi $xC2,$zero,$xC2,8 # 0xc2.0
|
||||
vsldoi $Hl,$zero,$H,8 # ... and split
|
||||
vsldoi $Hh,$H,$zero,8
|
||||
|
||||
stvx_u $xC2,0,r3 # save pre-computed table
|
||||
stvx_u $Hl,r8,r3
|
||||
stvx_u $H, r9,r3
|
||||
stvx_u $Hh,r10,r3
|
||||
|
||||
mtspr 256,$vrsave
|
||||
blr
|
||||
.long 0
|
||||
.byte 0,12,0x14,0,0,0,2,0
|
||||
.long 0
|
||||
.size .gcm_init_p8,.-.gcm_init_p8
|
||||
|
||||
.globl .gcm_init_htable
|
||||
lis r0,0xfff0
|
||||
li r8,0x10
|
||||
mfspr $vrsave,256
|
||||
li r9,0x20
|
||||
mtspr 256,r0
|
||||
li r10,0x30
|
||||
lvx_u $H,0,r4 # load H
|
||||
|
||||
vspltisb $xC2,-16 # 0xf0
|
||||
vspltisb $t0,1 # one
|
||||
vaddubm $xC2,$xC2,$xC2 # 0xe0
|
||||
vxor $zero,$zero,$zero
|
||||
vor $xC2,$xC2,$t0 # 0xe1
|
||||
vsldoi $xC2,$xC2,$zero,15 # 0xe1...
|
||||
vsldoi $t1,$zero,$t0,1 # ...1
|
||||
vaddubm $xC2,$xC2,$xC2 # 0xc2...
|
||||
vspltisb $t2,7
|
||||
vor $xC2,$xC2,$t1 # 0xc2....01
|
||||
vspltb $t1,$H,0 # most significant byte
|
||||
vsl $H,$H,$t0 # H<<=1
|
||||
vsrab $t1,$t1,$t2 # broadcast carry bit
|
||||
vand $t1,$t1,$xC2
|
||||
vxor $IN,$H,$t1 # twisted H
|
||||
|
||||
vsldoi $H,$IN,$IN,8 # twist even more ...
|
||||
vsldoi $xC2,$zero,$xC2,8 # 0xc2.0
|
||||
vsldoi $Hl,$zero,$H,8 # ... and split
|
||||
vsldoi $Hh,$H,$zero,8
|
||||
|
||||
stvx_u $xC2,0,r3 # save pre-computed table
|
||||
stvx_u $Hl,r8,r3
|
||||
li r8,0x40
|
||||
stvx_u $H, r9,r3
|
||||
li r9,0x50
|
||||
stvx_u $Hh,r10,r3
|
||||
li r10,0x60
|
||||
|
||||
vpmsumd $Xl,$IN,$Hl # H.lo·H.lo
|
||||
vpmsumd $Xm,$IN,$H # H.hi·H.lo+H.lo·H.hi
|
||||
vpmsumd $Xh,$IN,$Hh # H.hi·H.hi
|
||||
|
||||
vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
|
||||
|
||||
vsldoi $t0,$Xm,$zero,8
|
||||
vsldoi $t1,$zero,$Xm,8
|
||||
vxor $Xl,$Xl,$t0
|
||||
vxor $Xh,$Xh,$t1
|
||||
|
||||
vsldoi $Xl,$Xl,$Xl,8
|
||||
vxor $Xl,$Xl,$t2
|
||||
|
||||
vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
|
||||
vpmsumd $Xl,$Xl,$xC2
|
||||
vxor $t1,$t1,$Xh
|
||||
vxor $IN1,$Xl,$t1
|
||||
|
||||
vsldoi $H2,$IN1,$IN1,8
|
||||
vsldoi $H2l,$zero,$H2,8
|
||||
vsldoi $H2h,$H2,$zero,8
|
||||
|
||||
stvx_u $H2l,r8,r3 # save H^2
|
||||
li r8,0x70
|
||||
stvx_u $H2,r9,r3
|
||||
li r9,0x80
|
||||
stvx_u $H2h,r10,r3
|
||||
li r10,0x90
|
||||
|
||||
vpmsumd $Xl,$IN,$H2l # H.lo·H^2.lo
|
||||
vpmsumd $Xl1,$IN1,$H2l # H^2.lo·H^2.lo
|
||||
vpmsumd $Xm,$IN,$H2 # H.hi·H^2.lo+H.lo·H^2.hi
|
||||
vpmsumd $Xm1,$IN1,$H2 # H^2.hi·H^2.lo+H^2.lo·H^2.hi
|
||||
vpmsumd $Xh,$IN,$H2h # H.hi·H^2.hi
|
||||
vpmsumd $Xh1,$IN1,$H2h # H^2.hi·H^2.hi
|
||||
|
||||
vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
|
||||
vpmsumd $t6,$Xl1,$xC2 # 1st reduction phase
|
||||
|
||||
vsldoi $t0,$Xm,$zero,8
|
||||
vsldoi $t1,$zero,$Xm,8
|
||||
vsldoi $t4,$Xm1,$zero,8
|
||||
vsldoi $t5,$zero,$Xm1,8
|
||||
vxor $Xl,$Xl,$t0
|
||||
vxor $Xh,$Xh,$t1
|
||||
vxor $Xl1,$Xl1,$t4
|
||||
vxor $Xh1,$Xh1,$t5
|
||||
|
||||
vsldoi $Xl,$Xl,$Xl,8
|
||||
vsldoi $Xl1,$Xl1,$Xl1,8
|
||||
vxor $Xl,$Xl,$t2
|
||||
vxor $Xl1,$Xl1,$t6
|
||||
|
||||
vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
|
||||
vsldoi $t5,$Xl1,$Xl1,8 # 2nd reduction phase
|
||||
vpmsumd $Xl,$Xl,$xC2
|
||||
vpmsumd $Xl1,$Xl1,$xC2
|
||||
vxor $t1,$t1,$Xh
|
||||
vxor $t5,$t5,$Xh1
|
||||
vxor $Xl,$Xl,$t1
|
||||
vxor $Xl1,$Xl1,$t5
|
||||
|
||||
vsldoi $H,$Xl,$Xl,8
|
||||
vsldoi $H2,$Xl1,$Xl1,8
|
||||
vsldoi $Hl,$zero,$H,8
|
||||
vsldoi $Hh,$H,$zero,8
|
||||
vsldoi $H2l,$zero,$H2,8
|
||||
vsldoi $H2h,$H2,$zero,8
|
||||
|
||||
stvx_u $Hl,r8,r3 # save H^3
|
||||
li r8,0xa0
|
||||
stvx_u $H,r9,r3
|
||||
li r9,0xb0
|
||||
stvx_u $Hh,r10,r3
|
||||
li r10,0xc0
|
||||
stvx_u $H2l,r8,r3 # save H^4
|
||||
stvx_u $H2,r9,r3
|
||||
stvx_u $H2h,r10,r3
|
||||
|
||||
mtspr 256,$vrsave
|
||||
blr
|
||||
.long 0
|
||||
.byte 0,12,0x14,0,0,0,2,0
|
||||
.long 0
|
||||
.size .gcm_init_htable,.-.gcm_init_htable
|
||||
|
||||
.globl .gcm_gmult_p8
|
||||
lis r0,0xfff8
|
||||
li r8,0x10
|
||||
mfspr $vrsave,256
|
||||
li r9,0x20
|
||||
mtspr 256,r0
|
||||
li r10,0x30
|
||||
lvx_u $IN,0,$Xip # load Xi
|
||||
|
||||
lvx_u $Hl,r8,$Htbl # load pre-computed table
|
||||
le?lvsl $lemask,r0,r0
|
||||
lvx_u $H, r9,$Htbl
|
||||
le?vspltisb $t0,0x07
|
||||
lvx_u $Hh,r10,$Htbl
|
||||
le?vxor $lemask,$lemask,$t0
|
||||
lvx_u $xC2,0,$Htbl
|
||||
le?vperm $IN,$IN,$IN,$lemask
|
||||
vxor $zero,$zero,$zero
|
||||
|
||||
vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
|
||||
vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
|
||||
vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
|
||||
|
||||
vpmsumd $t2,$Xl,$xC2 # 1st phase
|
||||
|
||||
vsldoi $t0,$Xm,$zero,8
|
||||
vsldoi $t1,$zero,$Xm,8
|
||||
vxor $Xl,$Xl,$t0
|
||||
vxor $Xh,$Xh,$t1
|
||||
|
||||
vsldoi $Xl,$Xl,$Xl,8
|
||||
vxor $Xl,$Xl,$t2
|
||||
|
||||
vsldoi $t1,$Xl,$Xl,8 # 2nd phase
|
||||
vpmsumd $Xl,$Xl,$xC2
|
||||
vxor $t1,$t1,$Xh
|
||||
vxor $Xl,$Xl,$t1
|
||||
|
||||
le?vperm $Xl,$Xl,$Xl,$lemask
|
||||
stvx_u $Xl,0,$Xip # write out Xi
|
||||
|
||||
mtspr 256,$vrsave
|
||||
blr
|
||||
.long 0
|
||||
.byte 0,12,0x14,0,0,0,2,0
|
||||
.long 0
|
||||
.size .gcm_gmult_p8,.-.gcm_gmult_p8
|
||||
|
||||
.globl .gcm_ghash_p8
|
||||
lis r0,0xfff8
|
||||
li r8,0x10
|
||||
mfspr $vrsave,256
|
||||
li r9,0x20
|
||||
mtspr 256,r0
|
||||
li r10,0x30
|
||||
lvx_u $Xl,0,$Xip # load Xi
|
||||
|
||||
lvx_u $Hl,r8,$Htbl # load pre-computed table
|
||||
le?lvsl $lemask,r0,r0
|
||||
lvx_u $H, r9,$Htbl
|
||||
le?vspltisb $t0,0x07
|
||||
lvx_u $Hh,r10,$Htbl
|
||||
le?vxor $lemask,$lemask,$t0
|
||||
lvx_u $xC2,0,$Htbl
|
||||
le?vperm $Xl,$Xl,$Xl,$lemask
|
||||
vxor $zero,$zero,$zero
|
||||
|
||||
lvx_u $IN,0,$inp
|
||||
addi $inp,$inp,16
|
||||
subi $len,$len,16
|
||||
le?vperm $IN,$IN,$IN,$lemask
|
||||
vxor $IN,$IN,$Xl
|
||||
b Loop
|
||||
|
||||
.align 5
|
||||
Loop:
|
||||
subic $len,$len,16
|
||||
vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
|
||||
subfe. r0,r0,r0 # borrow?-1:0
|
||||
vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
|
||||
and r0,r0,$len
|
||||
vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
|
||||
add $inp,$inp,r0
|
||||
|
||||
vpmsumd $t2,$Xl,$xC2 # 1st phase
|
||||
|
||||
vsldoi $t0,$Xm,$zero,8
|
||||
vsldoi $t1,$zero,$Xm,8
|
||||
vxor $Xl,$Xl,$t0
|
||||
vxor $Xh,$Xh,$t1
|
||||
|
||||
vsldoi $Xl,$Xl,$Xl,8
|
||||
vxor $Xl,$Xl,$t2
|
||||
lvx_u $IN,0,$inp
|
||||
addi $inp,$inp,16
|
||||
|
||||
vsldoi $t1,$Xl,$Xl,8 # 2nd phase
|
||||
vpmsumd $Xl,$Xl,$xC2
|
||||
le?vperm $IN,$IN,$IN,$lemask
|
||||
vxor $t1,$t1,$Xh
|
||||
vxor $IN,$IN,$t1
|
||||
vxor $IN,$IN,$Xl
|
||||
beq Loop # did $len-=16 borrow?
|
||||
|
||||
vxor $Xl,$Xl,$t1
|
||||
le?vperm $Xl,$Xl,$Xl,$lemask
|
||||
stvx_u $Xl,0,$Xip # write out Xi
|
||||
|
||||
mtspr 256,$vrsave
|
||||
blr
|
||||
.long 0
|
||||
.byte 0,12,0x14,0,0,0,4,0
|
||||
.long 0
|
||||
.size .gcm_ghash_p8,.-.gcm_ghash_p8
|
||||
|
||||
.asciz "GHASH for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
|
||||
.align 2
|
||||
___
|
||||
|
||||
# Resolve the endian-conditional "le?" / "be?" prefixes in the generated
# assembly and print the result.  For a flavour ending in "le" the "le?"
# prefix is dropped and "be?" is turned into the marker "#be#"; for any
# other flavour it is the other way round.  At most one prefix is
# rewritten per line, and only its first occurrence.
my $is_little_endian = ($flavour =~ /le$/);
for my $asm_line (split("\n",$code)) {
    if ($is_little_endian) {
        unless ($asm_line =~ s/le\?//) {
            $asm_line =~ s/be\?/#be#/;
        }
    }
    else {
        unless ($asm_line =~ s/le\?/#le#/) {
            $asm_line =~ s/be\?//;
        }
    }
    print $asm_line,"\n";
}
|
||||
|
||||
close STDOUT; # enforce flush
|
||||
@@ -1,345 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
/*
|
||||
* Glue code for accelerated AES-GCM stitched implementation for ppc64le.
|
||||
*
|
||||
* Copyright 2022- IBM Inc. All rights reserved
|
||||
*/
|
||||
|
||||
#include <asm/unaligned.h>
|
||||
#include <asm/simd.h>
|
||||
#include <asm/switch_to.h>
|
||||
#include <crypto/algapi.h>
|
||||
#include <crypto/aes.h>
|
||||
#include <crypto/algapi.h>
|
||||
#include <crypto/b128ops.h>
|
||||
#include <crypto/gf128mul.h>
|
||||
#include <crypto/internal/simd.h>
|
||||
#include <crypto/internal/aead.h>
|
||||
#include <crypto/internal/hash.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <crypto/scatterwalk.h>
|
||||
#include <linux/cpufeature.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
#define PPC_MODULE_FEATURE_P10 (32 + ilog2(PPC_FEATURE2_ARCH_3_1))
|
||||
#define PPC_ALIGN 16
|
||||
#define GCM_IV_SIZE 12
|
||||
|
||||
MODULE_DESCRIPTION("PPC64le AES-GCM with Stitched implementation");
|
||||
/* Author contact; the e-mail address is enclosed in a matching <...> pair. */
MODULE_AUTHOR("Danny Tsen <dtsen@linux.ibm.com>");
|
||||
MODULE_LICENSE("GPL v2");
|
||||
MODULE_ALIAS_CRYPTO("aes");
|
||||
|
||||
asmlinkage int aes_p8_set_encrypt_key(const u8 *userKey, const int bits,
|
||||
void *key);
|
||||
asmlinkage void aes_p8_encrypt(const u8 *in, u8 *out, const void *key);
|
||||
asmlinkage void aes_p10_gcm_encrypt(u8 *in, u8 *out, size_t len,
|
||||
void *rkey, u8 *iv, void *Xi);
|
||||
asmlinkage void aes_p10_gcm_decrypt(u8 *in, u8 *out, size_t len,
|
||||
void *rkey, u8 *iv, void *Xi);
|
||||
asmlinkage void gcm_init_htable(unsigned char htable[256], unsigned char Xi[16]);
|
||||
asmlinkage void gcm_ghash_p8(unsigned char *Xi, unsigned char *Htable,
|
||||
unsigned char *aad, unsigned int alen);
|
||||
|
||||
struct aes_key {
|
||||
u8 key[AES_MAX_KEYLENGTH];
|
||||
u64 rounds;
|
||||
};
|
||||
|
||||
struct gcm_ctx {
|
||||
u8 iv[16];
|
||||
u8 ivtag[16];
|
||||
u8 aad_hash[16];
|
||||
u64 aadLen;
|
||||
u64 Plen; /* offset 56 - used in aes_p10_gcm_{en/de}crypt */
|
||||
};
|
||||
struct Hash_ctx {
|
||||
u8 H[16]; /* subkey */
|
||||
u8 Htable[256]; /* Xi, Hash table(offset 32) */
|
||||
};
|
||||
|
||||
struct p10_aes_gcm_ctx {
|
||||
struct aes_key enc_key;
|
||||
};
|
||||
|
||||
static void vsx_begin(void)
|
||||
{
|
||||
preempt_disable();
|
||||
enable_kernel_vsx();
|
||||
}
|
||||
|
||||
static void vsx_end(void)
|
||||
{
|
||||
disable_kernel_vsx();
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
static void set_subkey(unsigned char *hash)
|
||||
{
|
||||
*(u64 *)&hash[0] = be64_to_cpup((__be64 *)&hash[0]);
|
||||
*(u64 *)&hash[8] = be64_to_cpup((__be64 *)&hash[8]);
|
||||
}
|
||||
|
||||
/*
|
||||
* Compute aad if any.
|
||||
* - Hash aad and copy to Xi.
|
||||
*/
|
||||
static void set_aad(struct gcm_ctx *gctx, struct Hash_ctx *hash,
|
||||
unsigned char *aad, int alen)
|
||||
{
|
||||
int i;
|
||||
u8 nXi[16] = {0, };
|
||||
|
||||
gctx->aadLen = alen;
|
||||
i = alen & ~0xf;
|
||||
if (i) {
|
||||
gcm_ghash_p8(nXi, hash->Htable+32, aad, i);
|
||||
aad += i;
|
||||
alen -= i;
|
||||
}
|
||||
if (alen) {
|
||||
for (i = 0; i < alen; i++)
|
||||
nXi[i] ^= aad[i];
|
||||
|
||||
memset(gctx->aad_hash, 0, 16);
|
||||
gcm_ghash_p8(gctx->aad_hash, hash->Htable+32, nXi, 16);
|
||||
} else {
|
||||
memcpy(gctx->aad_hash, nXi, 16);
|
||||
}
|
||||
|
||||
memcpy(hash->Htable, gctx->aad_hash, 16);
|
||||
}
|
||||
|
||||
static void gcmp10_init(struct gcm_ctx *gctx, u8 *iv, unsigned char *rdkey,
|
||||
struct Hash_ctx *hash, u8 *assoc, unsigned int assoclen)
|
||||
{
|
||||
__be32 counter = cpu_to_be32(1);
|
||||
|
||||
aes_p8_encrypt(hash->H, hash->H, rdkey);
|
||||
set_subkey(hash->H);
|
||||
gcm_init_htable(hash->Htable+32, hash->H);
|
||||
|
||||
*((__be32 *)(iv+12)) = counter;
|
||||
|
||||
gctx->Plen = 0;
|
||||
|
||||
/*
|
||||
* Encrypt counter vector as iv tag and increment counter.
|
||||
*/
|
||||
aes_p8_encrypt(iv, gctx->ivtag, rdkey);
|
||||
|
||||
counter = cpu_to_be32(2);
|
||||
*((__be32 *)(iv+12)) = counter;
|
||||
memcpy(gctx->iv, iv, 16);
|
||||
|
||||
gctx->aadLen = assoclen;
|
||||
memset(gctx->aad_hash, 0, 16);
|
||||
if (assoclen)
|
||||
set_aad(gctx, hash, assoc, assoclen);
|
||||
}
|
||||
|
||||
static void finish_tag(struct gcm_ctx *gctx, struct Hash_ctx *hash, int len)
|
||||
{
|
||||
int i;
|
||||
unsigned char len_ac[16 + PPC_ALIGN];
|
||||
unsigned char *aclen = PTR_ALIGN((void *)len_ac, PPC_ALIGN);
|
||||
__be64 clen = cpu_to_be64(len << 3);
|
||||
__be64 alen = cpu_to_be64(gctx->aadLen << 3);
|
||||
|
||||
if (len == 0 && gctx->aadLen == 0) {
|
||||
memcpy(hash->Htable, gctx->ivtag, 16);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Len is in bits.
|
||||
*/
|
||||
*((__be64 *)(aclen)) = alen;
|
||||
*((__be64 *)(aclen+8)) = clen;
|
||||
|
||||
/*
|
||||
* hash (AAD len and len)
|
||||
*/
|
||||
gcm_ghash_p8(hash->Htable, hash->Htable+32, aclen, 16);
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
hash->Htable[i] ^= gctx->ivtag[i];
|
||||
}
|
||||
|
||||
/*
 * Validate a requested authentication tag length.
 *
 * Tag sizes of 4, 8 and 12 through 16 bytes are accepted (returns 0);
 * every other size is rejected with -EINVAL.  @tfm is not consulted.
 */
static int set_authsize(struct crypto_aead *tfm, unsigned int authsize)
{
	if (authsize == 4 || authsize == 8)
		return 0;

	if (authsize >= 12 && authsize <= 16)
		return 0;

	return -EINVAL;
}
|
||||
|
||||
static int p10_aes_gcm_setkey(struct crypto_aead *aead, const u8 *key,
|
||||
unsigned int keylen)
|
||||
{
|
||||
struct crypto_tfm *tfm = crypto_aead_tfm(aead);
|
||||
struct p10_aes_gcm_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
int ret;
|
||||
|
||||
vsx_begin();
|
||||
ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key);
|
||||
vsx_end();
|
||||
|
||||
return ret ? -EINVAL : 0;
|
||||
}
|
||||
|
||||
static int p10_aes_gcm_crypt(struct aead_request *req, int enc)
|
||||
{
|
||||
struct crypto_tfm *tfm = req->base.tfm;
|
||||
struct p10_aes_gcm_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
u8 databuf[sizeof(struct gcm_ctx) + PPC_ALIGN];
|
||||
struct gcm_ctx *gctx = PTR_ALIGN((void *)databuf, PPC_ALIGN);
|
||||
u8 hashbuf[sizeof(struct Hash_ctx) + PPC_ALIGN];
|
||||
struct Hash_ctx *hash = PTR_ALIGN((void *)hashbuf, PPC_ALIGN);
|
||||
struct scatter_walk assoc_sg_walk;
|
||||
struct skcipher_walk walk;
|
||||
u8 *assocmem = NULL;
|
||||
u8 *assoc;
|
||||
unsigned int assoclen = req->assoclen;
|
||||
unsigned int cryptlen = req->cryptlen;
|
||||
unsigned char ivbuf[AES_BLOCK_SIZE+PPC_ALIGN];
|
||||
unsigned char *iv = PTR_ALIGN((void *)ivbuf, PPC_ALIGN);
|
||||
int ret;
|
||||
unsigned long auth_tag_len = crypto_aead_authsize(__crypto_aead_cast(tfm));
|
||||
u8 otag[16];
|
||||
int total_processed = 0;
|
||||
|
||||
memset(databuf, 0, sizeof(databuf));
|
||||
memset(hashbuf, 0, sizeof(hashbuf));
|
||||
memset(ivbuf, 0, sizeof(ivbuf));
|
||||
memcpy(iv, req->iv, GCM_IV_SIZE);
|
||||
|
||||
/* Linearize assoc, if not already linear */
|
||||
if (req->src->length >= assoclen && req->src->length) {
|
||||
scatterwalk_start(&assoc_sg_walk, req->src);
|
||||
assoc = scatterwalk_map(&assoc_sg_walk);
|
||||
} else {
|
||||
gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
|
||||
GFP_KERNEL : GFP_ATOMIC;
|
||||
|
||||
/* assoc can be any length, so must be on heap */
|
||||
assocmem = kmalloc(assoclen, flags);
|
||||
if (unlikely(!assocmem))
|
||||
return -ENOMEM;
|
||||
assoc = assocmem;
|
||||
|
||||
scatterwalk_map_and_copy(assoc, req->src, 0, assoclen, 0);
|
||||
}
|
||||
|
||||
vsx_begin();
|
||||
gcmp10_init(gctx, iv, (unsigned char *) &ctx->enc_key, hash, assoc, assoclen);
|
||||
vsx_end();
|
||||
|
||||
if (!assocmem)
|
||||
scatterwalk_unmap(assoc);
|
||||
else
|
||||
kfree(assocmem);
|
||||
|
||||
if (enc)
|
||||
ret = skcipher_walk_aead_encrypt(&walk, req, false);
|
||||
else
|
||||
ret = skcipher_walk_aead_decrypt(&walk, req, false);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
while (walk.nbytes > 0 && ret == 0) {
|
||||
|
||||
vsx_begin();
|
||||
if (enc)
|
||||
aes_p10_gcm_encrypt(walk.src.virt.addr,
|
||||
walk.dst.virt.addr,
|
||||
walk.nbytes,
|
||||
&ctx->enc_key, gctx->iv, hash->Htable);
|
||||
else
|
||||
aes_p10_gcm_decrypt(walk.src.virt.addr,
|
||||
walk.dst.virt.addr,
|
||||
walk.nbytes,
|
||||
&ctx->enc_key, gctx->iv, hash->Htable);
|
||||
vsx_end();
|
||||
|
||||
total_processed += walk.nbytes;
|
||||
ret = skcipher_walk_done(&walk, 0);
|
||||
}
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* Finalize hash */
|
||||
vsx_begin();
|
||||
finish_tag(gctx, hash, total_processed);
|
||||
vsx_end();
|
||||
|
||||
/* copy Xi to end of dst */
|
||||
if (enc)
|
||||
scatterwalk_map_and_copy(hash->Htable, req->dst, req->assoclen + cryptlen,
|
||||
auth_tag_len, 1);
|
||||
else {
|
||||
scatterwalk_map_and_copy(otag, req->src,
|
||||
req->assoclen + cryptlen - auth_tag_len,
|
||||
auth_tag_len, 0);
|
||||
|
||||
if (crypto_memneq(otag, hash->Htable, auth_tag_len)) {
|
||||
memzero_explicit(hash->Htable, 16);
|
||||
return -EBADMSG;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int p10_aes_gcm_encrypt(struct aead_request *req)
|
||||
{
|
||||
return p10_aes_gcm_crypt(req, 1);
|
||||
}
|
||||
|
||||
static int p10_aes_gcm_decrypt(struct aead_request *req)
|
||||
{
|
||||
return p10_aes_gcm_crypt(req, 0);
|
||||
}
|
||||
|
||||
static struct aead_alg gcm_aes_alg = {
|
||||
.ivsize = GCM_IV_SIZE,
|
||||
.maxauthsize = 16,
|
||||
|
||||
.setauthsize = set_authsize,
|
||||
.setkey = p10_aes_gcm_setkey,
|
||||
.encrypt = p10_aes_gcm_encrypt,
|
||||
.decrypt = p10_aes_gcm_decrypt,
|
||||
|
||||
.base.cra_name = "gcm(aes)",
|
||||
.base.cra_driver_name = "p10_aes_gcm",
|
||||
.base.cra_priority = 2100,
|
||||
.base.cra_blocksize = 1,
|
||||
.base.cra_ctxsize = sizeof(struct p10_aes_gcm_ctx),
|
||||
.base.cra_module = THIS_MODULE,
|
||||
};
|
||||
|
||||
static int __init p10_init(void)
|
||||
{
|
||||
return crypto_register_aead(&gcm_aes_alg);
|
||||
}
|
||||
|
||||
static void __exit p10_exit(void)
|
||||
{
|
||||
crypto_unregister_aead(&gcm_aes_alg);
|
||||
}
|
||||
|
||||
module_cpu_feature_match(PPC_MODULE_FEATURE_P10, p10_init);
|
||||
module_exit(p10_exit);
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,229 +0,0 @@
|
||||
#!/usr/bin/env perl
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
# PowerPC assembler distiller by <appro>.
|
||||
|
||||
my $flavour = shift;
|
||||
my $output = shift;
|
||||
# Redirect STDOUT only when an output file was actually named; callers may
# omit it and rely on shell redirection instead, in which case STDOUT is
# left untouched.  The low-precedence "or" makes die() guard the open()
# call itself; with "||" the test applied to the (always true) file-name
# string, so a failed open was never reported.
if (defined($output) && $output ne "") {
    open STDOUT, ">", $output or die "can't open $output: $!";
}
|
||||
|
||||
my %GLOBALS;
|
||||
my $dotinlocallabels=($flavour=~/linux/)?1:0;
|
||||
|
||||
################################################################
|
||||
# directives which need special treatment on different platforms
|
||||
################################################################
|
||||
my $globl = sub {
|
||||
my $junk = shift;
|
||||
my $name = shift;
|
||||
my $global = \$GLOBALS{$name};
|
||||
my $ret;
|
||||
|
||||
$name =~ s|^[\.\_]||;
|
||||
|
||||
SWITCH: for ($flavour) {
|
||||
/aix/ && do { $name = ".$name";
|
||||
last;
|
||||
};
|
||||
/osx/ && do { $name = "_$name";
|
||||
last;
|
||||
};
|
||||
/linux/
|
||||
&& do { $ret = "_GLOBAL($name)";
|
||||
last;
|
||||
};
|
||||
}
|
||||
|
||||
# Fallback for flavours that did not produce a _GLOBAL() wrapper: export
# the symbol, align it, and emit its label.  The alignment directive needs
# its leading dot; a bare "align 5" would be read as an instruction.
$ret = ".globl $name\n.align 5\n$name:" if (!$ret);
|
||||
$$global = $name;
|
||||
$ret;
|
||||
};
|
||||
my $text = sub {
|
||||
my $ret = ($flavour =~ /aix/) ? ".csect\t.text[PR],7" : ".text";
|
||||
$ret = ".abiversion 2\n".$ret if ($flavour =~ /linux.*64le/);
|
||||
$ret;
|
||||
};
|
||||
my $machine = sub {
|
||||
my $junk = shift;
|
||||
my $arch = shift;
|
||||
if ($flavour =~ /osx/)
|
||||
{ $arch =~ s/\"//g;
|
||||
$arch = ($flavour=~/64/) ? "ppc970-64" : "ppc970" if ($arch eq "any");
|
||||
}
|
||||
".machine $arch";
|
||||
};
|
||||
my $size = sub {
|
||||
if ($flavour =~ /linux/)
|
||||
{ shift;
|
||||
my $name = shift; $name =~ s|^[\.\_]||;
|
||||
my $ret = ".size $name,.-".($flavour=~/64$/?".":"").$name;
|
||||
$ret .= "\n.size .$name,.-.$name" if ($flavour=~/64$/);
|
||||
$ret;
|
||||
}
|
||||
else
|
||||
{ ""; }
|
||||
};
|
||||
my $asciz = sub {
|
||||
shift;
|
||||
my $line = join(",",@_);
|
||||
if ($line =~ /^"(.*)"$/)
|
||||
{ ".byte " . join(",",unpack("C*",$1),0) . "\n.align 2"; }
|
||||
else
|
||||
{ ""; }
|
||||
};
|
||||
my $quad = sub {
|
||||
shift;
|
||||
my @ret;
|
||||
my ($hi,$lo);
|
||||
for (@_) {
|
||||
if (/^0x([0-9a-f]*?)([0-9a-f]{1,8})$/io)
|
||||
{ $hi=$1?"0x$1":"0"; $lo="0x$2"; }
|
||||
elsif (/^([0-9]+)$/o)
|
||||
{ $hi=$1>>32; $lo=$1&0xffffffff; } # error-prone with 32-bit perl
|
||||
else
|
||||
{ $hi=undef; $lo=$_; }
|
||||
|
||||
if (defined($hi))
|
||||
{ push(@ret,$flavour=~/le$/o?".long\t$lo,$hi":".long\t$hi,$lo"); }
|
||||
else
|
||||
{ push(@ret,".quad $lo"); }
|
||||
}
|
||||
join("\n",@ret);
|
||||
};
|
||||
|
||||
################################################################
|
||||
# simplified mnemonics not handled by at least one assembler
|
||||
################################################################
|
||||
my $cmplw = sub {
|
||||
my $f = shift;
|
||||
my $cr = 0; $cr = shift if ($#_>1);
|
||||
# Some out-of-date 32-bit GNU assembler just can't handle cmplw...
|
||||
($flavour =~ /linux.*32/) ?
|
||||
" .long ".sprintf "0x%x",31<<26|$cr<<23|$_[0]<<16|$_[1]<<11|64 :
|
||||
" cmplw ".join(',',$cr,@_);
|
||||
};
|
||||
# bdnz: rewritten as an explicit "bc" for every flavour that does not match
# /linux/.  On linux flavours $bdnz stays undefined, so the mnemonic is
# passed through to the assembler unchanged.  The declaration is kept
# separate from the conditional assignment because "my ... if COND" has
# undefined behaviour in Perl.
my $bdnz;
$bdnz = sub {
    my $f = shift;
    my $bo = $f=~/[\+\-]/ ? 16+9 : 16; # optional "to be taken" hint
    " bc $bo,0,".shift;
} if ($flavour!~/linux/);
|
||||
my $bltlr = sub {
|
||||
my $f = shift;
|
||||
my $bo = $f=~/\-/ ? 12+2 : 12; # optional "not to be taken" hint
|
||||
($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints
|
||||
" .long ".sprintf "0x%x",19<<26|$bo<<21|16<<1 :
|
||||
" bclr $bo,0";
|
||||
};
|
||||
my $bnelr = sub {
|
||||
my $f = shift;
|
||||
my $bo = $f=~/\-/ ? 4+2 : 4; # optional "not to be taken" hint
|
||||
($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints
|
||||
" .long ".sprintf "0x%x",19<<26|$bo<<21|2<<16|16<<1 :
|
||||
" bclr $bo,2";
|
||||
};
|
||||
my $beqlr = sub {
|
||||
my $f = shift;
|
||||
my $bo = $f=~/-/ ? 12+2 : 12; # optional "not to be taken" hint
|
||||
($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints
|
||||
" .long ".sprintf "0x%X",19<<26|$bo<<21|2<<16|16<<1 :
|
||||
" bclr $bo,2";
|
||||
};
|
||||
# GNU assembler can't handle extrdi rA,rS,16,48, or when sum of last two
|
||||
# arguments is 64, with "operand out of range" error.
|
||||
my $extrdi = sub {
|
||||
my ($f,$ra,$rs,$n,$b) = @_;
|
||||
$b = ($b+$n)&63; $n = 64-$n;
|
||||
" rldicl $ra,$rs,$b,$n";
|
||||
};
|
||||
my $vmr = sub {
|
||||
my ($f,$vx,$vy) = @_;
|
||||
" vor $vx,$vy,$vy";
|
||||
};
|
||||
|
||||
# Some ABIs specify vrsave, special-purpose register #256, as reserved
|
||||
# for system use.
|
||||
my $no_vrsave = ($flavour =~ /linux-ppc64le/);
|
||||
my $mtspr = sub {
|
||||
my ($f,$idx,$ra) = @_;
|
||||
if ($idx == 256 && $no_vrsave) {
|
||||
" or $ra,$ra,$ra";
|
||||
} else {
|
||||
" mtspr $idx,$ra";
|
||||
}
|
||||
};
|
||||
my $mfspr = sub {
|
||||
my ($f,$rd,$idx) = @_;
|
||||
if ($idx == 256 && $no_vrsave) {
|
||||
" li $rd,-1";
|
||||
} else {
|
||||
" mfspr $rd,$idx";
|
||||
}
|
||||
};
|
||||
|
||||
# PowerISA 2.06 stuff
|
||||
# Encode a memory-access instruction as a raw ".long" word.
#
# Arguments: ($f, $vrt, $ra, $rb, $op) - the mnemonic suffix flag (unused),
# three register numbers and the extended opcode.  The word is built from
# 31 in the top six bits, the register numbers at bit offsets 21, 16 and
# 11, and ($op*2+1) in the low bits, so the lowest bit is always set.
# Returns the directive text with the word in upper-case hexadecimal.
sub vsxmem_op {
    my ($suffix, $target, $base, $index, $xop) = @_;

    my $word = 31 << 26;
    $word |= $target << 21;
    $word |= $base << 16;
    $word |= $index << 11;
    $word |= $xop * 2 + 1;

    return " .long " . sprintf("0x%X", $word);
}
|
||||
# made-up unaligned memory reference AltiVec/VMX instructions
|
||||
my $lvx_u = sub { vsxmem_op(@_, 844); }; # lxvd2x
|
||||
my $stvx_u = sub { vsxmem_op(@_, 972); }; # stxvd2x
|
||||
my $lvdx_u = sub { vsxmem_op(@_, 588); }; # lxsdx
|
||||
my $stvdx_u = sub { vsxmem_op(@_, 716); }; # stxsdx
|
||||
my $lvx_4w = sub { vsxmem_op(@_, 780); }; # lxvw4x
|
||||
my $stvx_4w = sub { vsxmem_op(@_, 908); }; # stxvw4x
|
||||
|
||||
# PowerISA 2.07 stuff
|
||||
# Encode a vector instruction as a raw ".long" word.
#
# Arguments: ($f, $vrt, $vra, $vrb, $op) - the mnemonic suffix flag
# (unused), three register-field values and the opcode.  The word is built
# from 4 in the top six bits, the register fields at bit offsets 21, 16
# and 11, and $op OR-ed into the low bits unchanged.
# Returns the directive text with the word in upper-case hexadecimal.
sub vcrypto_op {
    my ($suffix, $vt, $va, $vb, $xop) = @_;

    my $word = 4 << 26;
    $word |= $vt << 21;
    $word |= $va << 16;
    $word |= $vb << 11;
    $word |= $xop;

    return " .long " . sprintf("0x%X", $word);
}
|
||||
my $vcipher = sub { vcrypto_op(@_, 1288); };
|
||||
my $vcipherlast = sub { vcrypto_op(@_, 1289); };
|
||||
my $vncipher = sub { vcrypto_op(@_, 1352); };
|
||||
my $vncipherlast= sub { vcrypto_op(@_, 1353); };
|
||||
my $vsbox = sub { vcrypto_op(@_, 0, 1480); };
|
||||
my $vshasigmad = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1730); };
|
||||
my $vshasigmaw = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1666); };
|
||||
my $vpmsumb = sub { vcrypto_op(@_, 1032); };
|
||||
my $vpmsumd = sub { vcrypto_op(@_, 1224); };
|
||||
# The handler is looked up by mnemonic name, so it must be called $vpmsumh
# for the instruction to be translated.  The misspelt $vpmsubh is kept as
# an alias so that any input already using that spelling still works.
my $vpmsumh = sub { vcrypto_op(@_, 1096); };
my $vpmsubh = $vpmsumh;
|
||||
my $vpmsumw = sub { vcrypto_op(@_, 1160); };
|
||||
my $vaddudm = sub { vcrypto_op(@_, 192); };
|
||||
my $vadduqm = sub { vcrypto_op(@_, 256); };
|
||||
|
||||
my $mtsle = sub {
|
||||
my ($f, $arg) = @_;
|
||||
" .long ".sprintf "0x%X",(31<<26)|($arg<<21)|(147*2);
|
||||
};
|
||||
|
||||
print "#include <asm/ppc_asm.h>\n" if $flavour =~ /linux/;
|
||||
|
||||
while($line=<>) {
|
||||
|
||||
$line =~ s|[#!;].*$||; # get rid of asm-style comments...
|
||||
$line =~ s|/\*.*\*/||; # ... and C-style comments...
|
||||
$line =~ s|^\s+||; # ... and skip white spaces in beginning...
|
||||
$line =~ s|\s+$||; # ... and at the end
|
||||
|
||||
{
|
||||
$line =~ s|\b\.L(\w+)|L$1|g; # common denominator for Locallabel
|
||||
$line =~ s|\bL(\w+)|\.L$1|g if ($dotinlocallabels);
|
||||
}
|
||||
|
||||
{
|
||||
$line =~ s|^\s*(\.?)(\w+)([\.\+\-]?)\s*||;
|
||||
my $c = $1; $c = "\t" if ($c eq "");
|
||||
my $mnemonic = $2;
|
||||
my $f = $3;
|
||||
my $opcode = eval("\$$mnemonic");
|
||||
$line =~ s/\b(c?[rf]|v|vs)([0-9]+)\b/$2/g if ($c ne "." and $flavour !~ /osx/);
|
||||
if (ref($opcode) eq 'CODE') { $line = &$opcode($f,split(',',$line)); }
|
||||
elsif ($mnemonic) { $line = $c.$mnemonic.$f."\t".$line; }
|
||||
}
|
||||
|
||||
print $line if ($line);
|
||||
print "\n";
|
||||
}
|
||||
|
||||
close STDOUT;
|
||||
Reference in New Issue
Block a user