Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 6 Sep 2017 22:17:17 +0000 (15:17 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 6 Sep 2017 22:17:17 +0000 (15:17 -0700)
Pull crypto updates from Herbert Xu:
 "Here is the crypto update for 4.14:

  API:
   - Defer scompress scratch buffer allocation to first use.
   - Add __crypto_xor that takes separate src and dst operands.
   - Add ahash multiple registration interface.
   - Revamped aead/skcipher algif code to fix async IO properly.

  Drivers:
   - Add non-SIMD fallback code path on ARM for SVE.
   - Add AMD Security Processor framework for ccp.
   - Add support for RSA in ccp.
   - Add XTS-AES-256 support for CCP version 5.
   - Add support for PRNG in sun4i-ss.
   - Add support for DPAA2 in caam.
   - Add ARTPEC crypto support.
   - Add Freescale RNGC hwrng support.
   - Add Microchip / Atmel ECC driver.
   - Add support for STM32 HASH module"
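
The __crypto_xor change called out in the API notes above adds a crypto_xor_cpy() wrapper in <crypto/algapi.h> that XORs two distinct source buffers into a destination, replacing the old "memcpy() then crypto_xor() in place" pattern visible in the ctr_encrypt() hunks further down. The user-space sketch below only models the helper's semantics (the buffer names are invented; the in-kernel helper takes u8 pointers and an unsigned int length):

#include <stdio.h>
#include <string.h>

/* Local stand-in for crypto_xor_cpy(): dst = src1 ^ src2, byte by byte. */
static void xor_cpy(unsigned char *dst, const unsigned char *src1,
		    const unsigned char *src2, unsigned int size)
{
	while (size--)
		*dst++ = *src1++ ^ *src2++;
}

int main(void)
{
	unsigned char src[4] = { 0xde, 0xad, 0xbe, 0xef };
	unsigned char pad[4] = { 0xff, 0x00, 0xff, 0x00 };
	unsigned char old_way[4], new_way[4];
	unsigned int i;

	/* Old pattern: copy the source, then XOR the keystream in place. */
	memcpy(old_way, src, sizeof(old_way));
	for (i = 0; i < sizeof(old_way); i++)
		old_way[i] ^= pad[i];

	/* New pattern: one call with separate source and destination operands. */
	xor_cpy(new_way, src, pad, sizeof(new_way));

	printf("results match: %d\n", !memcmp(old_way, new_way, sizeof(old_way)));
	return 0;
}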

* 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (116 commits)
  crypto: af_alg - get_page upon reassignment to TX SGL
  crypto: cavium/nitrox - Fix an error handling path in 'nitrox_probe()'
  crypto: inside-secure - fix an error handling path in safexcel_probe()
  crypto: rockchip - Don't dequeue the request when device is busy
  crypto: cavium - add release_firmware to all return case
  crypto: sahara - constify platform_device_id
  MAINTAINERS: Add ARTPEC crypto maintainer
  crypto: axis - add ARTPEC-6/7 crypto accelerator driver
  crypto: hash - add crypto_(un)register_ahashes()
  dt-bindings: crypto: add ARTPEC crypto
  crypto: algif_aead - fix comment regarding memory layout
  crypto: ccp - use dma_mapping_error to check map error
  lib/mpi: fix build with clang
  crypto: sahara - Remove leftover from previous used spinlock
  crypto: sahara - Fix dma unmap direction
  crypto: af_alg - consolidation of duplicate code
  crypto: caam - Remove unused dentry members
  crypto: ccp - select CONFIG_CRYPTO_RSA
  crypto: ccp - avoid uninitialized variable warning
  crypto: serpent - improve __serpent_setkey with UBSAN
  ...
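
The bulk hash registration interface from the "crypto: hash - add crypto_(un)register_ahashes()" commit above (the ARTPEC-6/7 driver in this pull is its first user) registers an array of ahash_alg descriptors in one call. A rough kernel-module sketch of the calling pattern follows; the driver and array names are made up and the ahash_alg contents are elided, so this is illustrative only, not a buildable driver:

#include <crypto/internal/hash.h>
#include <linux/kernel.h>
#include <linux/module.h>

/* The descriptors would be fully populated in a real driver; elided here. */
static struct ahash_alg my_ahash_algs[2];

static int __init my_hash_mod_init(void)
{
	/* Registers every entry; earlier entries are unwound on error. */
	return crypto_register_ahashes(my_ahash_algs,
				       ARRAY_SIZE(my_ahash_algs));
}

static void __exit my_hash_mod_exit(void)
{
	crypto_unregister_ahashes(my_ahash_algs, ARRAY_SIZE(my_ahash_algs));
}

module_init(my_hash_mod_init);
module_exit(my_hash_mod_exit);
MODULE_LICENSE("GPL");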

136 files changed:
Documentation/devicetree/bindings/crypto/artpec6-crypto.txt [new file with mode: 0644]
Documentation/devicetree/bindings/crypto/atmel-crypto.txt
Documentation/devicetree/bindings/crypto/st,stm32-hash.txt [new file with mode: 0644]
Documentation/devicetree/bindings/rng/imx-rngc.txt [new file with mode: 0644]
MAINTAINERS
arch/arm/crypto/Kconfig
arch/arm/crypto/aes-ce-glue.c
arch/arm/crypto/aes-cipher-core.S
arch/arm/crypto/aes-neonbs-glue.c
arch/arm/crypto/ghash-ce-core.S
arch/arm/crypto/ghash-ce-glue.c
arch/arm64/crypto/Kconfig
arch/arm64/crypto/aes-ce-ccm-core.S
arch/arm64/crypto/aes-ce-ccm-glue.c
arch/arm64/crypto/aes-ce-cipher.c
arch/arm64/crypto/aes-ce.S
arch/arm64/crypto/aes-cipher-core.S
arch/arm64/crypto/aes-ctr-fallback.h [new file with mode: 0644]
arch/arm64/crypto/aes-glue.c
arch/arm64/crypto/aes-neonbs-glue.c
arch/arm64/crypto/chacha20-neon-glue.c
arch/arm64/crypto/crc32-ce-glue.c
arch/arm64/crypto/crct10dif-ce-glue.c
arch/arm64/crypto/ghash-ce-core.S
arch/arm64/crypto/ghash-ce-glue.c
arch/arm64/crypto/sha1-ce-glue.c
arch/arm64/crypto/sha2-ce-glue.c
arch/arm64/crypto/sha256-glue.c
arch/sparc/crypto/aes_glue.c
arch/x86/crypto/aesni-intel_glue.c
arch/x86/crypto/blowfish_glue.c
arch/x86/crypto/cast5_avx_glue.c
arch/x86/crypto/des3_ede_glue.c
crypto/Kconfig
crypto/af_alg.c
crypto/ahash.c
crypto/algapi.c
crypto/algif_aead.c
crypto/algif_skcipher.c
crypto/ctr.c
crypto/ecdh.c
crypto/pcbc.c
crypto/rng.c
crypto/scompress.c
crypto/serpent_generic.c
crypto/tcrypt.c
drivers/char/hw_random/Kconfig
drivers/char/hw_random/Makefile
drivers/char/hw_random/core.c
drivers/char/hw_random/imx-rngc.c [new file with mode: 0644]
drivers/crypto/Kconfig
drivers/crypto/Makefile
drivers/crypto/atmel-ecc.c [new file with mode: 0644]
drivers/crypto/atmel-ecc.h [new file with mode: 0644]
drivers/crypto/atmel-sha.c
drivers/crypto/atmel-tdes.c
drivers/crypto/axis/Makefile [new file with mode: 0644]
drivers/crypto/axis/artpec6_crypto.c [new file with mode: 0644]
drivers/crypto/bcm/cipher.c
drivers/crypto/bcm/cipher.h
drivers/crypto/caam/caamalg.c
drivers/crypto/caam/caamalg_desc.c
drivers/crypto/caam/caamalg_qi.c
drivers/crypto/caam/caamhash.c
drivers/crypto/caam/caamrng.c
drivers/crypto/caam/ctrl.c
drivers/crypto/caam/ctrl.h
drivers/crypto/caam/error.c
drivers/crypto/caam/error.h
drivers/crypto/caam/intern.h
drivers/crypto/caam/jr.c
drivers/crypto/caam/qi.c
drivers/crypto/caam/qi.h
drivers/crypto/caam/regs.h
drivers/crypto/caam/sg_sw_qm2.h [new file with mode: 0644]
drivers/crypto/caam/sg_sw_sec4.h
drivers/crypto/cavium/cpt/cptpf_main.c
drivers/crypto/cavium/nitrox/nitrox_main.c
drivers/crypto/ccp/Kconfig
drivers/crypto/ccp/Makefile
drivers/crypto/ccp/ccp-crypto-aes-galois.c
drivers/crypto/ccp/ccp-crypto-aes-xts.c
drivers/crypto/ccp/ccp-crypto-des3.c
drivers/crypto/ccp/ccp-crypto-main.c
drivers/crypto/ccp/ccp-crypto-rsa.c [new file with mode: 0644]
drivers/crypto/ccp/ccp-crypto-sha.c
drivers/crypto/ccp/ccp-crypto.h
drivers/crypto/ccp/ccp-debugfs.c
drivers/crypto/ccp/ccp-dev-v3.c
drivers/crypto/ccp/ccp-dev-v5.c
drivers/crypto/ccp/ccp-dev.c
drivers/crypto/ccp/ccp-dev.h
drivers/crypto/ccp/ccp-dmaengine.c
drivers/crypto/ccp/ccp-ops.c
drivers/crypto/ccp/ccp-pci.c [deleted file]
drivers/crypto/ccp/ccp-platform.c [deleted file]
drivers/crypto/ccp/sp-dev.c [new file with mode: 0644]
drivers/crypto/ccp/sp-dev.h [new file with mode: 0644]
drivers/crypto/ccp/sp-pci.c [new file with mode: 0644]
drivers/crypto/ccp/sp-platform.c [new file with mode: 0644]
drivers/crypto/geode-aes.c
drivers/crypto/img-hash.c
drivers/crypto/inside-secure/safexcel.c
drivers/crypto/mediatek/mtk-platform.c
drivers/crypto/mxc-scc.c
drivers/crypto/mxs-dcp.c
drivers/crypto/n2_core.c
drivers/crypto/omap-aes.c
drivers/crypto/omap-des.c
drivers/crypto/omap-sham.c
drivers/crypto/qat/qat_common/adf_aer.c
drivers/crypto/rockchip/rk3288_crypto.c
drivers/crypto/rockchip/rk3288_crypto.h
drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c
drivers/crypto/rockchip/rk3288_crypto_ahash.c
drivers/crypto/sahara.c
drivers/crypto/stm32/Kconfig
drivers/crypto/stm32/Makefile
drivers/crypto/stm32/stm32-hash.c [new file with mode: 0644]
drivers/crypto/stm32/stm32_crc32.c
drivers/crypto/sunxi-ss/Makefile
drivers/crypto/sunxi-ss/sun4i-ss-core.c
drivers/crypto/sunxi-ss/sun4i-ss-prng.c [new file with mode: 0644]
drivers/crypto/sunxi-ss/sun4i-ss.h
drivers/crypto/virtio/virtio_crypto_algs.c
drivers/crypto/virtio/virtio_crypto_common.h
drivers/crypto/virtio/virtio_crypto_core.c
drivers/crypto/vmx/aes_ctr.c
drivers/md/dm-crypt.c
include/crypto/algapi.h
include/crypto/if_alg.h
include/crypto/internal/akcipher.h
include/crypto/internal/hash.h
include/crypto/kpp.h
include/linux/ccp.h
lib/mpi/longlong.h

diff --git a/Documentation/devicetree/bindings/crypto/artpec6-crypto.txt b/Documentation/devicetree/bindings/crypto/artpec6-crypto.txt
new file mode 100644 (file)
index 0000000..d9cca48
--- /dev/null
@@ -0,0 +1,16 @@
+Axis crypto engine with PDMA interface.
+
+Required properties:
+- compatible : Should be one of the following strings:
+       "axis,artpec6-crypto" for the version in the Axis ARTPEC-6 SoC
+       "axis,artpec7-crypto" for the version in the Axis ARTPEC-7 SoC.
+- reg: Base address and size for the PDMA register area.
+- interrupts: Interrupt handle for the PDMA interrupt line.
+
+Example:
+
+crypto@f4264000 {
+       compatible = "axis,artpec6-crypto";
+       reg = <0xf4264000 0x1000>;
+       interrupts = <GIC_SPI 19 IRQ_TYPE_LEVEL_HIGH>;
+};
index f2aab3dc2b523459f2f1a48c7a7360b31f6abc22..7de1a9674c7075d33833ccd2f310bd905d8ef4fe 100644 (file)
@@ -66,3 +66,16 @@ sha@f8034000 {
        dmas = <&dma1 2 17>;
        dma-names = "tx";
 };
+
+* Elliptic Curve Cryptography (I2C)
+
+Required properties:
+- compatible : must be "atmel,atecc508a".
+- reg: I2C bus address of the device.
+- clock-frequency: must be present in the i2c controller node.
+
+Example:
+atecc508a@C0 {
+       compatible = "atmel,atecc508a";
+       reg = <0xC0>;
+};
diff --git a/Documentation/devicetree/bindings/crypto/st,stm32-hash.txt b/Documentation/devicetree/bindings/crypto/st,stm32-hash.txt
new file mode 100644 (file)
index 0000000..04fc246
--- /dev/null
@@ -0,0 +1,30 @@
+* STMicroelectronics STM32 HASH
+
+Required properties:
+- compatible: Should contain entries for this and backward compatible
+  HASH versions:
+  - "st,stm32f456-hash" for stm32 F456.
+  - "st,stm32f756-hash" for stm32 F756.
+- reg: The address and length of the peripheral registers space
+- interrupts: the interrupt specifier for the HASH
+- clocks: The input clock of the HASH instance
+
+Optional properties:
+- resets: The input reset of the HASH instance
+- dmas: DMA specifiers for the HASH. See the DMA client binding,
+        Documentation/devicetree/bindings/dma/dma.txt
+- dma-names: DMA request name. Should be "in" if a dma is present.
+- dma-maxburst: Set number of maximum dma burst supported
+
+Example:
+
+hash1: hash@50060400 {
+       compatible = "st,stm32f756-hash";
+       reg = <0x50060400 0x400>;
+       interrupts = <80>;
+       clocks = <&rcc 0 STM32F7_AHB2_CLOCK(HASH)>;
+       resets = <&rcc STM32F7_AHB2_RESET(HASH)>;
+       dmas = <&dma2 7 2 0x400 0x0>;
+       dma-names = "in";
+       dma-maxburst = <0>;
+};
diff --git a/Documentation/devicetree/bindings/rng/imx-rngc.txt b/Documentation/devicetree/bindings/rng/imx-rngc.txt
new file mode 100644 (file)
index 0000000..93c7174
--- /dev/null
@@ -0,0 +1,21 @@
+Freescale RNGC (Random Number Generator Version C)
+
+The driver also supports version B, which is mostly compatible
+with version C.
+
+Required properties:
+- compatible : should be one of
+               "fsl,imx25-rngb"
+               "fsl,imx35-rngc"
+- reg : offset and length of the register set of this block
+- interrupts : the interrupt number for the RNGC block
+- clocks : the RNGC clk source
+
+Example:
+
+rng@53fb0000 {
+       compatible = "fsl,imx25-rngb";
+       reg = <0x53fb0000 0x4000>;
+       interrupts = <22>;
+       clocks = <&trng_clk>;
+};
index 961423bac24ca17d904a408712ec40849ffe662a..11dde284a426e626ac0e240696e2bfe82f6cc752 100644 (file)
@@ -1162,6 +1162,7 @@ L:        linux-arm-kernel@axis.com
 F:     arch/arm/mach-artpec
 F:     arch/arm/boot/dts/artpec6*
 F:     drivers/clk/axis
+F:     drivers/crypto/axis
 F:     drivers/pinctrl/pinctrl-artpec*
 F:     Documentation/devicetree/bindings/pinctrl/axis,artpec6-pinctrl.txt
 
@@ -8770,6 +8771,12 @@ F:       drivers/dma/at_hdmac.c
 F:     drivers/dma/at_hdmac_regs.h
 F:     include/linux/platform_data/dma-atmel.h
 
+MICROCHIP / ATMEL ECC DRIVER
+M:     Tudor Ambarus <tudor.ambarus@microchip.com>
+L:     linux-crypto@vger.kernel.org
+S:     Maintained
+F:     drivers/crypto/atmel-ecc.*
+
 MICROCHIP / ATMEL ISC DRIVER
 M:     Songjun Wu <songjun.wu@microchip.com>
 L:     linux-media@vger.kernel.org
index b9adedcc5b2e2d58b43be91b803f7ed8a61ed8a4..ec72752d5668eb3bbe1e013ecf5f2b9f9406b69a 100644 (file)
@@ -94,14 +94,15 @@ config CRYPTO_AES_ARM_CE
          ARMv8 Crypto Extensions
 
 config CRYPTO_GHASH_ARM_CE
-       tristate "PMULL-accelerated GHASH using ARMv8 Crypto Extensions"
+       tristate "PMULL-accelerated GHASH using NEON/ARMv8 Crypto Extensions"
        depends on KERNEL_MODE_NEON
        select CRYPTO_HASH
        select CRYPTO_CRYPTD
        help
          Use an implementation of GHASH (used by the GCM AEAD chaining mode)
          that uses the 64x64 to 128 bit polynomial multiplication (vmull.p64)
-         that is part of the ARMv8 Crypto Extensions
+         that is part of the ARMv8 Crypto Extensions, or a slower variant that
+         uses the vmull.p8 instruction that is part of the basic NEON ISA.
 
 config CRYPTO_CRCT10DIF_ARM_CE
        tristate "CRCT10DIF digest algorithm using PMULL instructions"
index 0f966a8ca1cef5f9b122530652d38b9a784e7876..d0a9cec73707b7be885b509e5befe393e90b902f 100644 (file)
@@ -285,9 +285,7 @@ static int ctr_encrypt(struct skcipher_request *req)
 
                ce_aes_ctr_encrypt(tail, NULL, (u8 *)ctx->key_enc,
                                   num_rounds(ctx), blocks, walk.iv);
-               if (tdst != tsrc)
-                       memcpy(tdst, tsrc, nbytes);
-               crypto_xor(tdst, tail, nbytes);
+               crypto_xor_cpy(tdst, tsrc, tail, nbytes);
                err = skcipher_walk_done(&walk, 0);
        }
        kernel_neon_end();
index c817a86c4ca89e1472a15b0ede4ba6e654b4c399..54b384084637b7e574a1d15a52d16ffc572b815e 100644 (file)
@@ -10,6 +10,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/cache.h>
 
        .text
        .align          5
        .endif
        .endm
 
-       .macro          __load, out, in, idx
+       .macro          __load, out, in, idx, sz, op
        .if             __LINUX_ARM_ARCH__ < 7 && \idx > 0
-       ldr             \out, [ttab, \in, lsr #(8 * \idx) - 2]
+       ldr\op          \out, [ttab, \in, lsr #(8 * \idx) - \sz]
        .else
-       ldr             \out, [ttab, \in, lsl #2]
+       ldr\op          \out, [ttab, \in, lsl #\sz]
        .endif
        .endm
 
-       .macro          __hround, out0, out1, in0, in1, in2, in3, t3, t4, enc
+       .macro          __hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op
        __select        \out0, \in0, 0
        __select        t0, \in1, 1
-       __load          \out0, \out0, 0
-       __load          t0, t0, 1
+       __load          \out0, \out0, 0, \sz, \op
+       __load          t0, t0, 1, \sz, \op
 
        .if             \enc
        __select        \out1, \in1, 0
        __select        \out1, \in3, 0
        __select        t1, \in0, 1
        .endif
-       __load          \out1, \out1, 0
+       __load          \out1, \out1, 0, \sz, \op
        __select        t2, \in2, 2
-       __load          t1, t1, 1
-       __load          t2, t2, 2
+       __load          t1, t1, 1, \sz, \op
+       __load          t2, t2, 2, \sz, \op
 
        eor             \out0, \out0, t0, ror #24
 
@@ -68,9 +69,9 @@
        __select        \t3, \in1, 2
        __select        \t4, \in2, 3
        .endif
-       __load          \t3, \t3, 2
-       __load          t0, t0, 3
-       __load          \t4, \t4, 3
+       __load          \t3, \t3, 2, \sz, \op
+       __load          t0, t0, 3, \sz, \op
+       __load          \t4, \t4, 3, \sz, \op
 
        eor             \out1, \out1, t1, ror #24
        eor             \out0, \out0, t2, ror #16
        eor             \out1, \out1, t2
        .endm
 
-       .macro          fround, out0, out1, out2, out3, in0, in1, in2, in3
-       __hround        \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1
-       __hround        \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1
+       .macro          fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
+       __hround        \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
+       __hround        \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op
        .endm
 
-       .macro          iround, out0, out1, out2, out3, in0, in1, in2, in3
-       __hround        \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0
-       __hround        \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0
+       .macro          iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
+       __hround        \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
+       __hround        \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op
        .endm
 
        .macro          __rev, out, in
        .endif
        .endm
 
-       .macro          do_crypt, round, ttab, ltab
+       .macro          do_crypt, round, ttab, ltab, bsz
        push            {r3-r11, lr}
 
        ldr             r4, [in]
 
 1:     subs            rounds, rounds, #4
        \round          r8, r9, r10, r11, r4, r5, r6, r7
-       __adrl          ttab, \ltab, ls
+       bls             2f
        \round          r4, r5, r6, r7, r8, r9, r10, r11
-       bhi             0b
+       b               0b
+
+2:     __adrl          ttab, \ltab
+       \round          r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b
 
 #ifdef CONFIG_CPU_BIG_ENDIAN
        __rev           r4, r4
        .ltorg
        .endm
 
+       .align          L1_CACHE_SHIFT
+       .type           __aes_arm_inverse_sbox, %object
+__aes_arm_inverse_sbox:
+       .byte           0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
+       .byte           0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
+       .byte           0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
+       .byte           0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
+       .byte           0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
+       .byte           0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
+       .byte           0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
+       .byte           0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
+       .byte           0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
+       .byte           0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
+       .byte           0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
+       .byte           0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
+       .byte           0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
+       .byte           0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
+       .byte           0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
+       .byte           0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
+       .byte           0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
+       .byte           0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
+       .byte           0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
+       .byte           0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
+       .byte           0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
+       .byte           0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
+       .byte           0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
+       .byte           0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
+       .byte           0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
+       .byte           0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
+       .byte           0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
+       .byte           0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
+       .byte           0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
+       .byte           0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
+       .byte           0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
+       .byte           0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
+       .size           __aes_arm_inverse_sbox, . - __aes_arm_inverse_sbox
+
 ENTRY(__aes_arm_encrypt)
-       do_crypt        fround, crypto_ft_tab, crypto_fl_tab
+       do_crypt        fround, crypto_ft_tab, crypto_ft_tab + 1, 2
 ENDPROC(__aes_arm_encrypt)
 
+       .align          5
 ENTRY(__aes_arm_decrypt)
-       do_crypt        iround, crypto_it_tab, crypto_il_tab
+       do_crypt        iround, crypto_it_tab, __aes_arm_inverse_sbox, 0
 ENDPROC(__aes_arm_decrypt)
index c763779614442c22cbb303b9d095d30d470092ed..18768f3304495157f1ccdc411901f3ebdb4dd635 100644 (file)
@@ -221,9 +221,8 @@ static int ctr_encrypt(struct skcipher_request *req)
                        u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
                        u8 *src = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
 
-                       if (dst != src)
-                               memcpy(dst, src, walk.total % AES_BLOCK_SIZE);
-                       crypto_xor(dst, final, walk.total % AES_BLOCK_SIZE);
+                       crypto_xor_cpy(dst, src, final,
+                                      walk.total % AES_BLOCK_SIZE);
 
                        err = skcipher_walk_done(&walk, 0);
                        break;
index f6ab8bcc9efe7f1b8e11b1b42f2530a55dd25b1b..2f78c10b188152f80409869a5062c63a751b9442 100644 (file)
@@ -1,7 +1,7 @@
 /*
- * Accelerated GHASH implementation with ARMv8 vmull.p64 instructions.
+ * Accelerated GHASH implementation with NEON/ARMv8 vmull.p8/64 instructions.
  *
- * Copyright (C) 2015 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2015 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 as published
 #include <asm/assembler.h>
 
        SHASH           .req    q0
-       SHASH2          .req    q1
-       T1              .req    q2
-       T2              .req    q3
-       MASK            .req    q4
-       XL              .req    q5
-       XM              .req    q6
-       XH              .req    q7
-       IN1             .req    q7
+       T1              .req    q1
+       XL              .req    q2
+       XM              .req    q3
+       XH              .req    q4
+       IN1             .req    q4
 
        SHASH_L         .req    d0
        SHASH_H         .req    d1
-       SHASH2_L        .req    d2
-       T1_L            .req    d4
-       MASK_L          .req    d8
-       XL_L            .req    d10
-       XL_H            .req    d11
-       XM_L            .req    d12
-       XM_H            .req    d13
-       XH_L            .req    d14
+       T1_L            .req    d2
+       T1_H            .req    d3
+       XL_L            .req    d4
+       XL_H            .req    d5
+       XM_L            .req    d6
+       XM_H            .req    d7
+       XH_L            .req    d8
+
+       t0l             .req    d10
+       t0h             .req    d11
+       t1l             .req    d12
+       t1h             .req    d13
+       t2l             .req    d14
+       t2h             .req    d15
+       t3l             .req    d16
+       t3h             .req    d17
+       t4l             .req    d18
+       t4h             .req    d19
+
+       t0q             .req    q5
+       t1q             .req    q6
+       t2q             .req    q7
+       t3q             .req    q8
+       t4q             .req    q9
+       T2              .req    q9
+
+       s1l             .req    d20
+       s1h             .req    d21
+       s2l             .req    d22
+       s2h             .req    d23
+       s3l             .req    d24
+       s3h             .req    d25
+       s4l             .req    d26
+       s4h             .req    d27
+
+       MASK            .req    d28
+       SHASH2_p8       .req    d28
+
+       k16             .req    d29
+       k32             .req    d30
+       k48             .req    d31
+       SHASH2_p64      .req    d31
 
        .text
        .fpu            crypto-neon-fp-armv8
 
+       .macro          __pmull_p64, rd, rn, rm, b1, b2, b3, b4
+       vmull.p64       \rd, \rn, \rm
+       .endm
+
        /*
-        * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
-        *                         struct ghash_key const *k, const char *head)
+        * This implementation of 64x64 -> 128 bit polynomial multiplication
+        * using vmull.p8 instructions (8x8 -> 16) is taken from the paper
+        * "Fast Software Polynomial Multiplication on ARM Processors Using
+        * the NEON Engine" by Danilo Camara, Conrado Gouvea, Julio Lopez and
+        * Ricardo Dahab (https://hal.inria.fr/hal-01506572)
+        *
+        * It has been slightly tweaked for in-order performance, and to allow
+        * 'rq' to overlap with 'ad' or 'bd'.
         */
-ENTRY(pmull_ghash_update)
-       vld1.64         {SHASH}, [r3]
+       .macro          __pmull_p8, rq, ad, bd, b1=t4l, b2=t3l, b3=t4l, b4=t3l
+       vext.8          t0l, \ad, \ad, #1       @ A1
+       .ifc            \b1, t4l
+       vext.8          t4l, \bd, \bd, #1       @ B1
+       .endif
+       vmull.p8        t0q, t0l, \bd           @ F = A1*B
+       vext.8          t1l, \ad, \ad, #2       @ A2
+       vmull.p8        t4q, \ad, \b1           @ E = A*B1
+       .ifc            \b2, t3l
+       vext.8          t3l, \bd, \bd, #2       @ B2
+       .endif
+       vmull.p8        t1q, t1l, \bd           @ H = A2*B
+       vext.8          t2l, \ad, \ad, #3       @ A3
+       vmull.p8        t3q, \ad, \b2           @ G = A*B2
+       veor            t0q, t0q, t4q           @ L = E + F
+       .ifc            \b3, t4l
+       vext.8          t4l, \bd, \bd, #3       @ B3
+       .endif
+       vmull.p8        t2q, t2l, \bd           @ J = A3*B
+       veor            t0l, t0l, t0h           @ t0 = (L) (P0 + P1) << 8
+       veor            t1q, t1q, t3q           @ M = G + H
+       .ifc            \b4, t3l
+       vext.8          t3l, \bd, \bd, #4       @ B4
+       .endif
+       vmull.p8        t4q, \ad, \b3           @ I = A*B3
+       veor            t1l, t1l, t1h           @ t1 = (M) (P2 + P3) << 16
+       vmull.p8        t3q, \ad, \b4           @ K = A*B4
+       vand            t0h, t0h, k48
+       vand            t1h, t1h, k32
+       veor            t2q, t2q, t4q           @ N = I + J
+       veor            t0l, t0l, t0h
+       veor            t1l, t1l, t1h
+       veor            t2l, t2l, t2h           @ t2 = (N) (P4 + P5) << 24
+       vand            t2h, t2h, k16
+       veor            t3l, t3l, t3h           @ t3 = (K) (P6 + P7) << 32
+       vmov.i64        t3h, #0
+       vext.8          t0q, t0q, t0q, #15
+       veor            t2l, t2l, t2h
+       vext.8          t1q, t1q, t1q, #14
+       vmull.p8        \rq, \ad, \bd           @ D = A*B
+       vext.8          t2q, t2q, t2q, #13
+       vext.8          t3q, t3q, t3q, #12
+       veor            t0q, t0q, t1q
+       veor            t2q, t2q, t3q
+       veor            \rq, \rq, t0q
+       veor            \rq, \rq, t2q
+       .endm
+
+       //
+       // PMULL (64x64->128) based reduction for CPUs that can do
+       // it in a single instruction.
+       //
+       .macro          __pmull_reduce_p64
+       vmull.p64       T1, XL_L, MASK
+
+       veor            XH_L, XH_L, XM_H
+       vext.8          T1, T1, T1, #8
+       veor            XL_H, XL_H, XM_L
+       veor            T1, T1, XL
+
+       vmull.p64       XL, T1_H, MASK
+       .endm
+
+       //
+       // Alternative reduction for CPUs that lack support for the
+       // 64x64->128 PMULL instruction
+       //
+       .macro          __pmull_reduce_p8
+       veor            XL_H, XL_H, XM_L
+       veor            XH_L, XH_L, XM_H
+
+       vshl.i64        T1, XL, #57
+       vshl.i64        T2, XL, #62
+       veor            T1, T1, T2
+       vshl.i64        T2, XL, #63
+       veor            T1, T1, T2
+       veor            XL_H, XL_H, T1_L
+       veor            XH_L, XH_L, T1_H
+
+       vshr.u64        T1, XL, #1
+       veor            XH, XH, XL
+       veor            XL, XL, T1
+       vshr.u64        T1, T1, #6
+       vshr.u64        XL, XL, #1
+       .endm
+
+       .macro          ghash_update, pn
        vld1.64         {XL}, [r1]
-       vmov.i8         MASK, #0xe1
-       vext.8          SHASH2, SHASH, SHASH, #8
-       vshl.u64        MASK, MASK, #57
-       veor            SHASH2, SHASH2, SHASH
 
        /* do the head block first, if supplied */
        ldr             ip, [sp]
@@ -62,33 +184,59 @@ ENTRY(pmull_ghash_update)
 #ifndef CONFIG_CPU_BIG_ENDIAN
        vrev64.8        T1, T1
 #endif
-       vext.8          T2, XL, XL, #8
        vext.8          IN1, T1, T1, #8
-       veor            T1, T1, T2
+       veor            T1_L, T1_L, XL_H
        veor            XL, XL, IN1
 
-       vmull.p64       XH, SHASH_H, XL_H               @ a1 * b1
+       __pmull_\pn     XH, XL_H, SHASH_H, s1h, s2h, s3h, s4h   @ a1 * b1
        veor            T1, T1, XL
-       vmull.p64       XL, SHASH_L, XL_L               @ a0 * b0
-       vmull.p64       XM, SHASH2_L, T1_L              @ (a1 + a0)(b1 + b0)
+       __pmull_\pn     XL, XL_L, SHASH_L, s1l, s2l, s3l, s4l   @ a0 * b0
+       __pmull_\pn     XM, T1_L, SHASH2_\pn                    @ (a1+a0)(b1+b0)
 
-       vext.8          T1, XL, XH, #8
-       veor            T2, XL, XH
+       veor            T1, XL, XH
        veor            XM, XM, T1
-       veor            XM, XM, T2
-       vmull.p64       T2, XL_L, MASK_L
 
-       vmov            XH_L, XM_H
-       vmov            XM_H, XL_L
+       __pmull_reduce_\pn
 
-       veor            XL, XM, T2
-       vext.8          T2, XL, XL, #8
-       vmull.p64       XL, XL_L, MASK_L
-       veor            T2, T2, XH
-       veor            XL, XL, T2
+       veor            T1, T1, XH
+       veor            XL, XL, T1
 
        bne             0b
 
        vst1.64         {XL}, [r1]
        bx              lr
-ENDPROC(pmull_ghash_update)
+       .endm
+
+       /*
+        * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
+        *                         struct ghash_key const *k, const char *head)
+        */
+ENTRY(pmull_ghash_update_p64)
+       vld1.64         {SHASH}, [r3]
+       veor            SHASH2_p64, SHASH_L, SHASH_H
+
+       vmov.i8         MASK, #0xe1
+       vshl.u64        MASK, MASK, #57
+
+       ghash_update    p64
+ENDPROC(pmull_ghash_update_p64)
+
+ENTRY(pmull_ghash_update_p8)
+       vld1.64         {SHASH}, [r3]
+       veor            SHASH2_p8, SHASH_L, SHASH_H
+
+       vext.8          s1l, SHASH_L, SHASH_L, #1
+       vext.8          s2l, SHASH_L, SHASH_L, #2
+       vext.8          s3l, SHASH_L, SHASH_L, #3
+       vext.8          s4l, SHASH_L, SHASH_L, #4
+       vext.8          s1h, SHASH_H, SHASH_H, #1
+       vext.8          s2h, SHASH_H, SHASH_H, #2
+       vext.8          s3h, SHASH_H, SHASH_H, #3
+       vext.8          s4h, SHASH_H, SHASH_H, #4
+
+       vmov.i64        k16, #0xffff
+       vmov.i64        k32, #0xffffffff
+       vmov.i64        k48, #0xffffffffffff
+
+       ghash_update    p8
+ENDPROC(pmull_ghash_update_p8)
index 6bac8bea9f1e8e95a9ca80892bcdf03b02b8f48b..d9bb52cae2ac9246fab3626bc5d8438639e97fbc 100644 (file)
@@ -22,6 +22,7 @@
 MODULE_DESCRIPTION("GHASH secure hash using ARMv8 Crypto Extensions");
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("ghash");
 
 #define GHASH_BLOCK_SIZE       16
 #define GHASH_DIGEST_SIZE      16
@@ -41,8 +42,17 @@ struct ghash_async_ctx {
        struct cryptd_ahash *cryptd_tfm;
 };
 
-asmlinkage void pmull_ghash_update(int blocks, u64 dg[], const char *src,
-                                  struct ghash_key const *k, const char *head);
+asmlinkage void pmull_ghash_update_p64(int blocks, u64 dg[], const char *src,
+                                      struct ghash_key const *k,
+                                      const char *head);
+
+asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src,
+                                     struct ghash_key const *k,
+                                     const char *head);
+
+static void (*pmull_ghash_update)(int blocks, u64 dg[], const char *src,
+                                 struct ghash_key const *k,
+                                 const char *head);
 
 static int ghash_init(struct shash_desc *desc)
 {
@@ -312,6 +322,14 @@ static int __init ghash_ce_mod_init(void)
 {
        int err;
 
+       if (!(elf_hwcap & HWCAP_NEON))
+               return -ENODEV;
+
+       if (elf_hwcap2 & HWCAP2_PMULL)
+               pmull_ghash_update = pmull_ghash_update_p64;
+       else
+               pmull_ghash_update = pmull_ghash_update_p8;
+
        err = crypto_register_shash(&ghash_alg);
        if (err)
                return err;
@@ -332,5 +350,5 @@ static void __exit ghash_ce_mod_exit(void)
        crypto_unregister_shash(&ghash_alg);
 }
 
-module_cpu_feature_match(PMULL, ghash_ce_mod_init);
+module_init(ghash_ce_mod_init);
 module_exit(ghash_ce_mod_exit);
index d92293747d63149f8101ff1290044f8283ab0b24..7ca54a76f6b9f1aabd8aaa52f313930702b718fc 100644 (file)
@@ -18,18 +18,23 @@ config CRYPTO_SHA512_ARM64
 
 config CRYPTO_SHA1_ARM64_CE
        tristate "SHA-1 digest algorithm (ARMv8 Crypto Extensions)"
-       depends on ARM64 && KERNEL_MODE_NEON
+       depends on KERNEL_MODE_NEON
        select CRYPTO_HASH
+       select CRYPTO_SHA1
 
 config CRYPTO_SHA2_ARM64_CE
        tristate "SHA-224/SHA-256 digest algorithm (ARMv8 Crypto Extensions)"
-       depends on ARM64 && KERNEL_MODE_NEON
+       depends on KERNEL_MODE_NEON
        select CRYPTO_HASH
+       select CRYPTO_SHA256_ARM64
 
 config CRYPTO_GHASH_ARM64_CE
-       tristate "GHASH (for GCM chaining mode) using ARMv8 Crypto Extensions"
-       depends on ARM64 && KERNEL_MODE_NEON
+       tristate "GHASH/AES-GCM using ARMv8 Crypto Extensions"
+       depends on KERNEL_MODE_NEON
        select CRYPTO_HASH
+       select CRYPTO_GF128MUL
+       select CRYPTO_AES
+       select CRYPTO_AES_ARM64
 
 config CRYPTO_CRCT10DIF_ARM64_CE
        tristate "CRCT10DIF digest algorithm using PMULL instructions"
@@ -49,25 +54,29 @@ config CRYPTO_AES_ARM64_CE
        tristate "AES core cipher using ARMv8 Crypto Extensions"
        depends on ARM64 && KERNEL_MODE_NEON
        select CRYPTO_ALGAPI
+       select CRYPTO_AES_ARM64
 
 config CRYPTO_AES_ARM64_CE_CCM
        tristate "AES in CCM mode using ARMv8 Crypto Extensions"
        depends on ARM64 && KERNEL_MODE_NEON
        select CRYPTO_ALGAPI
        select CRYPTO_AES_ARM64_CE
+       select CRYPTO_AES_ARM64
        select CRYPTO_AEAD
 
 config CRYPTO_AES_ARM64_CE_BLK
        tristate "AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions"
-       depends on ARM64 && KERNEL_MODE_NEON
+       depends on KERNEL_MODE_NEON
        select CRYPTO_BLKCIPHER
        select CRYPTO_AES_ARM64_CE
+       select CRYPTO_AES_ARM64
        select CRYPTO_SIMD
 
 config CRYPTO_AES_ARM64_NEON_BLK
        tristate "AES in ECB/CBC/CTR/XTS modes using NEON instructions"
-       depends on ARM64 && KERNEL_MODE_NEON
+       depends on KERNEL_MODE_NEON
        select CRYPTO_BLKCIPHER
+       select CRYPTO_AES_ARM64
        select CRYPTO_AES
        select CRYPTO_SIMD
 
@@ -82,6 +91,7 @@ config CRYPTO_AES_ARM64_BS
        depends on KERNEL_MODE_NEON
        select CRYPTO_BLKCIPHER
        select CRYPTO_AES_ARM64_NEON_BLK
+       select CRYPTO_AES_ARM64
        select CRYPTO_SIMD
 
 endif
index 3363560c79b7e69376ac4a50a5401456640a35af..e3a375c4cb83c383242ac6b9cc8b3247939e0947 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * aesce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
  *
- * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -32,7 +32,7 @@ ENTRY(ce_aes_ccm_auth_data)
        beq     8f                              /* out of input? */
        cbnz    w8, 0b
        eor     v0.16b, v0.16b, v1.16b
-1:     ld1     {v3.16b}, [x4]                  /* load first round key */
+1:     ld1     {v3.4s}, [x4]                   /* load first round key */
        prfm    pldl1strm, [x1]
        cmp     w5, #12                         /* which key size? */
        add     x6, x4, #16
@@ -42,17 +42,17 @@ ENTRY(ce_aes_ccm_auth_data)
        mov     v5.16b, v3.16b
        b       4f
 2:     mov     v4.16b, v3.16b
-       ld1     {v5.16b}, [x6], #16             /* load 2nd round key */
+       ld1     {v5.4s}, [x6], #16              /* load 2nd round key */
 3:     aese    v0.16b, v4.16b
        aesmc   v0.16b, v0.16b
-4:     ld1     {v3.16b}, [x6], #16             /* load next round key */
+4:     ld1     {v3.4s}, [x6], #16              /* load next round key */
        aese    v0.16b, v5.16b
        aesmc   v0.16b, v0.16b
-5:     ld1     {v4.16b}, [x6], #16             /* load next round key */
+5:     ld1     {v4.4s}, [x6], #16              /* load next round key */
        subs    w7, w7, #3
        aese    v0.16b, v3.16b
        aesmc   v0.16b, v0.16b
-       ld1     {v5.16b}, [x6], #16             /* load next round key */
+       ld1     {v5.4s}, [x6], #16              /* load next round key */
        bpl     3b
        aese    v0.16b, v4.16b
        subs    w2, w2, #16                     /* last data? */
@@ -90,7 +90,7 @@ ENDPROC(ce_aes_ccm_auth_data)
         *                       u32 rounds);
         */
 ENTRY(ce_aes_ccm_final)
-       ld1     {v3.16b}, [x2], #16             /* load first round key */
+       ld1     {v3.4s}, [x2], #16              /* load first round key */
        ld1     {v0.16b}, [x0]                  /* load mac */
        cmp     w3, #12                         /* which key size? */
        sub     w3, w3, #2                      /* modified # of rounds */
@@ -100,17 +100,17 @@ ENTRY(ce_aes_ccm_final)
        mov     v5.16b, v3.16b
        b       2f
 0:     mov     v4.16b, v3.16b
-1:     ld1     {v5.16b}, [x2], #16             /* load next round key */
+1:     ld1     {v5.4s}, [x2], #16              /* load next round key */
        aese    v0.16b, v4.16b
        aesmc   v0.16b, v0.16b
        aese    v1.16b, v4.16b
        aesmc   v1.16b, v1.16b
-2:     ld1     {v3.16b}, [x2], #16             /* load next round key */
+2:     ld1     {v3.4s}, [x2], #16              /* load next round key */
        aese    v0.16b, v5.16b
        aesmc   v0.16b, v0.16b
        aese    v1.16b, v5.16b
        aesmc   v1.16b, v1.16b
-3:     ld1     {v4.16b}, [x2], #16             /* load next round key */
+3:     ld1     {v4.4s}, [x2], #16              /* load next round key */
        subs    w3, w3, #3
        aese    v0.16b, v3.16b
        aesmc   v0.16b, v0.16b
@@ -137,31 +137,31 @@ CPU_LE(   rev     x8, x8                  )       /* keep swabbed ctr in reg */
        cmp     w4, #12                         /* which key size? */
        sub     w7, w4, #2                      /* get modified # of rounds */
        ins     v1.d[1], x9                     /* no carry in lower ctr */
-       ld1     {v3.16b}, [x3]                  /* load first round key */
+       ld1     {v3.4s}, [x3]                   /* load first round key */
        add     x10, x3, #16
        bmi     1f
        bne     4f
        mov     v5.16b, v3.16b
        b       3f
 1:     mov     v4.16b, v3.16b
-       ld1     {v5.16b}, [x10], #16            /* load 2nd round key */
+       ld1     {v5.4s}, [x10], #16             /* load 2nd round key */
 2:     /* inner loop: 3 rounds, 2x interleaved */
        aese    v0.16b, v4.16b
        aesmc   v0.16b, v0.16b
        aese    v1.16b, v4.16b
        aesmc   v1.16b, v1.16b
-3:     ld1     {v3.16b}, [x10], #16            /* load next round key */
+3:     ld1     {v3.4s}, [x10], #16             /* load next round key */
        aese    v0.16b, v5.16b
        aesmc   v0.16b, v0.16b
        aese    v1.16b, v5.16b
        aesmc   v1.16b, v1.16b
-4:     ld1     {v4.16b}, [x10], #16            /* load next round key */
+4:     ld1     {v4.4s}, [x10], #16             /* load next round key */
        subs    w7, w7, #3
        aese    v0.16b, v3.16b
        aesmc   v0.16b, v0.16b
        aese    v1.16b, v3.16b
        aesmc   v1.16b, v1.16b
-       ld1     {v5.16b}, [x10], #16            /* load next round key */
+       ld1     {v5.4s}, [x10], #16             /* load next round key */
        bpl     2b
        aese    v0.16b, v4.16b
        aese    v1.16b, v4.16b
index 6a7dbc7c83a61d75c02fbd77ba73ea890f00ae69..a1254036f2b1e0df0e3083dae606a543f914f7d3 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * aes-ccm-glue.c - AES-CCM transform for ARMv8 with Crypto Extensions
  *
- * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -9,6 +9,7 @@
  */
 
 #include <asm/neon.h>
+#include <asm/simd.h>
 #include <asm/unaligned.h>
 #include <crypto/aes.h>
 #include <crypto/scatterwalk.h>
@@ -44,6 +45,8 @@ asmlinkage void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
 asmlinkage void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u32 const rk[],
                                 u32 rounds);
 
+asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
+
 static int ccm_setkey(struct crypto_aead *tfm, const u8 *in_key,
                      unsigned int key_len)
 {
@@ -103,7 +106,45 @@ static int ccm_init_mac(struct aead_request *req, u8 maciv[], u32 msglen)
        return 0;
 }
 
-static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
+static void ccm_update_mac(struct crypto_aes_ctx *key, u8 mac[], u8 const in[],
+                          u32 abytes, u32 *macp, bool use_neon)
+{
+       if (likely(use_neon)) {
+               ce_aes_ccm_auth_data(mac, in, abytes, macp, key->key_enc,
+                                    num_rounds(key));
+       } else {
+               if (*macp > 0 && *macp < AES_BLOCK_SIZE) {
+                       int added = min(abytes, AES_BLOCK_SIZE - *macp);
+
+                       crypto_xor(&mac[*macp], in, added);
+
+                       *macp += added;
+                       in += added;
+                       abytes -= added;
+               }
+
+               while (abytes > AES_BLOCK_SIZE) {
+                       __aes_arm64_encrypt(key->key_enc, mac, mac,
+                                           num_rounds(key));
+                       crypto_xor(mac, in, AES_BLOCK_SIZE);
+
+                       in += AES_BLOCK_SIZE;
+                       abytes -= AES_BLOCK_SIZE;
+               }
+
+               if (abytes > 0) {
+                       __aes_arm64_encrypt(key->key_enc, mac, mac,
+                                           num_rounds(key));
+                       crypto_xor(mac, in, abytes);
+                       *macp = abytes;
+               } else {
+                       *macp = 0;
+               }
+       }
+}
+
+static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[],
+                                  bool use_neon)
 {
        struct crypto_aead *aead = crypto_aead_reqtfm(req);
        struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
@@ -122,8 +163,7 @@ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
                ltag.len = 6;
        }
 
-       ce_aes_ccm_auth_data(mac, (u8 *)&ltag, ltag.len, &macp, ctx->key_enc,
-                            num_rounds(ctx));
+       ccm_update_mac(ctx, mac, (u8 *)&ltag, ltag.len, &macp, use_neon);
        scatterwalk_start(&walk, req->src);
 
        do {
@@ -135,8 +175,7 @@ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
                        n = scatterwalk_clamp(&walk, len);
                }
                p = scatterwalk_map(&walk);
-               ce_aes_ccm_auth_data(mac, p, n, &macp, ctx->key_enc,
-                                    num_rounds(ctx));
+               ccm_update_mac(ctx, mac, p, n, &macp, use_neon);
                len -= n;
 
                scatterwalk_unmap(p);
@@ -145,6 +184,56 @@ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
        } while (len);
 }
 
+static int ccm_crypt_fallback(struct skcipher_walk *walk, u8 mac[], u8 iv0[],
+                             struct crypto_aes_ctx *ctx, bool enc)
+{
+       u8 buf[AES_BLOCK_SIZE];
+       int err = 0;
+
+       while (walk->nbytes) {
+               int blocks = walk->nbytes / AES_BLOCK_SIZE;
+               u32 tail = walk->nbytes % AES_BLOCK_SIZE;
+               u8 *dst = walk->dst.virt.addr;
+               u8 *src = walk->src.virt.addr;
+               u32 nbytes = walk->nbytes;
+
+               if (nbytes == walk->total && tail > 0) {
+                       blocks++;
+                       tail = 0;
+               }
+
+               do {
+                       u32 bsize = AES_BLOCK_SIZE;
+
+                       if (nbytes < AES_BLOCK_SIZE)
+                               bsize = nbytes;
+
+                       crypto_inc(walk->iv, AES_BLOCK_SIZE);
+                       __aes_arm64_encrypt(ctx->key_enc, buf, walk->iv,
+                                           num_rounds(ctx));
+                       __aes_arm64_encrypt(ctx->key_enc, mac, mac,
+                                           num_rounds(ctx));
+                       if (enc)
+                               crypto_xor(mac, src, bsize);
+                       crypto_xor_cpy(dst, src, buf, bsize);
+                       if (!enc)
+                               crypto_xor(mac, dst, bsize);
+                       dst += bsize;
+                       src += bsize;
+                       nbytes -= bsize;
+               } while (--blocks);
+
+               err = skcipher_walk_done(walk, tail);
+       }
+
+       if (!err) {
+               __aes_arm64_encrypt(ctx->key_enc, buf, iv0, num_rounds(ctx));
+               __aes_arm64_encrypt(ctx->key_enc, mac, mac, num_rounds(ctx));
+               crypto_xor(mac, buf, AES_BLOCK_SIZE);
+       }
+       return err;
+}
+
 static int ccm_encrypt(struct aead_request *req)
 {
        struct crypto_aead *aead = crypto_aead_reqtfm(req);
@@ -153,39 +242,46 @@ static int ccm_encrypt(struct aead_request *req)
        u8 __aligned(8) mac[AES_BLOCK_SIZE];
        u8 buf[AES_BLOCK_SIZE];
        u32 len = req->cryptlen;
+       bool use_neon = may_use_simd();
        int err;
 
        err = ccm_init_mac(req, mac, len);
        if (err)
                return err;
 
-       kernel_neon_begin_partial(6);
+       if (likely(use_neon))
+               kernel_neon_begin();
 
        if (req->assoclen)
-               ccm_calculate_auth_mac(req, mac);
+               ccm_calculate_auth_mac(req, mac, use_neon);
 
        /* preserve the original iv for the final round */
        memcpy(buf, req->iv, AES_BLOCK_SIZE);
 
        err = skcipher_walk_aead_encrypt(&walk, req, true);
 
-       while (walk.nbytes) {
-               u32 tail = walk.nbytes % AES_BLOCK_SIZE;
-
-               if (walk.nbytes == walk.total)
-                       tail = 0;
+       if (likely(use_neon)) {
+               while (walk.nbytes) {
+                       u32 tail = walk.nbytes % AES_BLOCK_SIZE;
 
-               ce_aes_ccm_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
-                                  walk.nbytes - tail, ctx->key_enc,
-                                  num_rounds(ctx), mac, walk.iv);
+                       if (walk.nbytes == walk.total)
+                               tail = 0;
 
-               err = skcipher_walk_done(&walk, tail);
-       }
-       if (!err)
-               ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
+                       ce_aes_ccm_encrypt(walk.dst.virt.addr,
+                                          walk.src.virt.addr,
+                                          walk.nbytes - tail, ctx->key_enc,
+                                          num_rounds(ctx), mac, walk.iv);
 
-       kernel_neon_end();
+                       err = skcipher_walk_done(&walk, tail);
+               }
+               if (!err)
+                       ce_aes_ccm_final(mac, buf, ctx->key_enc,
+                                        num_rounds(ctx));
 
+               kernel_neon_end();
+       } else {
+               err = ccm_crypt_fallback(&walk, mac, buf, ctx, true);
+       }
        if (err)
                return err;
 
@@ -205,38 +301,46 @@ static int ccm_decrypt(struct aead_request *req)
        u8 __aligned(8) mac[AES_BLOCK_SIZE];
        u8 buf[AES_BLOCK_SIZE];
        u32 len = req->cryptlen - authsize;
+       bool use_neon = may_use_simd();
        int err;
 
        err = ccm_init_mac(req, mac, len);
        if (err)
                return err;
 
-       kernel_neon_begin_partial(6);
+       if (likely(use_neon))
+               kernel_neon_begin();
 
        if (req->assoclen)
-               ccm_calculate_auth_mac(req, mac);
+               ccm_calculate_auth_mac(req, mac, use_neon);
 
        /* preserve the original iv for the final round */
        memcpy(buf, req->iv, AES_BLOCK_SIZE);
 
        err = skcipher_walk_aead_decrypt(&walk, req, true);
 
-       while (walk.nbytes) {
-               u32 tail = walk.nbytes % AES_BLOCK_SIZE;
+       if (likely(use_neon)) {
+               while (walk.nbytes) {
+                       u32 tail = walk.nbytes % AES_BLOCK_SIZE;
 
-               if (walk.nbytes == walk.total)
-                       tail = 0;
+                       if (walk.nbytes == walk.total)
+                               tail = 0;
 
-               ce_aes_ccm_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
-                                  walk.nbytes - tail, ctx->key_enc,
-                                  num_rounds(ctx), mac, walk.iv);
+                       ce_aes_ccm_decrypt(walk.dst.virt.addr,
+                                          walk.src.virt.addr,
+                                          walk.nbytes - tail, ctx->key_enc,
+                                          num_rounds(ctx), mac, walk.iv);
 
-               err = skcipher_walk_done(&walk, tail);
-       }
-       if (!err)
-               ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
+                       err = skcipher_walk_done(&walk, tail);
+               }
+               if (!err)
+                       ce_aes_ccm_final(mac, buf, ctx->key_enc,
+                                        num_rounds(ctx));
 
-       kernel_neon_end();
+               kernel_neon_end();
+       } else {
+               err = ccm_crypt_fallback(&walk, mac, buf, ctx, false);
+       }
 
        if (err)
                return err;
index 50d9fe11d0c862bbd9cdd75855bfbc0fbffcfea7..6a75cd75ed11c60f828ff96b4ed0f09c4b288c6c 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * aes-ce-cipher.c - core AES cipher using ARMv8 Crypto Extensions
  *
- * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -9,6 +9,8 @@
  */
 
 #include <asm/neon.h>
+#include <asm/simd.h>
+#include <asm/unaligned.h>
 #include <crypto/aes.h>
 #include <linux/cpufeature.h>
 #include <linux/crypto.h>
@@ -20,6 +22,9 @@ MODULE_DESCRIPTION("Synchronous AES cipher using ARMv8 Crypto Extensions");
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");
 
+asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
+asmlinkage void __aes_arm64_decrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
+
 struct aes_block {
        u8 b[AES_BLOCK_SIZE];
 };
@@ -44,27 +49,32 @@ static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
        void *dummy0;
        int dummy1;
 
-       kernel_neon_begin_partial(4);
+       if (!may_use_simd()) {
+               __aes_arm64_encrypt(ctx->key_enc, dst, src, num_rounds(ctx));
+               return;
+       }
+
+       kernel_neon_begin();
 
        __asm__("       ld1     {v0.16b}, %[in]                 ;"
-               "       ld1     {v1.16b}, [%[key]], #16         ;"
+               "       ld1     {v1.4s}, [%[key]], #16          ;"
                "       cmp     %w[rounds], #10                 ;"
                "       bmi     0f                              ;"
                "       bne     3f                              ;"
                "       mov     v3.16b, v1.16b                  ;"
                "       b       2f                              ;"
                "0:     mov     v2.16b, v1.16b                  ;"
-               "       ld1     {v3.16b}, [%[key]], #16         ;"
+               "       ld1     {v3.4s}, [%[key]], #16          ;"
                "1:     aese    v0.16b, v2.16b                  ;"
                "       aesmc   v0.16b, v0.16b                  ;"
-               "2:     ld1     {v1.16b}, [%[key]], #16         ;"
+               "2:     ld1     {v1.4s}, [%[key]], #16          ;"
                "       aese    v0.16b, v3.16b                  ;"
                "       aesmc   v0.16b, v0.16b                  ;"
-               "3:     ld1     {v2.16b}, [%[key]], #16         ;"
+               "3:     ld1     {v2.4s}, [%[key]], #16          ;"
                "       subs    %w[rounds], %w[rounds], #3      ;"
                "       aese    v0.16b, v1.16b                  ;"
                "       aesmc   v0.16b, v0.16b                  ;"
-               "       ld1     {v3.16b}, [%[key]], #16         ;"
+               "       ld1     {v3.4s}, [%[key]], #16          ;"
                "       bpl     1b                              ;"
                "       aese    v0.16b, v2.16b                  ;"
                "       eor     v0.16b, v0.16b, v3.16b          ;"
@@ -89,27 +99,32 @@ static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
        void *dummy0;
        int dummy1;
 
-       kernel_neon_begin_partial(4);
+       if (!may_use_simd()) {
+               __aes_arm64_decrypt(ctx->key_dec, dst, src, num_rounds(ctx));
+               return;
+       }
+
+       kernel_neon_begin();
 
        __asm__("       ld1     {v0.16b}, %[in]                 ;"
-               "       ld1     {v1.16b}, [%[key]], #16         ;"
+               "       ld1     {v1.4s}, [%[key]], #16          ;"
                "       cmp     %w[rounds], #10                 ;"
                "       bmi     0f                              ;"
                "       bne     3f                              ;"
                "       mov     v3.16b, v1.16b                  ;"
                "       b       2f                              ;"
                "0:     mov     v2.16b, v1.16b                  ;"
-               "       ld1     {v3.16b}, [%[key]], #16         ;"
+               "       ld1     {v3.4s}, [%[key]], #16          ;"
                "1:     aesd    v0.16b, v2.16b                  ;"
                "       aesimc  v0.16b, v0.16b                  ;"
-               "2:     ld1     {v1.16b}, [%[key]], #16         ;"
+               "2:     ld1     {v1.4s}, [%[key]], #16          ;"
                "       aesd    v0.16b, v3.16b                  ;"
                "       aesimc  v0.16b, v0.16b                  ;"
-               "3:     ld1     {v2.16b}, [%[key]], #16         ;"
+               "3:     ld1     {v2.4s}, [%[key]], #16          ;"
                "       subs    %w[rounds], %w[rounds], #3      ;"
                "       aesd    v0.16b, v1.16b                  ;"
                "       aesimc  v0.16b, v0.16b                  ;"
-               "       ld1     {v3.16b}, [%[key]], #16         ;"
+               "       ld1     {v3.4s}, [%[key]], #16          ;"
                "       bpl     1b                              ;"
                "       aesd    v0.16b, v2.16b                  ;"
                "       eor     v0.16b, v0.16b, v3.16b          ;"
@@ -165,20 +180,16 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
            key_len != AES_KEYSIZE_256)
                return -EINVAL;
 
-       memcpy(ctx->key_enc, in_key, key_len);
        ctx->key_length = key_len;
+       for (i = 0; i < kwords; i++)
+               ctx->key_enc[i] = get_unaligned_le32(in_key + i * sizeof(u32));
 
-       kernel_neon_begin_partial(2);
+       kernel_neon_begin();
        for (i = 0; i < sizeof(rcon); i++) {
                u32 *rki = ctx->key_enc + (i * kwords);
                u32 *rko = rki + kwords;
 
-#ifndef CONFIG_CPU_BIG_ENDIAN
                rko[0] = ror32(aes_sub(rki[kwords - 1]), 8) ^ rcon[i] ^ rki[0];
-#else
-               rko[0] = rol32(aes_sub(rki[kwords - 1]), 8) ^ (rcon[i] << 24) ^
-                        rki[0];
-#endif
                rko[1] = rko[0] ^ rki[1];
                rko[2] = rko[1] ^ rki[2];
                rko[3] = rko[2] ^ rki[3];
@@ -210,9 +221,9 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
 
        key_dec[0] = key_enc[j];
        for (i = 1, j--; j > 0; i++, j--)
-               __asm__("ld1    {v0.16b}, %[in]         ;"
+               __asm__("ld1    {v0.4s}, %[in]          ;"
                        "aesimc v1.16b, v0.16b          ;"
-                       "st1    {v1.16b}, %[out]        ;"
+                       "st1    {v1.4s}, %[out] ;"
 
                :       [out]   "=Q"(key_dec[i])
                :       [in]    "Q"(key_enc[j])
index b46093d567e5449256dbf3bf1d4981144bdd8739..50330f5c3adc4f6fcac22794ead1af1c8ca956e2 100644 (file)
@@ -2,7 +2,7 @@
  * linux/arch/arm64/crypto/aes-ce.S - AES cipher for ARMv8 with
  *                                    Crypto Extensions
  *
- * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
        cmp             \rounds, #12
        blo             2222f           /* 128 bits */
        beq             1111f           /* 192 bits */
-       ld1             {v17.16b-v18.16b}, [\rk], #32
-1111:  ld1             {v19.16b-v20.16b}, [\rk], #32
-2222:  ld1             {v21.16b-v24.16b}, [\rk], #64
-       ld1             {v25.16b-v28.16b}, [\rk], #64
-       ld1             {v29.16b-v31.16b}, [\rk]
+       ld1             {v17.4s-v18.4s}, [\rk], #32
+1111:  ld1             {v19.4s-v20.4s}, [\rk], #32
+2222:  ld1             {v21.4s-v24.4s}, [\rk], #64
+       ld1             {v25.4s-v28.4s}, [\rk], #64
+       ld1             {v29.4s-v31.4s}, [\rk]
        .endm
 
        /* prepare for encryption with key in rk[] */
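The load_round_keys macro above relies on the usual AES key-schedule sizes: rounds + 1 round keys of 128 bits each, so 11 vectors for AES-128 (skipping the first two pairs), 13 for AES-192 (skipping one pair) and 15 for AES-256. The arithmetic, spelled out as a small C sketch:

	/* Sketch only: key length in bytes -> rounds -> 128-bit round keys. */
	static int aes_nrounds(unsigned int key_len)
	{
		return 6 + key_len / 4;			/* 16 -> 10, 24 -> 12, 32 -> 14 */
	}

	static int aes_nr_round_keys(unsigned int key_len)
	{
		return aes_nrounds(key_len) + 1;	/* 11, 13 or 15 */
	}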
index f2f9cc519309c5609996c3a56d85bb5447aaf2b0..6d2445d603cc244e4cd08acf420ae8b8089e5d88 100644 (file)
@@ -10,6 +10,7 @@
 
 #include <linux/linkage.h>
 #include <asm/assembler.h>
+#include <asm/cache.h>
 
        .text
 
        out             .req    x1
        in              .req    x2
        rounds          .req    x3
-       tt              .req    x4
-       lt              .req    x2
+       tt              .req    x2
 
-       .macro          __pair, enc, reg0, reg1, in0, in1e, in1d, shift
+       .macro          __pair1, sz, op, reg0, reg1, in0, in1e, in1d, shift
+       .ifc            \op\shift, b0
+       ubfiz           \reg0, \in0, #2, #8
+       ubfiz           \reg1, \in1e, #2, #8
+       .else
        ubfx            \reg0, \in0, #\shift, #8
-       .if             \enc
        ubfx            \reg1, \in1e, #\shift, #8
-       .else
-       ubfx            \reg1, \in1d, #\shift, #8
        .endif
+
+       /*
+        * AArch64 cannot do byte size indexed loads from a table containing
+        * 32-bit quantities, i.e., 'ldrb w12, [tt, w12, uxtw #2]' is not a
+        * valid instruction. So perform the shift explicitly first for the
+        * high bytes (the low byte is shifted implicitly by using ubfiz rather
+        * than ubfx above)
+        */
+       .ifnc           \op, b
        ldr             \reg0, [tt, \reg0, uxtw #2]
        ldr             \reg1, [tt, \reg1, uxtw #2]
+       .else
+       .if             \shift > 0
+       lsl             \reg0, \reg0, #2
+       lsl             \reg1, \reg1, #2
+       .endif
+       ldrb            \reg0, [tt, \reg0, uxtw]
+       ldrb            \reg1, [tt, \reg1, uxtw]
+       .endif
        .endm
 
-       .macro          __hround, out0, out1, in0, in1, in2, in3, t0, t1, enc
+       .macro          __pair0, sz, op, reg0, reg1, in0, in1e, in1d, shift
+       ubfx            \reg0, \in0, #\shift, #8
+       ubfx            \reg1, \in1d, #\shift, #8
+       ldr\op          \reg0, [tt, \reg0, uxtw #\sz]
+       ldr\op          \reg1, [tt, \reg1, uxtw #\sz]
+       .endm
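The comment in __pair1 above comes down to index scaling: the 32-bit table lookups need the extracted byte multiplied by four, which ubfiz and the uxtw #2 addressing mode provide for free, while ldrb has no scaled form, so the byte-wide final-round lookups either apply the scaling explicitly (encryption reads single S-box bytes out of crypto_ft_tab at stride 4, hence the +1 base further down) or need no scaling at all (decryption indexes the plain 256-byte inverse S-box). Roughly the same computation in C, as an illustration only:

	/* Sketch of the index math behind __pair1/__pair0. */
	static u32 round_lookup(const u32 *ttab, u32 x, unsigned int shift)
	{
		return ttab[(x >> shift) & 0xff];		/* ldr ..., [tt, idx, uxtw #2] */
	}

	static u8 final_lookup_enc(const u8 *ft_tab_plus_1, u32 x, unsigned int shift)
	{
		return ft_tab_plus_1[((x >> shift) & 0xff) * 4];	/* S-box byte inside ft_tab */
	}

	static u8 final_lookup_dec(const u8 *inv_sbox, u32 x, unsigned int shift)
	{
		return inv_sbox[(x >> shift) & 0xff];		/* plain 256-byte table */
	}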
+
+       .macro          __hround, out0, out1, in0, in1, in2, in3, t0, t1, enc, sz, op
        ldp             \out0, \out1, [rk], #8
 
-       __pair          \enc, w13, w14, \in0, \in1, \in3, 0
-       __pair          \enc, w15, w16, \in1, \in2, \in0, 8
-       __pair          \enc, w17, w18, \in2, \in3, \in1, 16
-       __pair          \enc, \t0, \t1, \in3, \in0, \in2, 24
-
-       eor             \out0, \out0, w13
-       eor             \out1, \out1, w14
-       eor             \out0, \out0, w15, ror #24
-       eor             \out1, \out1, w16, ror #24
-       eor             \out0, \out0, w17, ror #16
-       eor             \out1, \out1, w18, ror #16
+       __pair\enc      \sz, \op, w12, w13, \in0, \in1, \in3, 0
+       __pair\enc      \sz, \op, w14, w15, \in1, \in2, \in0, 8
+       __pair\enc      \sz, \op, w16, w17, \in2, \in3, \in1, 16
+       __pair\enc      \sz, \op, \t0, \t1, \in3, \in0, \in2, 24
+
+       eor             \out0, \out0, w12
+       eor             \out1, \out1, w13
+       eor             \out0, \out0, w14, ror #24
+       eor             \out1, \out1, w15, ror #24
+       eor             \out0, \out0, w16, ror #16
+       eor             \out1, \out1, w17, ror #16
        eor             \out0, \out0, \t0, ror #8
        eor             \out1, \out1, \t1, ror #8
        .endm
 
-       .macro          fround, out0, out1, out2, out3, in0, in1, in2, in3
-       __hround        \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1
-       __hround        \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1
+       .macro          fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
+       __hround        \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
+       __hround        \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op
        .endm
 
-       .macro          iround, out0, out1, out2, out3, in0, in1, in2, in3
-       __hround        \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0
-       __hround        \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0
+       .macro          iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
+       __hround        \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
+       __hround        \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op
        .endm
 
-       .macro          do_crypt, round, ttab, ltab
-       ldp             w5, w6, [in]
-       ldp             w7, w8, [in, #8]
-       ldp             w9, w10, [rk], #16
-       ldp             w11, w12, [rk, #-8]
+       .macro          do_crypt, round, ttab, ltab, bsz
+       ldp             w4, w5, [in]
+       ldp             w6, w7, [in, #8]
+       ldp             w8, w9, [rk], #16
+       ldp             w10, w11, [rk, #-8]
 
+CPU_BE(        rev             w4, w4          )
 CPU_BE(        rev             w5, w5          )
 CPU_BE(        rev             w6, w6          )
 CPU_BE(        rev             w7, w7          )
-CPU_BE(        rev             w8, w8          )
 
+       eor             w4, w4, w8
        eor             w5, w5, w9
        eor             w6, w6, w10
        eor             w7, w7, w11
-       eor             w8, w8, w12
 
        adr_l           tt, \ttab
-       adr_l           lt, \ltab
 
        tbnz            rounds, #1, 1f
 
-0:     \round          w9, w10, w11, w12, w5, w6, w7, w8
-       \round          w5, w6, w7, w8, w9, w10, w11, w12
+0:     \round          w8, w9, w10, w11, w4, w5, w6, w7
+       \round          w4, w5, w6, w7, w8, w9, w10, w11
 
 1:     subs            rounds, rounds, #4
-       \round          w9, w10, w11, w12, w5, w6, w7, w8
-       csel            tt, tt, lt, hi
-       \round          w5, w6, w7, w8, w9, w10, w11, w12
-       b.hi            0b
-
+       \round          w8, w9, w10, w11, w4, w5, w6, w7
+       b.ls            3f
+2:     \round          w4, w5, w6, w7, w8, w9, w10, w11
+       b               0b
+3:     adr_l           tt, \ltab
+       \round          w4, w5, w6, w7, w8, w9, w10, w11, \bsz, b
+
+CPU_BE(        rev             w4, w4          )
 CPU_BE(        rev             w5, w5          )
 CPU_BE(        rev             w6, w6          )
 CPU_BE(        rev             w7, w7          )
-CPU_BE(        rev             w8, w8          )
 
-       stp             w5, w6, [out]
-       stp             w7, w8, [out, #8]
+       stp             w4, w5, [out]
+       stp             w6, w7, [out, #8]
        ret
        .endm
 
-       .align          5
+       .align          L1_CACHE_SHIFT
+       .type           __aes_arm64_inverse_sbox, %object
+__aes_arm64_inverse_sbox:
+       .byte           0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
+       .byte           0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
+       .byte           0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
+       .byte           0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
+       .byte           0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
+       .byte           0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
+       .byte           0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
+       .byte           0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
+       .byte           0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
+       .byte           0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
+       .byte           0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
+       .byte           0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
+       .byte           0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
+       .byte           0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
+       .byte           0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
+       .byte           0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
+       .byte           0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
+       .byte           0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
+       .byte           0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
+       .byte           0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
+       .byte           0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
+       .byte           0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
+       .byte           0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
+       .byte           0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
+       .byte           0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
+       .byte           0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
+       .byte           0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
+       .byte           0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
+       .byte           0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
+       .byte           0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
+       .byte           0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
+       .byte           0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
+       .size           __aes_arm64_inverse_sbox, . - __aes_arm64_inverse_sbox
+
 ENTRY(__aes_arm64_encrypt)
-       do_crypt        fround, crypto_ft_tab, crypto_fl_tab
+       do_crypt        fround, crypto_ft_tab, crypto_ft_tab + 1, 2
 ENDPROC(__aes_arm64_encrypt)
 
        .align          5
 ENTRY(__aes_arm64_decrypt)
-       do_crypt        iround, crypto_it_tab, crypto_il_tab
+       do_crypt        iround, crypto_it_tab, __aes_arm64_inverse_sbox, 0
 ENDPROC(__aes_arm64_decrypt)
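Taken together, do_crypt now runs every round but the last through the 32-bit tables and only retargets tt at a byte-wide table for the final round (crypto_ft_tab + 1 when encrypting, the cache-line-aligned 256-byte inverse S-box above when decrypting), so the large fl/il tables are no longer needed. A rough C outline of that control flow, with hypothetical full_round()/final_round() helpers standing in for the fround/iround macros:

	/* Illustrative outline only, not the real implementation. */
	static void full_round(u32 st[4], const u32 *rk, const u32 *ttab);
	static void final_round(u32 st[4], const u32 *rk, const u8 *ltab);

	static void aes_scalar_crypt(const u32 *rk, int rounds, u32 st[4],
				     const u32 *ttab, const u8 *ltab)
	{
		int i;

		for (i = 0; i < 4; i++)			/* initial whitening */
			st[i] ^= rk[i];
		rk += 4;

		for (i = 0; i < rounds - 1; i++, rk += 4)
			full_round(st, rk, ttab);	/* 32-bit table lookups */

		final_round(st, rk, ltab);		/* byte-wide lookups only */
	}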
diff --git a/arch/arm64/crypto/aes-ctr-fallback.h b/arch/arm64/crypto/aes-ctr-fallback.h
new file mode 100644 (file)
index 0000000..c928571
--- /dev/null
@@ -0,0 +1,53 @@
+/*
+ * Fallback for sync aes(ctr) in contexts where kernel mode NEON
+ * is not allowed
+ *
+ * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <crypto/aes.h>
+#include <crypto/internal/skcipher.h>
+
+asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
+
+static inline int aes_ctr_encrypt_fallback(struct crypto_aes_ctx *ctx,
+                                          struct skcipher_request *req)
+{
+       struct skcipher_walk walk;
+       u8 buf[AES_BLOCK_SIZE];
+       int err;
+
+       err = skcipher_walk_virt(&walk, req, true);
+
+       while (walk.nbytes > 0) {
+               u8 *dst = walk.dst.virt.addr;
+               u8 *src = walk.src.virt.addr;
+               int nbytes = walk.nbytes;
+               int tail = 0;
+
+               if (nbytes < walk.total) {
+                       nbytes = round_down(nbytes, AES_BLOCK_SIZE);
+                       tail = walk.nbytes % AES_BLOCK_SIZE;
+               }
+
+               do {
+                       int bsize = min(nbytes, AES_BLOCK_SIZE);
+
+                       __aes_arm64_encrypt(ctx->key_enc, buf, walk.iv,
+                                           6 + ctx->key_length / 4);
+                       crypto_xor_cpy(dst, src, buf, bsize);
+                       crypto_inc(walk.iv, AES_BLOCK_SIZE);
+
+                       dst += AES_BLOCK_SIZE;
+                       src += AES_BLOCK_SIZE;
+                       nbytes -= AES_BLOCK_SIZE;
+               } while (nbytes > 0);
+
+               err = skcipher_walk_done(&walk, tail);
+       }
+       return err;
+}
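This header is the whole non-NEON CTR path: encrypt the counter with the scalar __aes_arm64_encrypt(), XOR the keystream into the data with crypto_xor_cpy(), bump the counter with crypto_inc(). The glue code below wires it in behind a may_use_simd() check; a minimal sketch of such a wrapper (mirroring ctr_encrypt_sync() further down, and assuming a driver whose skcipher context is a plain struct crypto_aes_ctx):

	#include <asm/simd.h>
	#include <crypto/aes.h>
	#include <crypto/internal/skcipher.h>

	#include "aes-ctr-fallback.h"

	static int my_neon_ctr_encrypt(struct skcipher_request *req);	/* hypothetical NEON path */

	static int my_ctr_encrypt_sync(struct skcipher_request *req)
	{
		struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
		struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);

		/* fall back to the scalar helper when NEON must not be used */
		if (!may_use_simd())
			return aes_ctr_encrypt_fallback(ctx, req);

		return my_neon_ctr_encrypt(req);
	}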
index bcf596b0197ef31cfffa60e04c69cd652f5eac09..998ba519a02666d51ef518f3533b649de1c371ac 100644 (file)
@@ -10,6 +10,7 @@
 
 #include <asm/neon.h>
 #include <asm/hwcap.h>
+#include <asm/simd.h>
 #include <crypto/aes.h>
 #include <crypto/internal/hash.h>
 #include <crypto/internal/simd.h>
@@ -19,6 +20,7 @@
 #include <crypto/xts.h>
 
 #include "aes-ce-setkey.h"
+#include "aes-ctr-fallback.h"
 
 #ifdef USE_V8_CRYPTO_EXTENSIONS
 #define MODE                   "ce"
@@ -241,9 +243,7 @@ static int ctr_encrypt(struct skcipher_request *req)
 
                aes_ctr_encrypt(tail, NULL, (u8 *)ctx->key_enc, rounds,
                                blocks, walk.iv, first);
-               if (tdst != tsrc)
-                       memcpy(tdst, tsrc, nbytes);
-               crypto_xor(tdst, tail, nbytes);
+               crypto_xor_cpy(tdst, tsrc, tail, nbytes);
                err = skcipher_walk_done(&walk, 0);
        }
        kernel_neon_end();
@@ -251,6 +251,17 @@ static int ctr_encrypt(struct skcipher_request *req)
        return err;
 }
 
+static int ctr_encrypt_sync(struct skcipher_request *req)
+{
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+       if (!may_use_simd())
+               return aes_ctr_encrypt_fallback(ctx, req);
+
+       return ctr_encrypt(req);
+}
+
 static int xts_encrypt(struct skcipher_request *req)
 {
        struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
@@ -357,8 +368,8 @@ static struct skcipher_alg aes_algs[] = { {
        .ivsize         = AES_BLOCK_SIZE,
        .chunksize      = AES_BLOCK_SIZE,
        .setkey         = skcipher_aes_setkey,
-       .encrypt        = ctr_encrypt,
-       .decrypt        = ctr_encrypt,
+       .encrypt        = ctr_encrypt_sync,
+       .decrypt        = ctr_encrypt_sync,
 }, {
        .base = {
                .cra_name               = "__xts(aes)",
@@ -460,11 +471,35 @@ static int mac_init(struct shash_desc *desc)
        return 0;
 }
 
+static void mac_do_update(struct crypto_aes_ctx *ctx, u8 const in[], int blocks,
+                         u8 dg[], int enc_before, int enc_after)
+{
+       int rounds = 6 + ctx->key_length / 4;
+
+       if (may_use_simd()) {
+               kernel_neon_begin();
+               aes_mac_update(in, ctx->key_enc, rounds, blocks, dg, enc_before,
+                              enc_after);
+               kernel_neon_end();
+       } else {
+               if (enc_before)
+                       __aes_arm64_encrypt(ctx->key_enc, dg, dg, rounds);
+
+               while (blocks--) {
+                       crypto_xor(dg, in, AES_BLOCK_SIZE);
+                       in += AES_BLOCK_SIZE;
+
+                       if (blocks || enc_after)
+                               __aes_arm64_encrypt(ctx->key_enc, dg, dg,
+                                                   rounds);
+               }
+       }
+}
+
 static int mac_update(struct shash_desc *desc, const u8 *p, unsigned int len)
 {
        struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
        struct mac_desc_ctx *ctx = shash_desc_ctx(desc);
-       int rounds = 6 + tctx->key.key_length / 4;
 
        while (len > 0) {
                unsigned int l;
@@ -476,10 +511,8 @@ static int mac_update(struct shash_desc *desc, const u8 *p, unsigned int len)
 
                        len %= AES_BLOCK_SIZE;
 
-                       kernel_neon_begin();
-                       aes_mac_update(p, tctx->key.key_enc, rounds, blocks,
-                                      ctx->dg, (ctx->len != 0), (len != 0));
-                       kernel_neon_end();
+                       mac_do_update(&tctx->key, p, blocks, ctx->dg,
+                                     (ctx->len != 0), (len != 0));
 
                        p += blocks * AES_BLOCK_SIZE;
 
@@ -507,11 +540,8 @@ static int cbcmac_final(struct shash_desc *desc, u8 *out)
 {
        struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
        struct mac_desc_ctx *ctx = shash_desc_ctx(desc);
-       int rounds = 6 + tctx->key.key_length / 4;
 
-       kernel_neon_begin();
-       aes_mac_update(NULL, tctx->key.key_enc, rounds, 0, ctx->dg, 1, 0);
-       kernel_neon_end();
+       mac_do_update(&tctx->key, NULL, 0, ctx->dg, 1, 0);
 
        memcpy(out, ctx->dg, AES_BLOCK_SIZE);
 
@@ -522,7 +552,6 @@ static int cmac_final(struct shash_desc *desc, u8 *out)
 {
        struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
        struct mac_desc_ctx *ctx = shash_desc_ctx(desc);
-       int rounds = 6 + tctx->key.key_length / 4;
        u8 *consts = tctx->consts;
 
        if (ctx->len != AES_BLOCK_SIZE) {
@@ -530,9 +559,7 @@ static int cmac_final(struct shash_desc *desc, u8 *out)
                consts += AES_BLOCK_SIZE;
        }
 
-       kernel_neon_begin();
-       aes_mac_update(consts, tctx->key.key_enc, rounds, 1, ctx->dg, 0, 1);
-       kernel_neon_end();
+       mac_do_update(&tctx->key, consts, 1, ctx->dg, 0, 1);
 
        memcpy(out, ctx->dg, AES_BLOCK_SIZE);
 
index db2501d93550c35720c3d1adeb78fed49a9df1a2..c55d68ccb89f804a7db201ae23abe49ace0a1188 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * Bit sliced AES using NEON instructions
  *
- * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2016 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -9,12 +9,15 @@
  */
 
 #include <asm/neon.h>
+#include <asm/simd.h>
 #include <crypto/aes.h>
 #include <crypto/internal/simd.h>
 #include <crypto/internal/skcipher.h>
 #include <crypto/xts.h>
 #include <linux/module.h>
 
+#include "aes-ctr-fallback.h"
+
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");
 
@@ -58,6 +61,11 @@ struct aesbs_cbc_ctx {
        u32                     enc[AES_MAX_KEYLENGTH_U32];
 };
 
+struct aesbs_ctr_ctx {
+       struct aesbs_ctx        key;            /* must be first member */
+       struct crypto_aes_ctx   fallback;
+};
+
 struct aesbs_xts_ctx {
        struct aesbs_ctx        key;
        u32                     twkey[AES_MAX_KEYLENGTH_U32];
@@ -196,6 +204,25 @@ static int cbc_decrypt(struct skcipher_request *req)
        return err;
 }
 
+static int aesbs_ctr_setkey_sync(struct crypto_skcipher *tfm, const u8 *in_key,
+                                unsigned int key_len)
+{
+       struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
+       int err;
+
+       err = crypto_aes_expand_key(&ctx->fallback, in_key, key_len);
+       if (err)
+               return err;
+
+       ctx->key.rounds = 6 + key_len / 4;
+
+       kernel_neon_begin();
+       aesbs_convert_key(ctx->key.rk, ctx->fallback.key_enc, ctx->key.rounds);
+       kernel_neon_end();
+
+       return 0;
+}
+
 static int ctr_encrypt(struct skcipher_request *req)
 {
        struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
@@ -224,9 +251,8 @@ static int ctr_encrypt(struct skcipher_request *req)
                        u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
                        u8 *src = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
 
-                       if (dst != src)
-                               memcpy(dst, src, walk.total % AES_BLOCK_SIZE);
-                       crypto_xor(dst, final, walk.total % AES_BLOCK_SIZE);
+                       crypto_xor_cpy(dst, src, final,
+                                      walk.total % AES_BLOCK_SIZE);
 
                        err = skcipher_walk_done(&walk, 0);
                        break;
@@ -260,6 +286,17 @@ static int aesbs_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
        return aesbs_setkey(tfm, in_key, key_len);
 }
 
+static int ctr_encrypt_sync(struct skcipher_request *req)
+{
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+       if (!may_use_simd())
+               return aes_ctr_encrypt_fallback(&ctx->fallback, req);
+
+       return ctr_encrypt(req);
+}
+
 static int __xts_crypt(struct skcipher_request *req,
                       void (*fn)(u8 out[], u8 const in[], u8 const rk[],
                                  int rounds, int blocks, u8 iv[]))
@@ -356,7 +393,7 @@ static struct skcipher_alg aes_algs[] = { {
        .base.cra_driver_name   = "ctr-aes-neonbs",
        .base.cra_priority      = 250 - 1,
        .base.cra_blocksize     = 1,
-       .base.cra_ctxsize       = sizeof(struct aesbs_ctx),
+       .base.cra_ctxsize       = sizeof(struct aesbs_ctr_ctx),
        .base.cra_module        = THIS_MODULE,
 
        .min_keysize            = AES_MIN_KEY_SIZE,
@@ -364,9 +401,9 @@ static struct skcipher_alg aes_algs[] = { {
        .chunksize              = AES_BLOCK_SIZE,
        .walksize               = 8 * AES_BLOCK_SIZE,
        .ivsize                 = AES_BLOCK_SIZE,
-       .setkey                 = aesbs_setkey,
-       .encrypt                = ctr_encrypt,
-       .decrypt                = ctr_encrypt,
+       .setkey                 = aesbs_ctr_setkey_sync,
+       .encrypt                = ctr_encrypt_sync,
+       .decrypt                = ctr_encrypt_sync,
 }, {
        .base.cra_name          = "__xts(aes)",
        .base.cra_driver_name   = "__xts-aes-neonbs",
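For CTR the context grows to struct aesbs_ctr_ctx so a full crypto_aes_ctx is available for the fallback, and the "must be first member" comment is load-bearing: ctr_encrypt() above still treats the tfm context as a struct aesbs_ctx, which only works while key sits at offset zero. A compile-time statement of that assumption, sketch only:

	#include <linux/bug.h>		/* BUILD_BUG_ON() */
	#include <linux/stddef.h>	/* offsetof() */

	/* The cast in ctr_encrypt() relies on this layout. */
	static inline void aesbs_ctr_ctx_layout_check(void)
	{
		BUILD_BUG_ON(offsetof(struct aesbs_ctr_ctx, key) != 0);
	}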
index a7cd575ea223348e6d2fa99dea5028e7202d04b6..cbdb75d15cd031596c84bdb3959f17910fea08f2 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * ChaCha20 256-bit cipher algorithm, RFC7539, arm64 NEON functions
  *
- * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2016 - 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -26,6 +26,7 @@
 
 #include <asm/hwcap.h>
 #include <asm/neon.h>
+#include <asm/simd.h>
 
 asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src);
 asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src);
@@ -64,7 +65,7 @@ static int chacha20_neon(struct skcipher_request *req)
        u32 state[16];
        int err;
 
-       if (req->cryptlen <= CHACHA20_BLOCK_SIZE)
+       if (!may_use_simd() || req->cryptlen <= CHACHA20_BLOCK_SIZE)
                return crypto_chacha20_crypt(req);
 
        err = skcipher_walk_virt(&walk, req, true);
index eccb1ae90064106e43d97bcf6b22b7bee1dcc49f..624f4137918ce5563a8e57576f5e0cb5d1ea1cd9 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * Accelerated CRC32(C) using arm64 NEON and Crypto Extensions instructions
  *
- * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2016 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -19,6 +19,7 @@
 
 #include <asm/hwcap.h>
 #include <asm/neon.h>
+#include <asm/simd.h>
 #include <asm/unaligned.h>
 
 #define PMULL_MIN_LEN          64L     /* minimum size of buffer
@@ -105,10 +106,10 @@ static int crc32_pmull_update(struct shash_desc *desc, const u8 *data,
                length -= l;
        }
 
-       if (length >= PMULL_MIN_LEN) {
+       if (length >= PMULL_MIN_LEN && may_use_simd()) {
                l = round_down(length, SCALE_F);
 
-               kernel_neon_begin_partial(10);
+               kernel_neon_begin();
                *crc = crc32_pmull_le(data, l, *crc);
                kernel_neon_end();
 
@@ -137,10 +138,10 @@ static int crc32c_pmull_update(struct shash_desc *desc, const u8 *data,
                length -= l;
        }
 
-       if (length >= PMULL_MIN_LEN) {
+       if (length >= PMULL_MIN_LEN && may_use_simd()) {
                l = round_down(length, SCALE_F);
 
-               kernel_neon_begin_partial(10);
+               kernel_neon_begin();
                *crc = crc32c_pmull_le(data, l, *crc);
                kernel_neon_end();
 
index 60cb590c2590b059007ea0be23e7b4da5836f164..96f0cae4a02258bad9cf63639da45190499d1696 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * Accelerated CRC-T10DIF using arm64 NEON and Crypto Extensions instructions
  *
- * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2016 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -18,6 +18,7 @@
 #include <crypto/internal/hash.h>
 
 #include <asm/neon.h>
+#include <asm/simd.h>
 
 #define CRC_T10DIF_PMULL_CHUNK_SIZE    16U
 
@@ -48,9 +49,13 @@ static int crct10dif_update(struct shash_desc *desc, const u8 *data,
        }
 
        if (length > 0) {
-               kernel_neon_begin_partial(14);
-               *crc = crc_t10dif_pmull(*crc, data, length);
-               kernel_neon_end();
+               if (may_use_simd()) {
+                       kernel_neon_begin();
+                       *crc = crc_t10dif_pmull(*crc, data, length);
+                       kernel_neon_end();
+               } else {
+                       *crc = crc_t10dif_generic(*crc, data, length);
+               }
        }
 
        return 0;
index f0bb9f0b524fceb8b62ef4be45f57bee3308156e..11ebf1ae248a1b9b9ff9a61ddaa23a439b2cc359 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * Accelerated GHASH implementation with ARMv8 PMULL instructions.
  *
- * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2014 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 as published
 #include <linux/linkage.h>
 #include <asm/assembler.h>
 
-       SHASH   .req    v0
-       SHASH2  .req    v1
-       T1      .req    v2
-       T2      .req    v3
-       MASK    .req    v4
-       XL      .req    v5
-       XM      .req    v6
-       XH      .req    v7
-       IN1     .req    v7
+       SHASH           .req    v0
+       SHASH2          .req    v1
+       T1              .req    v2
+       T2              .req    v3
+       MASK            .req    v4
+       XL              .req    v5
+       XM              .req    v6
+       XH              .req    v7
+       IN1             .req    v7
+
+       k00_16          .req    v8
+       k32_48          .req    v9
+
+       t3              .req    v10
+       t4              .req    v11
+       t5              .req    v12
+       t6              .req    v13
+       t7              .req    v14
+       t8              .req    v15
+       t9              .req    v16
+
+       perm1           .req    v17
+       perm2           .req    v18
+       perm3           .req    v19
+
+       sh1             .req    v20
+       sh2             .req    v21
+       sh3             .req    v22
+       sh4             .req    v23
+
+       ss1             .req    v24
+       ss2             .req    v25
+       ss3             .req    v26
+       ss4             .req    v27
 
        .text
        .arch           armv8-a+crypto
 
-       /*
-        * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
-        *                         struct ghash_key const *k, const char *head)
-        */
-ENTRY(pmull_ghash_update)
+       .macro          __pmull_p64, rd, rn, rm
+       pmull           \rd\().1q, \rn\().1d, \rm\().1d
+       .endm
+
+       .macro          __pmull2_p64, rd, rn, rm
+       pmull2          \rd\().1q, \rn\().2d, \rm\().2d
+       .endm
+
+       .macro          __pmull_p8, rq, ad, bd
+       ext             t3.8b, \ad\().8b, \ad\().8b, #1         // A1
+       ext             t5.8b, \ad\().8b, \ad\().8b, #2         // A2
+       ext             t7.8b, \ad\().8b, \ad\().8b, #3         // A3
+
+       __pmull_p8_\bd  \rq, \ad
+       .endm
+
+       .macro          __pmull2_p8, rq, ad, bd
+       tbl             t3.16b, {\ad\().16b}, perm1.16b         // A1
+       tbl             t5.16b, {\ad\().16b}, perm2.16b         // A2
+       tbl             t7.16b, {\ad\().16b}, perm3.16b         // A3
+
+       __pmull2_p8_\bd \rq, \ad
+       .endm
+
+       .macro          __pmull_p8_SHASH, rq, ad
+       __pmull_p8_tail \rq, \ad\().8b, SHASH.8b, 8b,, sh1, sh2, sh3, sh4
+       .endm
+
+       .macro          __pmull_p8_SHASH2, rq, ad
+       __pmull_p8_tail \rq, \ad\().8b, SHASH2.8b, 8b,, ss1, ss2, ss3, ss4
+       .endm
+
+       .macro          __pmull2_p8_SHASH, rq, ad
+       __pmull_p8_tail \rq, \ad\().16b, SHASH.16b, 16b, 2, sh1, sh2, sh3, sh4
+       .endm
+
+       .macro          __pmull_p8_tail, rq, ad, bd, nb, t, b1, b2, b3, b4
+       pmull\t         t3.8h, t3.\nb, \bd                      // F = A1*B
+       pmull\t         t4.8h, \ad, \b1\().\nb                  // E = A*B1
+       pmull\t         t5.8h, t5.\nb, \bd                      // H = A2*B
+       pmull\t         t6.8h, \ad, \b2\().\nb                  // G = A*B2
+       pmull\t         t7.8h, t7.\nb, \bd                      // J = A3*B
+       pmull\t         t8.8h, \ad, \b3\().\nb                  // I = A*B3
+       pmull\t         t9.8h, \ad, \b4\().\nb                  // K = A*B4
+       pmull\t         \rq\().8h, \ad, \bd                     // D = A*B
+
+       eor             t3.16b, t3.16b, t4.16b                  // L = E + F
+       eor             t5.16b, t5.16b, t6.16b                  // M = G + H
+       eor             t7.16b, t7.16b, t8.16b                  // N = I + J
+
+       uzp1            t4.2d, t3.2d, t5.2d
+       uzp2            t3.2d, t3.2d, t5.2d
+       uzp1            t6.2d, t7.2d, t9.2d
+       uzp2            t7.2d, t7.2d, t9.2d
+
+       // t3 = (L) (P0 + P1) << 8
+       // t5 = (M) (P2 + P3) << 16
+       eor             t4.16b, t4.16b, t3.16b
+       and             t3.16b, t3.16b, k32_48.16b
+
+       // t7 = (N) (P4 + P5) << 24
+       // t9 = (K) (P6 + P7) << 32
+       eor             t6.16b, t6.16b, t7.16b
+       and             t7.16b, t7.16b, k00_16.16b
+
+       eor             t4.16b, t4.16b, t3.16b
+       eor             t6.16b, t6.16b, t7.16b
+
+       zip2            t5.2d, t4.2d, t3.2d
+       zip1            t3.2d, t4.2d, t3.2d
+       zip2            t9.2d, t6.2d, t7.2d
+       zip1            t7.2d, t6.2d, t7.2d
+
+       ext             t3.16b, t3.16b, t3.16b, #15
+       ext             t5.16b, t5.16b, t5.16b, #14
+       ext             t7.16b, t7.16b, t7.16b, #13
+       ext             t9.16b, t9.16b, t9.16b, #12
+
+       eor             t3.16b, t3.16b, t5.16b
+       eor             t7.16b, t7.16b, t9.16b
+       eor             \rq\().16b, \rq\().16b, t3.16b
+       eor             \rq\().16b, \rq\().16b, t7.16b
+       .endm
+
+       .macro          __pmull_pre_p64
+       movi            MASK.16b, #0xe1
+       shl             MASK.2d, MASK.2d, #57
+       .endm
+
+       .macro          __pmull_pre_p8
+       // k00_16 := 0x0000000000000000_000000000000ffff
+       // k32_48 := 0x00000000ffffffff_0000ffffffffffff
+       movi            k32_48.2d, #0xffffffff
+       mov             k32_48.h[2], k32_48.h[0]
+       ushr            k00_16.2d, k32_48.2d, #32
+
+       // prepare the permutation vectors
+       mov_q           x5, 0x080f0e0d0c0b0a09
+       movi            T1.8b, #8
+       dup             perm1.2d, x5
+       eor             perm1.16b, perm1.16b, T1.16b
+       ushr            perm2.2d, perm1.2d, #8
+       ushr            perm3.2d, perm1.2d, #16
+       ushr            T1.2d, perm1.2d, #24
+       sli             perm2.2d, perm1.2d, #56
+       sli             perm3.2d, perm1.2d, #48
+       sli             T1.2d, perm1.2d, #40
+
+       // precompute loop invariants
+       tbl             sh1.16b, {SHASH.16b}, perm1.16b
+       tbl             sh2.16b, {SHASH.16b}, perm2.16b
+       tbl             sh3.16b, {SHASH.16b}, perm3.16b
+       tbl             sh4.16b, {SHASH.16b}, T1.16b
+       ext             ss1.8b, SHASH2.8b, SHASH2.8b, #1
+       ext             ss2.8b, SHASH2.8b, SHASH2.8b, #2
+       ext             ss3.8b, SHASH2.8b, SHASH2.8b, #3
+       ext             ss4.8b, SHASH2.8b, SHASH2.8b, #4
+       .endm
+
+       //
+       // PMULL (64x64->128) based reduction for CPUs that can do
+       // it in a single instruction.
+       //
+       .macro          __pmull_reduce_p64
+       pmull           T2.1q, XL.1d, MASK.1d
+       eor             XM.16b, XM.16b, T1.16b
+
+       mov             XH.d[0], XM.d[1]
+       mov             XM.d[1], XL.d[0]
+
+       eor             XL.16b, XM.16b, T2.16b
+       ext             T2.16b, XL.16b, XL.16b, #8
+       pmull           XL.1q, XL.1d, MASK.1d
+       .endm
+
+       //
+       // Alternative reduction for CPUs that lack support for the
+       // 64x64->128 PMULL instruction
+       //
+       .macro          __pmull_reduce_p8
+       eor             XM.16b, XM.16b, T1.16b
+
+       mov             XL.d[1], XM.d[0]
+       mov             XH.d[0], XM.d[1]
+
+       shl             T1.2d, XL.2d, #57
+       shl             T2.2d, XL.2d, #62
+       eor             T2.16b, T2.16b, T1.16b
+       shl             T1.2d, XL.2d, #63
+       eor             T2.16b, T2.16b, T1.16b
+       ext             T1.16b, XL.16b, XH.16b, #8
+       eor             T2.16b, T2.16b, T1.16b
+
+       mov             XL.d[1], T2.d[0]
+       mov             XH.d[0], T2.d[1]
+
+       ushr            T2.2d, XL.2d, #1
+       eor             XH.16b, XH.16b, XL.16b
+       eor             XL.16b, XL.16b, T2.16b
+       ushr            T2.2d, T2.2d, #6
+       ushr            XL.2d, XL.2d, #1
+       .endm
+
+       .macro          __pmull_ghash, pn
        ld1             {SHASH.2d}, [x3]
        ld1             {XL.2d}, [x1]
-       movi            MASK.16b, #0xe1
        ext             SHASH2.16b, SHASH.16b, SHASH.16b, #8
-       shl             MASK.2d, MASK.2d, #57
        eor             SHASH2.16b, SHASH2.16b, SHASH.16b
 
+       __pmull_pre_\pn
+
        /* do the head block first, if supplied */
        cbz             x4, 0f
        ld1             {T1.2d}, [x4]
@@ -52,28 +236,209 @@ CPU_LE(   rev64           T1.16b, T1.16b  )
        eor             T1.16b, T1.16b, T2.16b
        eor             XL.16b, XL.16b, IN1.16b
 
+       __pmull2_\pn    XH, XL, SHASH                   // a1 * b1
+       eor             T1.16b, T1.16b, XL.16b
+       __pmull_\pn     XL, XL, SHASH                   // a0 * b0
+       __pmull_\pn     XM, T1, SHASH2                  // (a1 + a0)(b1 + b0)
+
+       eor             T2.16b, XL.16b, XH.16b
+       ext             T1.16b, XL.16b, XH.16b, #8
+       eor             XM.16b, XM.16b, T2.16b
+
+       __pmull_reduce_\pn
+
+       eor             T2.16b, T2.16b, XH.16b
+       eor             XL.16b, XL.16b, T2.16b
+
+       cbnz            w0, 0b
+
+       st1             {XL.2d}, [x1]
+       ret
+       .endm
+
+       /*
+        * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
+        *                         struct ghash_key const *k, const char *head)
+        */
+ENTRY(pmull_ghash_update_p64)
+       __pmull_ghash   p64
+ENDPROC(pmull_ghash_update_p64)
+
+ENTRY(pmull_ghash_update_p8)
+       __pmull_ghash   p8
+ENDPROC(pmull_ghash_update_p8)
+
+       KS              .req    v8
+       CTR             .req    v9
+       INP             .req    v10
+
+       .macro          load_round_keys, rounds, rk
+       cmp             \rounds, #12
+       blo             2222f           /* 128 bits */
+       beq             1111f           /* 192 bits */
+       ld1             {v17.4s-v18.4s}, [\rk], #32
+1111:  ld1             {v19.4s-v20.4s}, [\rk], #32
+2222:  ld1             {v21.4s-v24.4s}, [\rk], #64
+       ld1             {v25.4s-v28.4s}, [\rk], #64
+       ld1             {v29.4s-v31.4s}, [\rk]
+       .endm
+
+       .macro          enc_round, state, key
+       aese            \state\().16b, \key\().16b
+       aesmc           \state\().16b, \state\().16b
+       .endm
+
+       .macro          enc_block, state, rounds
+       cmp             \rounds, #12
+       b.lo            2222f           /* 128 bits */
+       b.eq            1111f           /* 192 bits */
+       enc_round       \state, v17
+       enc_round       \state, v18
+1111:  enc_round       \state, v19
+       enc_round       \state, v20
+2222:  .irp            key, v21, v22, v23, v24, v25, v26, v27, v28, v29
+       enc_round       \state, \key
+       .endr
+       aese            \state\().16b, v30.16b
+       eor             \state\().16b, \state\().16b, v31.16b
+       .endm
+
+       .macro          pmull_gcm_do_crypt, enc
+       ld1             {SHASH.2d}, [x4]
+       ld1             {XL.2d}, [x1]
+       ldr             x8, [x5, #8]                    // load lower counter
+
+       movi            MASK.16b, #0xe1
+       ext             SHASH2.16b, SHASH.16b, SHASH.16b, #8
+CPU_LE(        rev             x8, x8          )
+       shl             MASK.2d, MASK.2d, #57
+       eor             SHASH2.16b, SHASH2.16b, SHASH.16b
+
+       .if             \enc == 1
+       ld1             {KS.16b}, [x7]
+       .endif
+
+0:     ld1             {CTR.8b}, [x5]                  // load upper counter
+       ld1             {INP.16b}, [x3], #16
+       rev             x9, x8
+       add             x8, x8, #1
+       sub             w0, w0, #1
+       ins             CTR.d[1], x9                    // set lower counter
+
+       .if             \enc == 1
+       eor             INP.16b, INP.16b, KS.16b        // encrypt input
+       st1             {INP.16b}, [x2], #16
+       .endif
+
+       rev64           T1.16b, INP.16b
+
+       cmp             w6, #12
+       b.ge            2f                              // AES-192/256?
+
+1:     enc_round       CTR, v21
+
+       ext             T2.16b, XL.16b, XL.16b, #8
+       ext             IN1.16b, T1.16b, T1.16b, #8
+
+       enc_round       CTR, v22
+
+       eor             T1.16b, T1.16b, T2.16b
+       eor             XL.16b, XL.16b, IN1.16b
+
+       enc_round       CTR, v23
+
        pmull2          XH.1q, SHASH.2d, XL.2d          // a1 * b1
        eor             T1.16b, T1.16b, XL.16b
+
+       enc_round       CTR, v24
+
        pmull           XL.1q, SHASH.1d, XL.1d          // a0 * b0
        pmull           XM.1q, SHASH2.1d, T1.1d         // (a1 + a0)(b1 + b0)
 
+       enc_round       CTR, v25
+
        ext             T1.16b, XL.16b, XH.16b, #8
        eor             T2.16b, XL.16b, XH.16b
        eor             XM.16b, XM.16b, T1.16b
+
+       enc_round       CTR, v26
+
        eor             XM.16b, XM.16b, T2.16b
        pmull           T2.1q, XL.1d, MASK.1d
 
+       enc_round       CTR, v27
+
        mov             XH.d[0], XM.d[1]
        mov             XM.d[1], XL.d[0]
 
+       enc_round       CTR, v28
+
        eor             XL.16b, XM.16b, T2.16b
+
+       enc_round       CTR, v29
+
        ext             T2.16b, XL.16b, XL.16b, #8
+
+       aese            CTR.16b, v30.16b
+
        pmull           XL.1q, XL.1d, MASK.1d
        eor             T2.16b, T2.16b, XH.16b
+
+       eor             KS.16b, CTR.16b, v31.16b
+
        eor             XL.16b, XL.16b, T2.16b
 
+       .if             \enc == 0
+       eor             INP.16b, INP.16b, KS.16b
+       st1             {INP.16b}, [x2], #16
+       .endif
+
        cbnz            w0, 0b
 
+CPU_LE(        rev             x8, x8          )
        st1             {XL.2d}, [x1]
+       str             x8, [x5, #8]                    // store lower counter
+
+       .if             \enc == 1
+       st1             {KS.16b}, [x7]
+       .endif
+
+       ret
+
+2:     b.eq            3f                              // AES-192?
+       enc_round       CTR, v17
+       enc_round       CTR, v18
+3:     enc_round       CTR, v19
+       enc_round       CTR, v20
+       b               1b
+       .endm
+
+       /*
+        * void pmull_gcm_encrypt(int blocks, u64 dg[], u8 dst[], const u8 src[],
+        *                        struct ghash_key const *k, u8 ctr[],
+        *                        int rounds, u8 ks[])
+        */
+ENTRY(pmull_gcm_encrypt)
+       pmull_gcm_do_crypt      1
+ENDPROC(pmull_gcm_encrypt)
+
+       /*
+        * void pmull_gcm_decrypt(int blocks, u64 dg[], u8 dst[], const u8 src[],
+        *                        struct ghash_key const *k, u8 ctr[],
+        *                        int rounds)
+        */
+ENTRY(pmull_gcm_decrypt)
+       pmull_gcm_do_crypt      0
+ENDPROC(pmull_gcm_decrypt)
+
+       /*
+        * void pmull_gcm_encrypt_block(u8 dst[], u8 src[], u8 rk[], int rounds)
+        */
+ENTRY(pmull_gcm_encrypt_block)
+       cbz             x2, 0f
+       load_round_keys w3, x2
+0:     ld1             {v0.16b}, [x1]
+       enc_block       v0, w3
+       st1             {v0.16b}, [x0]
        ret
-ENDPROC(pmull_ghash_update)
+ENDPROC(pmull_gcm_encrypt_block)
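pmull_gcm_do_crypt above interleaves the AES-CTR keystream with the GHASH update so both run inside a single NEON section; per block the logic is the usual GCM recurrence, and the scalar path in the glue code below does the same thing serially. A stripped-down C sketch of one encrypted block, with hypothetical helpers standing in for the real primitives (the counter is incremented over the whole block here, as the scalar fallback below also does):

	#include <crypto/aes.h>
	#include <crypto/algapi.h>	/* crypto_xor_cpy(), crypto_inc() */

	/* Hypothetical stand-ins for the real primitives. */
	static void aes_encrypt_block(const struct crypto_aes_ctx *key, u8 *out, const u8 *in);
	static void ghash_absorb_block(u64 dg[2], const u8 block[16]);	/* dg = (dg ^ block) . H */

	/* One GCM block: C_i = P_i ^ AES_K(ctr), then hash the ciphertext. */
	static void gcm_encrypt_one_block(const struct crypto_aes_ctx *key, u64 dg[2],
					  u8 ctr[AES_BLOCK_SIZE], u8 *dst, const u8 *src)
	{
		u8 ks[AES_BLOCK_SIZE];

		aes_encrypt_block(key, ks, ctr);
		crypto_xor_cpy(dst, src, ks, AES_BLOCK_SIZE);
		ghash_absorb_block(dg, dst);
		crypto_inc(ctr, AES_BLOCK_SIZE);
	}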
index 833ec1e3f3e9b7491cc26da24fc0ba386b73de81..cfc9c92814fd0cb74bfd3c9ad78973de57c07e20 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * Accelerated GHASH implementation with ARMv8 PMULL instructions.
  *
- * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2014 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 as published
@@ -9,22 +9,33 @@
  */
 
 #include <asm/neon.h>
+#include <asm/simd.h>
 #include <asm/unaligned.h>
+#include <crypto/aes.h>
+#include <crypto/algapi.h>
+#include <crypto/b128ops.h>
+#include <crypto/gf128mul.h>
+#include <crypto/internal/aead.h>
 #include <crypto/internal/hash.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
 #include <linux/cpufeature.h>
 #include <linux/crypto.h>
 #include <linux/module.h>
 
-MODULE_DESCRIPTION("GHASH secure hash using ARMv8 Crypto Extensions");
+MODULE_DESCRIPTION("GHASH and AES-GCM using ARMv8 Crypto Extensions");
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("ghash");
 
 #define GHASH_BLOCK_SIZE       16
 #define GHASH_DIGEST_SIZE      16
+#define GCM_IV_SIZE            12
 
 struct ghash_key {
        u64 a;
        u64 b;
+       be128 k;
 };
 
 struct ghash_desc_ctx {
@@ -33,8 +44,35 @@ struct ghash_desc_ctx {
        u32 count;
 };
 
-asmlinkage void pmull_ghash_update(int blocks, u64 dg[], const char *src,
-                                  struct ghash_key const *k, const char *head);
+struct gcm_aes_ctx {
+       struct crypto_aes_ctx   aes_key;
+       struct ghash_key        ghash_key;
+};
+
+asmlinkage void pmull_ghash_update_p64(int blocks, u64 dg[], const char *src,
+                                      struct ghash_key const *k,
+                                      const char *head);
+
+asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src,
+                                     struct ghash_key const *k,
+                                     const char *head);
+
+static void (*pmull_ghash_update)(int blocks, u64 dg[], const char *src,
+                                 struct ghash_key const *k,
+                                 const char *head);
+
+asmlinkage void pmull_gcm_encrypt(int blocks, u64 dg[], u8 dst[],
+                                 const u8 src[], struct ghash_key const *k,
+                                 u8 ctr[], int rounds, u8 ks[]);
+
+asmlinkage void pmull_gcm_decrypt(int blocks, u64 dg[], u8 dst[],
+                                 const u8 src[], struct ghash_key const *k,
+                                 u8 ctr[], int rounds);
+
+asmlinkage void pmull_gcm_encrypt_block(u8 dst[], u8 const src[],
+                                       u32 const rk[], int rounds);
+
+asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
 
 static int ghash_init(struct shash_desc *desc)
 {
@@ -44,6 +82,36 @@ static int ghash_init(struct shash_desc *desc)
        return 0;
 }
 
+static void ghash_do_update(int blocks, u64 dg[], const char *src,
+                           struct ghash_key *key, const char *head)
+{
+       if (likely(may_use_simd())) {
+               kernel_neon_begin();
+               pmull_ghash_update(blocks, dg, src, key, head);
+               kernel_neon_end();
+       } else {
+               be128 dst = { cpu_to_be64(dg[1]), cpu_to_be64(dg[0]) };
+
+               do {
+                       const u8 *in = src;
+
+                       if (head) {
+                               in = head;
+                               blocks++;
+                               head = NULL;
+                       } else {
+                               src += GHASH_BLOCK_SIZE;
+                       }
+
+                       crypto_xor((u8 *)&dst, in, GHASH_BLOCK_SIZE);
+                       gf128mul_lle(&dst, &key->k);
+               } while (--blocks);
+
+               dg[0] = be64_to_cpu(dst.b);
+               dg[1] = be64_to_cpu(dst.a);
+       }
+}
+
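ghash_do_update() above is where the SIMD gate lives: with NEON usable the PMULL code runs between kernel_neon_begin() and kernel_neon_end(), otherwise the generic GF(2^128) multiply takes over, using the copy of the key that __ghash_setkey() stashes in ghash_key.k. The recurrence it implements, isolated as a sketch:

	#include <crypto/algapi.h>	/* crypto_xor() */
	#include <crypto/b128ops.h>
	#include <crypto/gf128mul.h>

	/* Y <- (Y ^ X_i) * H over GF(2^128), one 16-byte block at a time. */
	static void ghash_one_block(be128 *y, const be128 *h, const u8 x[16])
	{
		crypto_xor((u8 *)y, x, 16);
		gf128mul_lle(y, h);
	}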
 static int ghash_update(struct shash_desc *desc, const u8 *src,
                        unsigned int len)
 {
@@ -67,10 +135,9 @@ static int ghash_update(struct shash_desc *desc, const u8 *src,
                blocks = len / GHASH_BLOCK_SIZE;
                len %= GHASH_BLOCK_SIZE;
 
-               kernel_neon_begin_partial(8);
-               pmull_ghash_update(blocks, ctx->digest, src, key,
-                                  partial ? ctx->buf : NULL);
-               kernel_neon_end();
+               ghash_do_update(blocks, ctx->digest, src, key,
+                               partial ? ctx->buf : NULL);
+
                src += blocks * GHASH_BLOCK_SIZE;
                partial = 0;
        }
@@ -89,9 +156,7 @@ static int ghash_final(struct shash_desc *desc, u8 *dst)
 
                memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);
 
-               kernel_neon_begin_partial(8);
-               pmull_ghash_update(1, ctx->digest, ctx->buf, key, NULL);
-               kernel_neon_end();
+               ghash_do_update(1, ctx->digest, ctx->buf, key, NULL);
        }
        put_unaligned_be64(ctx->digest[1], dst);
        put_unaligned_be64(ctx->digest[0], dst + 8);
@@ -100,16 +165,13 @@ static int ghash_final(struct shash_desc *desc, u8 *dst)
        return 0;
 }
 
-static int ghash_setkey(struct crypto_shash *tfm,
-                       const u8 *inkey, unsigned int keylen)
+static int __ghash_setkey(struct ghash_key *key,
+                         const u8 *inkey, unsigned int keylen)
 {
-       struct ghash_key *key = crypto_shash_ctx(tfm);
        u64 a, b;
 
-       if (keylen != GHASH_BLOCK_SIZE) {
-               crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
-               return -EINVAL;
-       }
+       /* needed for the fallback */
+       memcpy(&key->k, inkey, GHASH_BLOCK_SIZE);
 
        /* perform multiplication by 'x' in GF(2^128) */
        b = get_unaligned_be64(inkey);
@@ -124,33 +186,418 @@ static int ghash_setkey(struct crypto_shash *tfm,
        return 0;
 }
 
+static int ghash_setkey(struct crypto_shash *tfm,
+                       const u8 *inkey, unsigned int keylen)
+{
+       struct ghash_key *key = crypto_shash_ctx(tfm);
+
+       if (keylen != GHASH_BLOCK_SIZE) {
+               crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+               return -EINVAL;
+       }
+
+       return __ghash_setkey(key, inkey, keylen);
+}
+
 static struct shash_alg ghash_alg = {
-       .digestsize     = GHASH_DIGEST_SIZE,
-       .init           = ghash_init,
-       .update         = ghash_update,
-       .final          = ghash_final,
-       .setkey         = ghash_setkey,
-       .descsize       = sizeof(struct ghash_desc_ctx),
-       .base           = {
-               .cra_name               = "ghash",
-               .cra_driver_name        = "ghash-ce",
-               .cra_priority           = 200,
-               .cra_flags              = CRYPTO_ALG_TYPE_SHASH,
-               .cra_blocksize          = GHASH_BLOCK_SIZE,
-               .cra_ctxsize            = sizeof(struct ghash_key),
-               .cra_module             = THIS_MODULE,
-       },
+       .base.cra_name          = "ghash",
+       .base.cra_driver_name   = "ghash-ce",
+       .base.cra_priority      = 200,
+       .base.cra_flags         = CRYPTO_ALG_TYPE_SHASH,
+       .base.cra_blocksize     = GHASH_BLOCK_SIZE,
+       .base.cra_ctxsize       = sizeof(struct ghash_key),
+       .base.cra_module        = THIS_MODULE,
+
+       .digestsize             = GHASH_DIGEST_SIZE,
+       .init                   = ghash_init,
+       .update                 = ghash_update,
+       .final                  = ghash_final,
+       .setkey                 = ghash_setkey,
+       .descsize               = sizeof(struct ghash_desc_ctx),
+};
+
+static int num_rounds(struct crypto_aes_ctx *ctx)
+{
+       /*
+        * # of rounds specified by AES:
+        * 128 bit key          10 rounds
+        * 192 bit key          12 rounds
+        * 256 bit key          14 rounds
+        * => n byte key        => 6 + (n/4) rounds
+        */
+       return 6 + ctx->key_length / 4;
+}
+
+static int gcm_setkey(struct crypto_aead *tfm, const u8 *inkey,
+                     unsigned int keylen)
+{
+       struct gcm_aes_ctx *ctx = crypto_aead_ctx(tfm);
+       u8 key[GHASH_BLOCK_SIZE];
+       int ret;
+
+       ret = crypto_aes_expand_key(&ctx->aes_key, inkey, keylen);
+       if (ret) {
+               tfm->base.crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+               return -EINVAL;
+       }
+
+       __aes_arm64_encrypt(ctx->aes_key.key_enc, key, (u8[AES_BLOCK_SIZE]){},
+                           num_rounds(&ctx->aes_key));
+
+       return __ghash_setkey(&ctx->ghash_key, key, sizeof(key));
+}
+
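gcm_setkey() above derives the GHASH subkey the standard way: H is the AES encryption of the all-zero block, computed with the scalar cipher so that key setup never needs NEON, then handed to __ghash_setkey(), which keeps both the PMULL-friendly (a, b) form and the raw copy used by the gf128mul fallback. The derivation on its own, mirroring the call above (sketch only):

	/* H = AES_K(0^128), fed to __ghash_setkey() as the hash subkey. */
	static void derive_ghash_subkey(struct gcm_aes_ctx *ctx, u8 h[GHASH_BLOCK_SIZE])
	{
		__aes_arm64_encrypt(ctx->aes_key.key_enc, h, (u8[AES_BLOCK_SIZE]){},
				    num_rounds(&ctx->aes_key));
	}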
+static int gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
+{
+       switch (authsize) {
+       case 4:
+       case 8:
+       case 12 ... 16:
+               break;
+       default:
+               return -EINVAL;
+       }
+       return 0;
+}
+
+static void gcm_update_mac(u64 dg[], const u8 *src, int count, u8 buf[],
+                          int *buf_count, struct gcm_aes_ctx *ctx)
+{
+       if (*buf_count > 0) {
+               int buf_added = min(count, GHASH_BLOCK_SIZE - *buf_count);
+
+               memcpy(&buf[*buf_count], src, buf_added);
+
+               *buf_count += buf_added;
+               src += buf_added;
+               count -= buf_added;
+       }
+
+       if (count >= GHASH_BLOCK_SIZE || *buf_count == GHASH_BLOCK_SIZE) {
+               int blocks = count / GHASH_BLOCK_SIZE;
+
+               ghash_do_update(blocks, dg, src, &ctx->ghash_key,
+                               *buf_count ? buf : NULL);
+
+               src += blocks * GHASH_BLOCK_SIZE;
+               count %= GHASH_BLOCK_SIZE;
+               *buf_count = 0;
+       }
+
+       if (count > 0) {
+               memcpy(buf, src, count);
+               *buf_count = count;
+       }
+}
+
+static void gcm_calculate_auth_mac(struct aead_request *req, u64 dg[])
+{
+       struct crypto_aead *aead = crypto_aead_reqtfm(req);
+       struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
+       u8 buf[GHASH_BLOCK_SIZE];
+       struct scatter_walk walk;
+       u32 len = req->assoclen;
+       int buf_count = 0;
+
+       scatterwalk_start(&walk, req->src);
+
+       do {
+               u32 n = scatterwalk_clamp(&walk, len);
+               u8 *p;
+
+               if (!n) {
+                       scatterwalk_start(&walk, sg_next(walk.sg));
+                       n = scatterwalk_clamp(&walk, len);
+               }
+               p = scatterwalk_map(&walk);
+
+               gcm_update_mac(dg, p, n, buf, &buf_count, ctx);
+               len -= n;
+
+               scatterwalk_unmap(p);
+               scatterwalk_advance(&walk, n);
+               scatterwalk_done(&walk, 0, len);
+       } while (len);
+
+       if (buf_count) {
+               memset(&buf[buf_count], 0, GHASH_BLOCK_SIZE - buf_count);
+               ghash_do_update(1, dg, buf, &ctx->ghash_key, NULL);
+       }
+}
+
+static void gcm_final(struct aead_request *req, struct gcm_aes_ctx *ctx,
+                     u64 dg[], u8 tag[], int cryptlen)
+{
+       u8 mac[AES_BLOCK_SIZE];
+       u128 lengths;
+
+       lengths.a = cpu_to_be64(req->assoclen * 8);
+       lengths.b = cpu_to_be64(cryptlen * 8);
+
+       ghash_do_update(1, dg, (void *)&lengths, &ctx->ghash_key, NULL);
+
+       put_unaligned_be64(dg[1], mac);
+       put_unaligned_be64(dg[0], mac + 8);
+
+       crypto_xor(tag, mac, AES_BLOCK_SIZE);
+}
+
+static int gcm_encrypt(struct aead_request *req)
+{
+       struct crypto_aead *aead = crypto_aead_reqtfm(req);
+       struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
+       struct skcipher_walk walk;
+       u8 iv[AES_BLOCK_SIZE];
+       u8 ks[AES_BLOCK_SIZE];
+       u8 tag[AES_BLOCK_SIZE];
+       u64 dg[2] = {};
+       int err;
+
+       if (req->assoclen)
+               gcm_calculate_auth_mac(req, dg);
+
+       memcpy(iv, req->iv, GCM_IV_SIZE);
+       put_unaligned_be32(1, iv + GCM_IV_SIZE);
+
+       if (likely(may_use_simd())) {
+               kernel_neon_begin();
+
+               pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc,
+                                       num_rounds(&ctx->aes_key));
+               put_unaligned_be32(2, iv + GCM_IV_SIZE);
+               pmull_gcm_encrypt_block(ks, iv, NULL,
+                                       num_rounds(&ctx->aes_key));
+               put_unaligned_be32(3, iv + GCM_IV_SIZE);
+
+               err = skcipher_walk_aead_encrypt(&walk, req, true);
+
+               while (walk.nbytes >= AES_BLOCK_SIZE) {
+                       int blocks = walk.nbytes / AES_BLOCK_SIZE;
+
+                       pmull_gcm_encrypt(blocks, dg, walk.dst.virt.addr,
+                                         walk.src.virt.addr, &ctx->ghash_key,
+                                         iv, num_rounds(&ctx->aes_key), ks);
+
+                       err = skcipher_walk_done(&walk,
+                                                walk.nbytes % AES_BLOCK_SIZE);
+               }
+               kernel_neon_end();
+       } else {
+               __aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv,
+                                   num_rounds(&ctx->aes_key));
+               put_unaligned_be32(2, iv + GCM_IV_SIZE);
+
+               err = skcipher_walk_aead_encrypt(&walk, req, true);
+
+               while (walk.nbytes >= AES_BLOCK_SIZE) {
+                       int blocks = walk.nbytes / AES_BLOCK_SIZE;
+                       u8 *dst = walk.dst.virt.addr;
+                       u8 *src = walk.src.virt.addr;
+
+                       do {
+                               __aes_arm64_encrypt(ctx->aes_key.key_enc,
+                                                   ks, iv,
+                                                   num_rounds(&ctx->aes_key));
+                               crypto_xor_cpy(dst, src, ks, AES_BLOCK_SIZE);
+                               crypto_inc(iv, AES_BLOCK_SIZE);
+
+                               dst += AES_BLOCK_SIZE;
+                               src += AES_BLOCK_SIZE;
+                       } while (--blocks > 0);
+
+                       ghash_do_update(walk.nbytes / AES_BLOCK_SIZE, dg,
+                                       walk.dst.virt.addr, &ctx->ghash_key,
+                                       NULL);
+
+                       err = skcipher_walk_done(&walk,
+                                                walk.nbytes % AES_BLOCK_SIZE);
+               }
+               if (walk.nbytes)
+                       __aes_arm64_encrypt(ctx->aes_key.key_enc, ks, iv,
+                                           num_rounds(&ctx->aes_key));
+       }
+
+       /* handle the tail */
+       if (walk.nbytes) {
+               u8 buf[GHASH_BLOCK_SIZE];
+
+               crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr, ks,
+                              walk.nbytes);
+
+               memcpy(buf, walk.dst.virt.addr, walk.nbytes);
+               memset(buf + walk.nbytes, 0, GHASH_BLOCK_SIZE - walk.nbytes);
+               ghash_do_update(1, dg, buf, &ctx->ghash_key, NULL);
+
+               err = skcipher_walk_done(&walk, 0);
+       }
+
+       if (err)
+               return err;
+
+       gcm_final(req, ctx, dg, tag, req->cryptlen);
+
+       /* copy authtag to end of dst */
+       scatterwalk_map_and_copy(tag, req->dst, req->assoclen + req->cryptlen,
+                                crypto_aead_authsize(aead), 1);
+
+       return 0;
+}
+
+static int gcm_decrypt(struct aead_request *req)
+{
+       struct crypto_aead *aead = crypto_aead_reqtfm(req);
+       struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
+       unsigned int authsize = crypto_aead_authsize(aead);
+       struct skcipher_walk walk;
+       u8 iv[AES_BLOCK_SIZE];
+       u8 tag[AES_BLOCK_SIZE];
+       u8 buf[GHASH_BLOCK_SIZE];
+       u64 dg[2] = {};
+       int err;
+
+       if (req->assoclen)
+               gcm_calculate_auth_mac(req, dg);
+
+       memcpy(iv, req->iv, GCM_IV_SIZE);
+       put_unaligned_be32(1, iv + GCM_IV_SIZE);
+
+       if (likely(may_use_simd())) {
+               kernel_neon_begin();
+
+               pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc,
+                                       num_rounds(&ctx->aes_key));
+               put_unaligned_be32(2, iv + GCM_IV_SIZE);
+
+               err = skcipher_walk_aead_decrypt(&walk, req, true);
+
+               while (walk.nbytes >= AES_BLOCK_SIZE) {
+                       int blocks = walk.nbytes / AES_BLOCK_SIZE;
+
+                       pmull_gcm_decrypt(blocks, dg, walk.dst.virt.addr,
+                                         walk.src.virt.addr, &ctx->ghash_key,
+                                         iv, num_rounds(&ctx->aes_key));
+
+                       err = skcipher_walk_done(&walk,
+                                                walk.nbytes % AES_BLOCK_SIZE);
+               }
+               if (walk.nbytes)
+                       pmull_gcm_encrypt_block(iv, iv, NULL,
+                                               num_rounds(&ctx->aes_key));
+
+               kernel_neon_end();
+       } else {
+               __aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv,
+                                   num_rounds(&ctx->aes_key));
+               put_unaligned_be32(2, iv + GCM_IV_SIZE);
+
+               err = skcipher_walk_aead_decrypt(&walk, req, true);
+
+               while (walk.nbytes >= AES_BLOCK_SIZE) {
+                       int blocks = walk.nbytes / AES_BLOCK_SIZE;
+                       u8 *dst = walk.dst.virt.addr;
+                       u8 *src = walk.src.virt.addr;
+
+                       ghash_do_update(blocks, dg, walk.src.virt.addr,
+                                       &ctx->ghash_key, NULL);
+
+                       do {
+                               __aes_arm64_encrypt(ctx->aes_key.key_enc,
+                                                   buf, iv,
+                                                   num_rounds(&ctx->aes_key));
+                               crypto_xor_cpy(dst, src, buf, AES_BLOCK_SIZE);
+                               crypto_inc(iv, AES_BLOCK_SIZE);
+
+                               dst += AES_BLOCK_SIZE;
+                               src += AES_BLOCK_SIZE;
+                       } while (--blocks > 0);
+
+                       err = skcipher_walk_done(&walk,
+                                                walk.nbytes % AES_BLOCK_SIZE);
+               }
+               if (walk.nbytes)
+                       __aes_arm64_encrypt(ctx->aes_key.key_enc, iv, iv,
+                                           num_rounds(&ctx->aes_key));
+       }
+
+       /* handle the tail */
+       if (walk.nbytes) {
+               memcpy(buf, walk.src.virt.addr, walk.nbytes);
+               memset(buf + walk.nbytes, 0, GHASH_BLOCK_SIZE - walk.nbytes);
+               ghash_do_update(1, dg, buf, &ctx->ghash_key, NULL);
+
+               crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr, iv,
+                              walk.nbytes);
+
+               err = skcipher_walk_done(&walk, 0);
+       }
+
+       if (err)
+               return err;
+
+       gcm_final(req, ctx, dg, tag, req->cryptlen - authsize);
+
+       /* compare calculated auth tag with the stored one */
+       scatterwalk_map_and_copy(buf, req->src,
+                                req->assoclen + req->cryptlen - authsize,
+                                authsize, 0);
+
+       if (crypto_memneq(tag, buf, authsize))
+               return -EBADMSG;
+       return 0;
+}
+
+static struct aead_alg gcm_aes_alg = {
+       .ivsize                 = GCM_IV_SIZE,
+       .chunksize              = AES_BLOCK_SIZE,
+       .maxauthsize            = AES_BLOCK_SIZE,
+       .setkey                 = gcm_setkey,
+       .setauthsize            = gcm_setauthsize,
+       .encrypt                = gcm_encrypt,
+       .decrypt                = gcm_decrypt,
+
+       .base.cra_name          = "gcm(aes)",
+       .base.cra_driver_name   = "gcm-aes-ce",
+       .base.cra_priority      = 300,
+       .base.cra_blocksize     = 1,
+       .base.cra_ctxsize       = sizeof(struct gcm_aes_ctx),
+       .base.cra_module        = THIS_MODULE,
 };
 
 static int __init ghash_ce_mod_init(void)
 {
-       return crypto_register_shash(&ghash_alg);
+       int ret;
+
+       if (!(elf_hwcap & HWCAP_ASIMD))
+               return -ENODEV;
+
+       if (elf_hwcap & HWCAP_PMULL)
+               pmull_ghash_update = pmull_ghash_update_p64;
+
+       else
+               pmull_ghash_update = pmull_ghash_update_p8;
+
+       ret = crypto_register_shash(&ghash_alg);
+       if (ret)
+               return ret;
+
+       if (elf_hwcap & HWCAP_PMULL) {
+               ret = crypto_register_aead(&gcm_aes_alg);
+               if (ret)
+                       crypto_unregister_shash(&ghash_alg);
+       }
+       return ret;
 }
 
 static void __exit ghash_ce_mod_exit(void)
 {
        crypto_unregister_shash(&ghash_alg);
+       crypto_unregister_aead(&gcm_aes_alg);
 }
 
-module_cpu_feature_match(PMULL, ghash_ce_mod_init);
+static const struct cpu_feature ghash_cpu_feature[] = {
+       { cpu_feature(PMULL) }, { }
+};
+MODULE_DEVICE_TABLE(cpu, ghash_cpu_feature);
+
+module_init(ghash_ce_mod_init);
 module_exit(ghash_ce_mod_exit);
index ea319c055f5dfbee35a31c68ceb005501a8f26b7..efbeb3e0dcfb048099095a971512d8a9a1f62595 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * sha1-ce-glue.c - SHA-1 secure hash using ARMv8 Crypto Extensions
  *
- * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2014 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -9,6 +9,7 @@
  */
 
 #include <asm/neon.h>
+#include <asm/simd.h>
 #include <asm/unaligned.h>
 #include <crypto/internal/hash.h>
 #include <crypto/sha.h>
@@ -37,8 +38,11 @@ static int sha1_ce_update(struct shash_desc *desc, const u8 *data,
 {
        struct sha1_ce_state *sctx = shash_desc_ctx(desc);
 
+       if (!may_use_simd())
+               return crypto_sha1_update(desc, data, len);
+
        sctx->finalize = 0;
-       kernel_neon_begin_partial(16);
+       kernel_neon_begin();
        sha1_base_do_update(desc, data, len,
                            (sha1_block_fn *)sha1_ce_transform);
        kernel_neon_end();
@@ -52,13 +56,16 @@ static int sha1_ce_finup(struct shash_desc *desc, const u8 *data,
        struct sha1_ce_state *sctx = shash_desc_ctx(desc);
        bool finalize = !sctx->sst.count && !(len % SHA1_BLOCK_SIZE);
 
+       if (!may_use_simd())
+               return crypto_sha1_finup(desc, data, len, out);
+
        /*
         * Allow the asm code to perform the finalization if there is no
         * partial data and the input is a round multiple of the block size.
         */
        sctx->finalize = finalize;
 
-       kernel_neon_begin_partial(16);
+       kernel_neon_begin();
        sha1_base_do_update(desc, data, len,
                            (sha1_block_fn *)sha1_ce_transform);
        if (!finalize)
@@ -71,8 +78,11 @@ static int sha1_ce_final(struct shash_desc *desc, u8 *out)
 {
        struct sha1_ce_state *sctx = shash_desc_ctx(desc);
 
+       if (!may_use_simd())
+               return crypto_sha1_finup(desc, NULL, 0, out);
+
        sctx->finalize = 0;
-       kernel_neon_begin_partial(16);
+       kernel_neon_begin();
        sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform);
        kernel_neon_end();
        return sha1_base_finish(desc, out);
index 0ed9486f75dd928568c86d1fdd3ad9bca3cde47a..fd1ff2b13dfa35aa356b2511744808a848a56a97 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * sha2-ce-glue.c - SHA-224/SHA-256 using ARMv8 Crypto Extensions
  *
- * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2014 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -9,6 +9,7 @@
  */
 
 #include <asm/neon.h>
+#include <asm/simd.h>
 #include <asm/unaligned.h>
 #include <crypto/internal/hash.h>
 #include <crypto/sha.h>
@@ -34,13 +35,19 @@ const u32 sha256_ce_offsetof_count = offsetof(struct sha256_ce_state,
 const u32 sha256_ce_offsetof_finalize = offsetof(struct sha256_ce_state,
                                                 finalize);
 
+asmlinkage void sha256_block_data_order(u32 *digest, u8 const *src, int blocks);
+
 static int sha256_ce_update(struct shash_desc *desc, const u8 *data,
                            unsigned int len)
 {
        struct sha256_ce_state *sctx = shash_desc_ctx(desc);
 
+       if (!may_use_simd())
+               return sha256_base_do_update(desc, data, len,
+                               (sha256_block_fn *)sha256_block_data_order);
+
        sctx->finalize = 0;
-       kernel_neon_begin_partial(28);
+       kernel_neon_begin();
        sha256_base_do_update(desc, data, len,
                              (sha256_block_fn *)sha2_ce_transform);
        kernel_neon_end();
@@ -54,13 +61,22 @@ static int sha256_ce_finup(struct shash_desc *desc, const u8 *data,
        struct sha256_ce_state *sctx = shash_desc_ctx(desc);
        bool finalize = !sctx->sst.count && !(len % SHA256_BLOCK_SIZE);
 
+       if (!may_use_simd()) {
+               if (len)
+                       sha256_base_do_update(desc, data, len,
+                               (sha256_block_fn *)sha256_block_data_order);
+               sha256_base_do_finalize(desc,
+                               (sha256_block_fn *)sha256_block_data_order);
+               return sha256_base_finish(desc, out);
+       }
+
        /*
         * Allow the asm code to perform the finalization if there is no
         * partial data and the input is a round multiple of the block size.
         */
        sctx->finalize = finalize;
 
-       kernel_neon_begin_partial(28);
+       kernel_neon_begin();
        sha256_base_do_update(desc, data, len,
                              (sha256_block_fn *)sha2_ce_transform);
        if (!finalize)
@@ -74,8 +90,14 @@ static int sha256_ce_final(struct shash_desc *desc, u8 *out)
 {
        struct sha256_ce_state *sctx = shash_desc_ctx(desc);
 
+       if (!may_use_simd()) {
+               sha256_base_do_finalize(desc,
+                               (sha256_block_fn *)sha256_block_data_order);
+               return sha256_base_finish(desc, out);
+       }
+
        sctx->finalize = 0;
-       kernel_neon_begin_partial(28);
+       kernel_neon_begin();
        sha256_base_do_finalize(desc, (sha256_block_fn *)sha2_ce_transform);
        kernel_neon_end();
        return sha256_base_finish(desc, out);
index a2226f8419609c461cab422337fd604ded53c8ee..b064d925fe2a7552222c568754e03d474c11a5aa 100644 (file)
@@ -29,6 +29,7 @@ MODULE_ALIAS_CRYPTO("sha256");
 
 asmlinkage void sha256_block_data_order(u32 *digest, const void *data,
                                        unsigned int num_blks);
+EXPORT_SYMBOL(sha256_block_data_order);
 
 asmlinkage void sha256_block_neon(u32 *digest, const void *data,
                                  unsigned int num_blks);
index c90930de76ba8670041598ba0d6461ef439c9539..3cd4f6b198b65665046101184f579b51138a688d 100644 (file)
@@ -344,8 +344,7 @@ static void ctr_crypt_final(struct crypto_sparc64_aes_ctx *ctx,
 
        ctx->ops->ecb_encrypt(&ctx->key[0], (const u64 *)ctrblk,
                              keystream, AES_BLOCK_SIZE);
-       crypto_xor((u8 *) keystream, src, nbytes);
-       memcpy(dst, keystream, nbytes);
+       crypto_xor_cpy(dst, (u8 *) keystream, src, nbytes);
        crypto_inc(ctrblk, AES_BLOCK_SIZE);
 }
 
index 4a55cdcdc0082790bcf6457ab43e1a84caea7388..5c15d6b573299e75c2eec54d98bbde9f48e67637 100644 (file)
@@ -475,8 +475,8 @@ static void ctr_crypt_final(struct crypto_aes_ctx *ctx,
        unsigned int nbytes = walk->nbytes;
 
        aesni_enc(ctx, keystream, ctrblk);
-       crypto_xor(keystream, src, nbytes);
-       memcpy(dst, keystream, nbytes);
+       crypto_xor_cpy(dst, keystream, src, nbytes);
+
        crypto_inc(ctrblk, AES_BLOCK_SIZE);
 }
 
index 17c05531dfd1752234eb9b6835f848b12ed07817..f9eca34301e20fccec4b8daa5a0cc98710fa3094 100644 (file)
@@ -271,8 +271,7 @@ static void ctr_crypt_final(struct bf_ctx *ctx, struct blkcipher_walk *walk)
        unsigned int nbytes = walk->nbytes;
 
        blowfish_enc_blk(ctx, keystream, ctrblk);
-       crypto_xor(keystream, src, nbytes);
-       memcpy(dst, keystream, nbytes);
+       crypto_xor_cpy(dst, keystream, src, nbytes);
 
        crypto_inc(ctrblk, BF_BLOCK_SIZE);
 }
index 8648158f39166f1cf1603fc7f139af629e163bde..dbea6020ffe7d462bd93bf703ccecbe1f817d3f4 100644 (file)
@@ -256,8 +256,7 @@ static void ctr_crypt_final(struct blkcipher_desc *desc,
        unsigned int nbytes = walk->nbytes;
 
        __cast5_encrypt(ctx, keystream, ctrblk);
-       crypto_xor(keystream, src, nbytes);
-       memcpy(dst, keystream, nbytes);
+       crypto_xor_cpy(dst, keystream, src, nbytes);
 
        crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
 }
index d6fc59aaaadfbc0461a9aaab1557eb27f7841402..30c0a37f488264651460d5de9df30a097164ed88 100644 (file)
@@ -277,8 +277,7 @@ static void ctr_crypt_final(struct des3_ede_x86_ctx *ctx,
        unsigned int nbytes = walk->nbytes;
 
        des3_ede_enc_blk(ctx, keystream, ctrblk);
-       crypto_xor(keystream, src, nbytes);
-       memcpy(dst, keystream, nbytes);
+       crypto_xor_cpy(dst, keystream, src, nbytes);
 
        crypto_inc(ctrblk, DES3_EDE_BLOCK_SIZE);
 }
index caa770e535a2e5c01b7dd2455ed80378014b1ec9..0a121f9ddf8e315affaef0f668e19dbf81cff9f0 100644 (file)
@@ -1753,6 +1753,8 @@ config CRYPTO_USER_API_AEAD
        tristate "User-space interface for AEAD cipher algorithms"
        depends on NET
        select CRYPTO_AEAD
+       select CRYPTO_BLKCIPHER
+       select CRYPTO_NULL
        select CRYPTO_USER_API
        help
          This option enables the user-space interface for AEAD
index 92a3d540d9201ff651f94b7d68dc70c672c5166b..ffa9f4ccd9b455ef36c48d3ba57d4256574cd450 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/module.h>
 #include <linux/net.h>
 #include <linux/rwsem.h>
+#include <linux/sched/signal.h>
 #include <linux/security.h>
 
 struct alg_type_list {
@@ -507,6 +508,696 @@ void af_alg_complete(struct crypto_async_request *req, int err)
 }
 EXPORT_SYMBOL_GPL(af_alg_complete);
 
+/**
+ * af_alg_alloc_tsgl - allocate the TX SGL
+ *
+ * @sk socket of connection to user space
+ * @return: 0 upon success, < 0 upon error
+ */
+int af_alg_alloc_tsgl(struct sock *sk)
+{
+       struct alg_sock *ask = alg_sk(sk);
+       struct af_alg_ctx *ctx = ask->private;
+       struct af_alg_tsgl *sgl;
+       struct scatterlist *sg = NULL;
+
+       sgl = list_entry(ctx->tsgl_list.prev, struct af_alg_tsgl, list);
+       if (!list_empty(&ctx->tsgl_list))
+               sg = sgl->sg;
+
+       if (!sg || sgl->cur >= MAX_SGL_ENTS) {
+               sgl = sock_kmalloc(sk, sizeof(*sgl) +
+                                      sizeof(sgl->sg[0]) * (MAX_SGL_ENTS + 1),
+                                  GFP_KERNEL);
+               if (!sgl)
+                       return -ENOMEM;
+
+               sg_init_table(sgl->sg, MAX_SGL_ENTS + 1);
+               sgl->cur = 0;
+
+               if (sg)
+                       sg_chain(sg, MAX_SGL_ENTS + 1, sgl->sg);
+
+               list_add_tail(&sgl->list, &ctx->tsgl_list);
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(af_alg_alloc_tsgl);
+
+/**
+ * af_alg_count_tsgl - Count number of TX SG entries
+ *
+ * The counting starts at the beginning of the SGL and covers @bytes. If
+ * an offset is provided, the counting of the SG entries starts at the offset.
+ *
+ * @sk socket of connection to user space
+ * @bytes Count the number of SG entries holding given number of bytes.
+ * @offset Start the counting of SG entries from the given offset.
+ * @return Number of TX SG entries found given the constraints
+ */
+unsigned int af_alg_count_tsgl(struct sock *sk, size_t bytes, size_t offset)
+{
+       struct alg_sock *ask = alg_sk(sk);
+       struct af_alg_ctx *ctx = ask->private;
+       struct af_alg_tsgl *sgl, *tmp;
+       unsigned int i;
+       unsigned int sgl_count = 0;
+
+       if (!bytes)
+               return 0;
+
+       list_for_each_entry_safe(sgl, tmp, &ctx->tsgl_list, list) {
+               struct scatterlist *sg = sgl->sg;
+
+               for (i = 0; i < sgl->cur; i++) {
+                       size_t bytes_count;
+
+                       /* Skip offset */
+                       if (offset >= sg[i].length) {
+                               offset -= sg[i].length;
+                               bytes -= sg[i].length;
+                               continue;
+                       }
+
+                       bytes_count = sg[i].length - offset;
+
+                       offset = 0;
+                       sgl_count++;
+
+                       /* If we have seen requested number of bytes, stop */
+                       if (bytes_count >= bytes)
+                               return sgl_count;
+
+                       bytes -= bytes_count;
+               }
+       }
+
+       return sgl_count;
+}
+EXPORT_SYMBOL_GPL(af_alg_count_tsgl);
+
+/**
+ * af_alg_pull_tsgl - Release the specified buffers from TX SGL
+ *
+ * If @dst is non-null, reassign the pages to dst. The caller must release
+ * the pages. If @dst_offset is given, only reassign the pages to @dst starting
+ * at the @dst_offset (byte). The caller must ensure that @dst is large
+ * enough (e.g. by using af_alg_count_tsgl with the same offset).
+ *
+ * @sk socket of connection to user space
+ * @used Number of bytes to pull from TX SGL
+ * @dst If non-NULL, buffer is reassigned to dst SGL instead of releasing. The
+ *     caller must release the buffers in dst.
+ * @dst_offset Reassign the TX SGL from given offset. All buffers before
+ *            reaching the offset are released.
+ */
+void af_alg_pull_tsgl(struct sock *sk, size_t used, struct scatterlist *dst,
+                     size_t dst_offset)
+{
+       struct alg_sock *ask = alg_sk(sk);
+       struct af_alg_ctx *ctx = ask->private;
+       struct af_alg_tsgl *sgl;
+       struct scatterlist *sg;
+       unsigned int i, j;
+
+       while (!list_empty(&ctx->tsgl_list)) {
+               sgl = list_first_entry(&ctx->tsgl_list, struct af_alg_tsgl,
+                                      list);
+               sg = sgl->sg;
+
+               for (i = 0, j = 0; i < sgl->cur; i++) {
+                       size_t plen = min_t(size_t, used, sg[i].length);
+                       struct page *page = sg_page(sg + i);
+
+                       if (!page)
+                               continue;
+
+                       /*
+                        * Assumption: caller created af_alg_count_tsgl(len)
+                        * SG entries in dst.
+                        */
+                       if (dst) {
+                               if (dst_offset >= plen) {
+                                       /* discard page before offset */
+                                       dst_offset -= plen;
+                               } else {
+                                       /* reassign page to dst after offset */
+                                       get_page(page);
+                                       sg_set_page(dst + j, page,
+                                                   plen - dst_offset,
+                                                   sg[i].offset + dst_offset);
+                                       dst_offset = 0;
+                                       j++;
+                               }
+                       }
+
+                       sg[i].length -= plen;
+                       sg[i].offset += plen;
+
+                       used -= plen;
+                       ctx->used -= plen;
+
+                       if (sg[i].length)
+                               return;
+
+                       put_page(page);
+                       sg_assign_page(sg + i, NULL);
+               }
+
+               list_del(&sgl->list);
+               sock_kfree_s(sk, sgl, sizeof(*sgl) + sizeof(sgl->sg[0]) *
+                                                    (MAX_SGL_ENTS + 1));
+       }
+
+       if (!ctx->used)
+               ctx->merge = 0;
+}
+EXPORT_SYMBOL_GPL(af_alg_pull_tsgl);
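
The kernel-doc above ties af_alg_count_tsgl() and af_alg_pull_tsgl() together: the caller sizes the destination SGL with the former before reassigning the processed pages with the latter. A minimal sketch of that pattern, not part of this patch and relying on the includes already present in crypto/af_alg.c; the helper name, "processed" and the af_alg_async_req fields are illustrative:

static int example_extract_processed_tsgl(struct sock *sk,
					   struct af_alg_async_req *areq,
					   size_t processed)
{
	/* size the destination SGL for the bytes that were processed */
	areq->tsgl_entries = af_alg_count_tsgl(sk, processed, 0);
	if (!areq->tsgl_entries)
		areq->tsgl_entries = 1;

	areq->tsgl = sock_kmalloc(sk,
				  sizeof(*areq->tsgl) * areq->tsgl_entries,
				  GFP_KERNEL);
	if (!areq->tsgl)
		return -ENOMEM;
	sg_init_table(areq->tsgl, areq->tsgl_entries);

	/* reassign the processed TX pages; they are released later together
	 * with the RX SGL */
	af_alg_pull_tsgl(sk, processed, areq->tsgl, 0);
	return 0;
}
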
+
+/**
+ * af_alg_free_areq_sgls - Release TX and RX SGLs of the request
+ *
+ * @areq Request holding the TX and RX SGL
+ */
+void af_alg_free_areq_sgls(struct af_alg_async_req *areq)
+{
+       struct sock *sk = areq->sk;
+       struct alg_sock *ask = alg_sk(sk);
+       struct af_alg_ctx *ctx = ask->private;
+       struct af_alg_rsgl *rsgl, *tmp;
+       struct scatterlist *tsgl;
+       struct scatterlist *sg;
+       unsigned int i;
+
+       list_for_each_entry_safe(rsgl, tmp, &areq->rsgl_list, list) {
+               ctx->rcvused -= rsgl->sg_num_bytes;
+               af_alg_free_sg(&rsgl->sgl);
+               list_del(&rsgl->list);
+               if (rsgl != &areq->first_rsgl)
+                       sock_kfree_s(sk, rsgl, sizeof(*rsgl));
+       }
+
+       tsgl = areq->tsgl;
+       for_each_sg(tsgl, sg, areq->tsgl_entries, i) {
+               if (!sg_page(sg))
+                       continue;
+               put_page(sg_page(sg));
+       }
+
+       if (areq->tsgl && areq->tsgl_entries)
+               sock_kfree_s(sk, tsgl, areq->tsgl_entries * sizeof(*tsgl));
+}
+EXPORT_SYMBOL_GPL(af_alg_free_areq_sgls);
+
+/**
+ * af_alg_wait_for_wmem - wait for availability of writable memory
+ *
+ * @sk socket of connection to user space
+ * @flags If MSG_DONTWAIT is set, then only report if function would sleep
+ * @return 0 when writable memory is available, < 0 upon error
+ */
+int af_alg_wait_for_wmem(struct sock *sk, unsigned int flags)
+{
+       DEFINE_WAIT_FUNC(wait, woken_wake_function);
+       int err = -ERESTARTSYS;
+       long timeout;
+
+       if (flags & MSG_DONTWAIT)
+               return -EAGAIN;
+
+       sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
+
+       add_wait_queue(sk_sleep(sk), &wait);
+       for (;;) {
+               if (signal_pending(current))
+                       break;
+               timeout = MAX_SCHEDULE_TIMEOUT;
+               if (sk_wait_event(sk, &timeout, af_alg_writable(sk), &wait)) {
+                       err = 0;
+                       break;
+               }
+       }
+       remove_wait_queue(sk_sleep(sk), &wait);
+
+       return err;
+}
+EXPORT_SYMBOL_GPL(af_alg_wait_for_wmem);
+
+/**
+ * af_alg_wmem_wakeup - wakeup caller when writable memory is available
+ *
+ * @sk socket of connection to user space
+ */
+void af_alg_wmem_wakeup(struct sock *sk)
+{
+       struct socket_wq *wq;
+
+       if (!af_alg_writable(sk))
+               return;
+
+       rcu_read_lock();
+       wq = rcu_dereference(sk->sk_wq);
+       if (skwq_has_sleeper(wq))
+               wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
+                                                          POLLRDNORM |
+                                                          POLLRDBAND);
+       sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
+       rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(af_alg_wmem_wakeup);
+
+/**
+ * af_alg_wait_for_data - wait for availability of TX data
+ *
+ * @sk socket of connection to user space
+ * @flags If MSG_DONTWAIT is set, then only report if function would sleep
+ * @return 0 when data is available, < 0 upon error
+ */
+int af_alg_wait_for_data(struct sock *sk, unsigned flags)
+{
+       DEFINE_WAIT_FUNC(wait, woken_wake_function);
+       struct alg_sock *ask = alg_sk(sk);
+       struct af_alg_ctx *ctx = ask->private;
+       long timeout;
+       int err = -ERESTARTSYS;
+
+       if (flags & MSG_DONTWAIT)
+               return -EAGAIN;
+
+       sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+
+       add_wait_queue(sk_sleep(sk), &wait);
+       for (;;) {
+               if (signal_pending(current))
+                       break;
+               timeout = MAX_SCHEDULE_TIMEOUT;
+               if (sk_wait_event(sk, &timeout, (ctx->used || !ctx->more),
+                                 &wait)) {
+                       err = 0;
+                       break;
+               }
+       }
+       remove_wait_queue(sk_sleep(sk), &wait);
+
+       sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+
+       return err;
+}
+EXPORT_SYMBOL_GPL(af_alg_wait_for_data);
+
+/**
+ * af_alg_data_wakeup - wakeup caller when new data can be sent to kernel
+ *
+ * @sk socket of connection to user space
+ */
+
+void af_alg_data_wakeup(struct sock *sk)
+{
+       struct alg_sock *ask = alg_sk(sk);
+       struct af_alg_ctx *ctx = ask->private;
+       struct socket_wq *wq;
+
+       if (!ctx->used)
+               return;
+
+       rcu_read_lock();
+       wq = rcu_dereference(sk->sk_wq);
+       if (skwq_has_sleeper(wq))
+               wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
+                                                          POLLRDNORM |
+                                                          POLLRDBAND);
+       sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
+       rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(af_alg_data_wakeup);
+
+/**
+ * af_alg_sendmsg - implementation of sendmsg system call handler
+ *
+ * The sendmsg system call handler obtains the user data and stores it
+ * in ctx->tsgl_list. This implies allocation of the required number of
+ * struct af_alg_tsgl.
+ *
+ * In addition, the ctx is filled with the information sent via CMSG.
+ *
+ * @sock socket of connection to user space
+ * @msg message from user space
+ * @size size of message from user space
+ * @ivsize the size of the IV for the cipher operation to verify that the
+ *        user-space-provided IV has the right size
+ * @return the number of bytes copied upon success, < 0 upon error
+ */
+int af_alg_sendmsg(struct socket *sock, struct msghdr *msg, size_t size,
+                  unsigned int ivsize)
+{
+       struct sock *sk = sock->sk;
+       struct alg_sock *ask = alg_sk(sk);
+       struct af_alg_ctx *ctx = ask->private;
+       struct af_alg_tsgl *sgl;
+       struct af_alg_control con = {};
+       long copied = 0;
+       bool enc = 0;
+       bool init = 0;
+       int err = 0;
+
+       if (msg->msg_controllen) {
+               err = af_alg_cmsg_send(msg, &con);
+               if (err)
+                       return err;
+
+               init = 1;
+               switch (con.op) {
+               case ALG_OP_ENCRYPT:
+                       enc = 1;
+                       break;
+               case ALG_OP_DECRYPT:
+                       enc = 0;
+                       break;
+               default:
+                       return -EINVAL;
+               }
+
+               if (con.iv && con.iv->ivlen != ivsize)
+                       return -EINVAL;
+       }
+
+       lock_sock(sk);
+       if (!ctx->more && ctx->used) {
+               err = -EINVAL;
+               goto unlock;
+       }
+
+       if (init) {
+               ctx->enc = enc;
+               if (con.iv)
+                       memcpy(ctx->iv, con.iv->iv, ivsize);
+
+               ctx->aead_assoclen = con.aead_assoclen;
+       }
+
+       while (size) {
+               struct scatterlist *sg;
+               size_t len = size;
+               size_t plen;
+
+               /* use the existing memory in an allocated page */
+               if (ctx->merge) {
+                       sgl = list_entry(ctx->tsgl_list.prev,
+                                        struct af_alg_tsgl, list);
+                       sg = sgl->sg + sgl->cur - 1;
+                       len = min_t(size_t, len,
+                                   PAGE_SIZE - sg->offset - sg->length);
+
+                       err = memcpy_from_msg(page_address(sg_page(sg)) +
+                                             sg->offset + sg->length,
+                                             msg, len);
+                       if (err)
+                               goto unlock;
+
+                       sg->length += len;
+                       ctx->merge = (sg->offset + sg->length) &
+                                    (PAGE_SIZE - 1);
+
+                       ctx->used += len;
+                       copied += len;
+                       size -= len;
+                       continue;
+               }
+
+               if (!af_alg_writable(sk)) {
+                       err = af_alg_wait_for_wmem(sk, msg->msg_flags);
+                       if (err)
+                               goto unlock;
+               }
+
+               /* allocate a new page */
+               len = min_t(unsigned long, len, af_alg_sndbuf(sk));
+
+               err = af_alg_alloc_tsgl(sk);
+               if (err)
+                       goto unlock;
+
+               sgl = list_entry(ctx->tsgl_list.prev, struct af_alg_tsgl,
+                                list);
+               sg = sgl->sg;
+               if (sgl->cur)
+                       sg_unmark_end(sg + sgl->cur - 1);
+
+               do {
+                       unsigned int i = sgl->cur;
+
+                       plen = min_t(size_t, len, PAGE_SIZE);
+
+                       sg_assign_page(sg + i, alloc_page(GFP_KERNEL));
+                       if (!sg_page(sg + i)) {
+                               err = -ENOMEM;
+                               goto unlock;
+                       }
+
+                       err = memcpy_from_msg(page_address(sg_page(sg + i)),
+                                             msg, plen);
+                       if (err) {
+                               __free_page(sg_page(sg + i));
+                               sg_assign_page(sg + i, NULL);
+                               goto unlock;
+                       }
+
+                       sg[i].length = plen;
+                       len -= plen;
+                       ctx->used += plen;
+                       copied += plen;
+                       size -= plen;
+                       sgl->cur++;
+               } while (len && sgl->cur < MAX_SGL_ENTS);
+
+               if (!size)
+                       sg_mark_end(sg + sgl->cur - 1);
+
+               ctx->merge = plen & (PAGE_SIZE - 1);
+       }
+
+       err = 0;
+
+       ctx->more = msg->msg_flags & MSG_MORE;
+
+unlock:
+       af_alg_data_wakeup(sk);
+       release_sock(sk);
+
+       return copied ?: err;
+}
+EXPORT_SYMBOL_GPL(af_alg_sendmsg);
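
On the user-space side of this interface, af_alg_sendmsg() consumes the ALG_SET_OP, ALG_SET_IV and ALG_SET_AEAD_ASSOCLEN control messages. A minimal sketch of that CMSG layout, assuming an already bound and accepted AF_ALG "aead" socket and a 12-byte gcm(aes) IV; the function name and buffers are illustrative and error handling is omitted:

#include <linux/if_alg.h>
#include <sys/socket.h>
#include <string.h>

#ifndef SOL_ALG
#define SOL_ALG 279
#endif

#define EXAMPLE_IVLEN 12	/* gcm(aes) IV size assumed here */

static ssize_t example_send_aead(int opfd,
				 const unsigned char iv[EXAMPLE_IVLEN],
				 const void *in, size_t inlen,
				 unsigned int assoclen)
{
	char cbuf[CMSG_SPACE(sizeof(__u32)) +
		  CMSG_SPACE(sizeof(struct af_alg_iv) + EXAMPLE_IVLEN) +
		  CMSG_SPACE(sizeof(__u32))] = { 0 };
	struct iovec iov = { .iov_base = (void *)in, .iov_len = inlen };
	struct msghdr msg = {
		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
		.msg_iov = &iov, .msg_iovlen = 1,
	};
	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
	struct af_alg_iv *alg_iv;

	/* ALG_SET_OP: selects encryption or decryption (ctx->enc) */
	cmsg->cmsg_level = SOL_ALG;
	cmsg->cmsg_type = ALG_SET_OP;
	cmsg->cmsg_len = CMSG_LEN(sizeof(__u32));
	*(__u32 *)CMSG_DATA(cmsg) = ALG_OP_ENCRYPT;

	/* ALG_SET_IV: length-prefixed IV, checked against the cipher ivsize */
	cmsg = CMSG_NXTHDR(&msg, cmsg);
	cmsg->cmsg_level = SOL_ALG;
	cmsg->cmsg_type = ALG_SET_IV;
	cmsg->cmsg_len = CMSG_LEN(sizeof(*alg_iv) + EXAMPLE_IVLEN);
	alg_iv = (struct af_alg_iv *)CMSG_DATA(cmsg);
	alg_iv->ivlen = EXAMPLE_IVLEN;
	memcpy(alg_iv->iv, iv, EXAMPLE_IVLEN);

	/* ALG_SET_AEAD_ASSOCLEN: leading bytes treated as associated data */
	cmsg = CMSG_NXTHDR(&msg, cmsg);
	cmsg->cmsg_level = SOL_ALG;
	cmsg->cmsg_type = ALG_SET_AEAD_ASSOCLEN;
	cmsg->cmsg_len = CMSG_LEN(sizeof(__u32));
	*(__u32 *)CMSG_DATA(cmsg) = assoclen;

	return sendmsg(opfd, &msg, 0);
}
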
+
+/**
+ * af_alg_sendpage - sendpage system call handler
+ *
+ * This is a generic implementation of sendpage to fill ctx->tsgl_list.
+ */
+ssize_t af_alg_sendpage(struct socket *sock, struct page *page,
+                       int offset, size_t size, int flags)
+{
+       struct sock *sk = sock->sk;
+       struct alg_sock *ask = alg_sk(sk);
+       struct af_alg_ctx *ctx = ask->private;
+       struct af_alg_tsgl *sgl;
+       int err = -EINVAL;
+
+       if (flags & MSG_SENDPAGE_NOTLAST)
+               flags |= MSG_MORE;
+
+       lock_sock(sk);
+       if (!ctx->more && ctx->used)
+               goto unlock;
+
+       if (!size)
+               goto done;
+
+       if (!af_alg_writable(sk)) {
+               err = af_alg_wait_for_wmem(sk, flags);
+               if (err)
+                       goto unlock;
+       }
+
+       err = af_alg_alloc_tsgl(sk);
+       if (err)
+               goto unlock;
+
+       ctx->merge = 0;
+       sgl = list_entry(ctx->tsgl_list.prev, struct af_alg_tsgl, list);
+
+       if (sgl->cur)
+               sg_unmark_end(sgl->sg + sgl->cur - 1);
+
+       sg_mark_end(sgl->sg + sgl->cur);
+
+       get_page(page);
+       sg_set_page(sgl->sg + sgl->cur, page, size, offset);
+       sgl->cur++;
+       ctx->used += size;
+
+done:
+       ctx->more = flags & MSG_MORE;
+
+unlock:
+       af_alg_data_wakeup(sk);
+       release_sock(sk);
+
+       return err ?: size;
+}
+EXPORT_SYMBOL_GPL(af_alg_sendpage);
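
af_alg_sendpage() is reached from user space through the splice path rather than through write/sendmsg. A minimal sketch of the usual vmsplice/splice pairing, assuming a helper pipe created with pipe(); names are illustrative and error handling is omitted:

#define _GNU_SOURCE
#include <fcntl.h>
#include <sys/uio.h>

static ssize_t example_send_zero_copy(int opfd, int pipefd[2],
				      const void *buf, size_t len, int more)
{
	struct iovec iov = { .iov_base = (void *)buf, .iov_len = len };

	/* gift the user pages to the pipe ... */
	if (vmsplice(pipefd[1], &iov, 1, SPLICE_F_GIFT) < 0)
		return -1;

	/* ... and splice them into the AF_ALG socket; on the kernel side this
	 * lands in af_alg_sendpage(), with SPLICE_F_MORE keeping ctx->more */
	return splice(pipefd[0], NULL, opfd, NULL, len,
		      more ? SPLICE_F_MORE : 0);
}
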
+
+/**
+ * af_alg_async_cb - AIO callback handler
+ *
+ * This handler cleans up the struct af_alg_async_req upon completion of the
+ * AIO operation.
+ *
+ * The number of bytes to be generated with the AIO operation must be set
+ * in areq->outlen before the AIO callback handler is invoked.
+ */
+void af_alg_async_cb(struct crypto_async_request *_req, int err)
+{
+       struct af_alg_async_req *areq = _req->data;
+       struct sock *sk = areq->sk;
+       struct kiocb *iocb = areq->iocb;
+       unsigned int resultlen;
+
+       lock_sock(sk);
+
+       /* Buffer size written by crypto operation. */
+       resultlen = areq->outlen;
+
+       af_alg_free_areq_sgls(areq);
+       sock_kfree_s(sk, areq, areq->areqlen);
+       __sock_put(sk);
+
+       iocb->ki_complete(iocb, err ? err : resultlen, 0);
+
+       release_sock(sk);
+}
+EXPORT_SYMBOL_GPL(af_alg_async_cb);
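
A sketch of how a recvmsg implementation might arm this callback for AIO, honouring the rule above that areq->outlen is set before the request is submitted. The cra_u.aead_req field follows the af_alg_async_req layout added elsewhere in this series and should be read as illustrative, as should the function name:

static int example_submit_aio(struct sock *sk, struct kiocb *iocb,
			      struct af_alg_async_req *areq,
			      size_t outlen, bool enc)
{
	int err;

	sock_hold(sk);			/* dropped again in af_alg_async_cb() */
	areq->iocb = iocb;
	areq->outlen = outlen;		/* bytes the callback will report */

	aead_request_set_callback(&areq->cra_u.aead_req,
				  CRYPTO_TFM_REQ_MAY_BACKLOG,
				  af_alg_async_cb, areq);
	err = enc ? crypto_aead_encrypt(&areq->cra_u.aead_req) :
		    crypto_aead_decrypt(&areq->cra_u.aead_req);

	/* completion, including SGL cleanup, is delivered through the iocb */
	return (err == -EINPROGRESS || err == -EBUSY) ? -EIOCBQUEUED : err;
}
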
+
+/**
+ * af_alg_poll - poll system call handler
+ */
+unsigned int af_alg_poll(struct file *file, struct socket *sock,
+                        poll_table *wait)
+{
+       struct sock *sk = sock->sk;
+       struct alg_sock *ask = alg_sk(sk);
+       struct af_alg_ctx *ctx = ask->private;
+       unsigned int mask;
+
+       sock_poll_wait(file, sk_sleep(sk), wait);
+       mask = 0;
+
+       if (!ctx->more || ctx->used)
+               mask |= POLLIN | POLLRDNORM;
+
+       if (af_alg_writable(sk))
+               mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+
+       return mask;
+}
+EXPORT_SYMBOL_GPL(af_alg_poll);
+
+/**
+ * af_alg_alloc_areq - allocate struct af_alg_async_req
+ *
+ * @sk socket of connection to user space
+ * @areqlen size of struct af_alg_async_req + crypto_*_reqsize
+ * @return allocated data structure or ERR_PTR upon error
+ */
+struct af_alg_async_req *af_alg_alloc_areq(struct sock *sk,
+                                          unsigned int areqlen)
+{
+       struct af_alg_async_req *areq = sock_kmalloc(sk, areqlen, GFP_KERNEL);
+
+       if (unlikely(!areq))
+               return ERR_PTR(-ENOMEM);
+
+       areq->areqlen = areqlen;
+       areq->sk = sk;
+       areq->last_rsgl = NULL;
+       INIT_LIST_HEAD(&areq->rsgl_list);
+       areq->tsgl = NULL;
+       areq->tsgl_entries = 0;
+
+       return areq;
+}
+EXPORT_SYMBOL_GPL(af_alg_alloc_areq);
+
+/**
+ * af_alg_get_rsgl - create the RX SGL for the output data from the crypto
+ *                  operation
+ *
+ * @sk socket of connection to user space
+ * @msg user space message
+ * @flags flags used to invoke recvmsg with
+ * @areq instance of the cryptographic request that will hold the RX SGL
+ * @maxsize maximum number of bytes to be pulled from user space
+ * @outlen number of bytes in the RX SGL
+ * @return 0 on success, < 0 upon error
+ */
+int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags,
+                   struct af_alg_async_req *areq, size_t maxsize,
+                   size_t *outlen)
+{
+       struct alg_sock *ask = alg_sk(sk);
+       struct af_alg_ctx *ctx = ask->private;
+       size_t len = 0;
+
+       while (maxsize > len && msg_data_left(msg)) {
+               struct af_alg_rsgl *rsgl;
+               size_t seglen;
+               int err;
+
+               /* limit the amount of readable buffers */
+               if (!af_alg_readable(sk))
+                       break;
+
+               if (!ctx->used) {
+                       err = af_alg_wait_for_data(sk, flags);
+                       if (err)
+                               return err;
+               }
+
+               seglen = min_t(size_t, (maxsize - len),
+                              msg_data_left(msg));
+
+               if (list_empty(&areq->rsgl_list)) {
+                       rsgl = &areq->first_rsgl;
+               } else {
+                       rsgl = sock_kmalloc(sk, sizeof(*rsgl), GFP_KERNEL);
+                       if (unlikely(!rsgl))
+                               return -ENOMEM;
+               }
+
+               rsgl->sgl.npages = 0;
+               list_add_tail(&rsgl->list, &areq->rsgl_list);
+
+               /* make one iovec available as scatterlist */
+               err = af_alg_make_sg(&rsgl->sgl, &msg->msg_iter, seglen);
+               if (err < 0)
+                       return err;
+
+               /* chain the new scatterlist with previous one */
+               if (areq->last_rsgl)
+                       af_alg_link_sg(&areq->last_rsgl->sgl, &rsgl->sgl);
+
+               areq->last_rsgl = rsgl;
+               len += err;
+               ctx->rcvused += err;
+               rsgl->sg_num_bytes = err;
+               iov_iter_advance(&msg->msg_iter, err);
+       }
+
+       *outlen = len;
+       return 0;
+}
+EXPORT_SYMBOL_GPL(af_alg_get_rsgl);
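
Together with af_alg_alloc_areq() above, this gives the RX-side setup a recvmsg implementation performs before issuing the cipher request. A minimal sketch, not part of this patch, with illustrative names and parameters:

static int example_recvmsg_setup(struct sock *sk, struct msghdr *msg,
				 int flags, unsigned int reqsize,
				 size_t maxsize,
				 struct af_alg_async_req **out_areq,
				 size_t *outlen)
{
	struct af_alg_async_req *areq;
	int err;

	/* request structure sized for the transform's request size */
	areq = af_alg_alloc_areq(sk, sizeof(*areq) + reqsize);
	if (IS_ERR(areq))
		return PTR_ERR(areq);

	/* map the user buffer from msg into the RX SGL */
	err = af_alg_get_rsgl(sk, msg, flags, areq, maxsize, outlen);
	if (err) {
		af_alg_free_areq_sgls(areq);
		sock_kfree_s(sk, areq, areq->areqlen);
		return err;
	}

	*out_areq = areq;
	return 0;
}
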
+
 static int __init af_alg_init(void)
 {
        int err = proto_register(&alg_proto, 0);
index 826cd7ab4d4a2ec830438b40e987e230bd03f32f..5e8666e6ccaeaa430f85266ee92f73178c993f6a 100644 (file)
@@ -588,6 +588,35 @@ int crypto_unregister_ahash(struct ahash_alg *alg)
 }
 EXPORT_SYMBOL_GPL(crypto_unregister_ahash);
 
+int crypto_register_ahashes(struct ahash_alg *algs, int count)
+{
+       int i, ret;
+
+       for (i = 0; i < count; i++) {
+               ret = crypto_register_ahash(&algs[i]);
+               if (ret)
+                       goto err;
+       }
+
+       return 0;
+
+err:
+       for (--i; i >= 0; --i)
+               crypto_unregister_ahash(&algs[i]);
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(crypto_register_ahashes);
+
+void crypto_unregister_ahashes(struct ahash_alg *algs, int count)
+{
+       int i;
+
+       for (i = count - 1; i >= 0; --i)
+               crypto_unregister_ahash(&algs[i]);
+}
+EXPORT_SYMBOL_GPL(crypto_unregister_ahashes);
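
The new bulk interface mirrors crypto_register_shashes(): registration unwinds on failure and unregistration walks the array in reverse. A hedged sketch of driver usage; the algorithm array and platform driver callbacks are hypothetical:

#include <crypto/internal/hash.h>
#include <linux/platform_device.h>

/* hypothetical descriptors; a real driver fills in init/update/final etc. */
static struct ahash_alg example_ahash_algs[2];

static int example_probe(struct platform_device *pdev)
{
	/* register all variants, unwinding automatically if one fails */
	return crypto_register_ahashes(example_ahash_algs,
				       ARRAY_SIZE(example_ahash_algs));
}

static int example_remove(struct platform_device *pdev)
{
	crypto_unregister_ahashes(example_ahash_algs,
				  ARRAY_SIZE(example_ahash_algs));
	return 0;
}
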
+
 int ahash_register_instance(struct crypto_template *tmpl,
                            struct ahash_instance *inst)
 {
index e4cc7615a13954e581f020c7937eeec099102a26..aa699ff6c876565c0f716e39597edaa15ba9a6d8 100644 (file)
@@ -975,13 +975,15 @@ void crypto_inc(u8 *a, unsigned int size)
 }
 EXPORT_SYMBOL_GPL(crypto_inc);
 
-void __crypto_xor(u8 *dst, const u8 *src, unsigned int len)
+void __crypto_xor(u8 *dst, const u8 *src1, const u8 *src2, unsigned int len)
 {
        int relalign = 0;
 
        if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) {
                int size = sizeof(unsigned long);
-               int d = ((unsigned long)dst ^ (unsigned long)src) & (size - 1);
+               int d = (((unsigned long)dst ^ (unsigned long)src1) |
+                        ((unsigned long)dst ^ (unsigned long)src2)) &
+                       (size - 1);
 
                relalign = d ? 1 << __ffs(d) : size;
 
@@ -992,34 +994,37 @@ void __crypto_xor(u8 *dst, const u8 *src, unsigned int len)
                 * process the remainder of the input using optimal strides.
                 */
                while (((unsigned long)dst & (relalign - 1)) && len > 0) {
-                       *dst++ ^= *src++;
+                       *dst++ = *src1++ ^ *src2++;
                        len--;
                }
        }
 
        while (IS_ENABLED(CONFIG_64BIT) && len >= 8 && !(relalign & 7)) {
-               *(u64 *)dst ^= *(u64 *)src;
+               *(u64 *)dst = *(u64 *)src1 ^  *(u64 *)src2;
                dst += 8;
-               src += 8;
+               src1 += 8;
+               src2 += 8;
                len -= 8;
        }
 
        while (len >= 4 && !(relalign & 3)) {
-               *(u32 *)dst ^= *(u32 *)src;
+               *(u32 *)dst = *(u32 *)src1 ^ *(u32 *)src2;
                dst += 4;
-               src += 4;
+               src1 += 4;
+               src2 += 4;
                len -= 4;
        }
 
        while (len >= 2 && !(relalign & 1)) {
-               *(u16 *)dst ^= *(u16 *)src;
+               *(u16 *)dst = *(u16 *)src1 ^ *(u16 *)src2;
                dst += 2;
-               src += 2;
+               src1 += 2;
+               src2 += 2;
                len -= 2;
        }
 
        while (len--)
-               *dst++ ^= *src++;
+               *dst++ = *src1++ ^ *src2++;
 }
 EXPORT_SYMBOL_GPL(__crypto_xor);
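
The extra source operand is what allows the CTR finalization hunks above to fold their crypto_xor()+memcpy() pairs into a single crypto_xor_cpy() call. A simplified sketch of such a wrapper, assuming it may forward straight to __crypto_xor(); the actual inline in include/crypto/algapi.h may add a constant-size fast path, so treat this as an assumption:

/* simplified assumption of the wrapper: dst = src1 ^ src2 in one pass,
 * without an intermediate keystream copy */
static inline void crypto_xor_cpy_sketch(u8 *dst, const u8 *src1,
					 const u8 *src2, unsigned int size)
{
	__crypto_xor(dst, src1, src2, size);
}
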
 
index be117495eb43b6ef3caa5d49f6404fd073f5c679..516b38c3a169552dabca7e3912cd24ba240784ab 100644 (file)
@@ -5,88 +5,56 @@
  *
  * This file provides the user-space API for AEAD ciphers.
  *
- * This file is derived from algif_skcipher.c.
- *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the Free
  * Software Foundation; either version 2 of the License, or (at your option)
  * any later version.
+ *
+ * The following concept of the memory management is used:
+ *
+ * The kernel maintains two SGLs, the TX SGL and the RX SGL. The TX SGL is
+ * filled by user space with the data submitted via sendpage/sendmsg. Filling
+ * up the TX SGL does not cause a crypto operation -- the data will only be
+ * tracked by the kernel. For each recvmsg call, the caller must provide a
+ * buffer which is tracked with the RX SGL.
+ *
+ * During the processing of the recvmsg operation, the cipher request is
+ * allocated and prepared. As part of the recvmsg operation, the processed
+ * TX buffers are extracted from the TX SGL into a separate SGL.
+ *
+ * After the completion of the crypto operation, the RX SGL and the cipher
+ * request are released. The extracted TX SGL parts are released together with
+ * the RX SGL release.
  */
 
 #include <crypto/internal/aead.h>
 #include <crypto/scatterwalk.h>
 #include <crypto/if_alg.h>
+#include <crypto/skcipher.h>
+#include <crypto/null.h>
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/kernel.h>
-#include <linux/sched/signal.h>
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/net.h>
 #include <net/sock.h>
 
-struct aead_sg_list {
-       unsigned int cur;
-       struct scatterlist sg[ALG_MAX_PAGES];
-};
-
-struct aead_async_rsgl {
-       struct af_alg_sgl sgl;
-       struct list_head list;
-};
-
-struct aead_async_req {
-       struct scatterlist *tsgl;
-       struct aead_async_rsgl first_rsgl;
-       struct list_head list;
-       struct kiocb *iocb;
-       struct sock *sk;
-       unsigned int tsgls;
-       char iv[];
-};
-
 struct aead_tfm {
        struct crypto_aead *aead;
        bool has_key;
+       struct crypto_skcipher *null_tfm;
 };
 
-struct aead_ctx {
-       struct aead_sg_list tsgl;
-       struct aead_async_rsgl first_rsgl;
-       struct list_head list;
-
-       void *iv;
-
-       struct af_alg_completion completion;
-
-       unsigned long used;
-
-       unsigned int len;
-       bool more;
-       bool merge;
-       bool enc;
-
-       size_t aead_assoclen;
-       struct aead_request aead_req;
-};
-
-static inline int aead_sndbuf(struct sock *sk)
+static inline bool aead_sufficient_data(struct sock *sk)
 {
        struct alg_sock *ask = alg_sk(sk);
-       struct aead_ctx *ctx = ask->private;
-
-       return max_t(int, max_t(int, sk->sk_sndbuf & PAGE_MASK, PAGE_SIZE) -
-                         ctx->used, 0);
-}
-
-static inline bool aead_writable(struct sock *sk)
-{
-       return PAGE_SIZE <= aead_sndbuf(sk);
-}
-
-static inline bool aead_sufficient_data(struct aead_ctx *ctx)
-{
-       unsigned as = crypto_aead_authsize(crypto_aead_reqtfm(&ctx->aead_req));
+       struct sock *psk = ask->parent;
+       struct alg_sock *pask = alg_sk(psk);
+       struct af_alg_ctx *ctx = ask->private;
+       struct aead_tfm *aeadc = pask->private;
+       struct crypto_aead *tfm = aeadc->aead;
+       unsigned int as = crypto_aead_authsize(tfm);
 
        /*
         * The minimum amount of memory needed for an AEAD cipher is
@@ -95,484 +63,58 @@ static inline bool aead_sufficient_data(struct aead_ctx *ctx)
        return ctx->used >= ctx->aead_assoclen + (ctx->enc ? 0 : as);
 }
 
-static void aead_reset_ctx(struct aead_ctx *ctx)
-{
-       struct aead_sg_list *sgl = &ctx->tsgl;
-
-       sg_init_table(sgl->sg, ALG_MAX_PAGES);
-       sgl->cur = 0;
-       ctx->used = 0;
-       ctx->more = 0;
-       ctx->merge = 0;
-}
-
-static void aead_put_sgl(struct sock *sk)
-{
-       struct alg_sock *ask = alg_sk(sk);
-       struct aead_ctx *ctx = ask->private;
-       struct aead_sg_list *sgl = &ctx->tsgl;
-       struct scatterlist *sg = sgl->sg;
-       unsigned int i;
-
-       for (i = 0; i < sgl->cur; i++) {
-               if (!sg_page(sg + i))
-                       continue;
-
-               put_page(sg_page(sg + i));
-               sg_assign_page(sg + i, NULL);
-       }
-       aead_reset_ctx(ctx);
-}
-
-static void aead_wmem_wakeup(struct sock *sk)
-{
-       struct socket_wq *wq;
-
-       if (!aead_writable(sk))
-               return;
-
-       rcu_read_lock();
-       wq = rcu_dereference(sk->sk_wq);
-       if (skwq_has_sleeper(wq))
-               wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
-                                                          POLLRDNORM |
-                                                          POLLRDBAND);
-       sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
-       rcu_read_unlock();
-}
-
-static int aead_wait_for_data(struct sock *sk, unsigned flags)
-{
-       DEFINE_WAIT_FUNC(wait, woken_wake_function);
-       struct alg_sock *ask = alg_sk(sk);
-       struct aead_ctx *ctx = ask->private;
-       long timeout;
-       int err = -ERESTARTSYS;
-
-       if (flags & MSG_DONTWAIT)
-               return -EAGAIN;
-
-       sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
-       add_wait_queue(sk_sleep(sk), &wait);
-       for (;;) {
-               if (signal_pending(current))
-                       break;
-               timeout = MAX_SCHEDULE_TIMEOUT;
-               if (sk_wait_event(sk, &timeout, !ctx->more, &wait)) {
-                       err = 0;
-                       break;
-               }
-       }
-       remove_wait_queue(sk_sleep(sk), &wait);
-
-       sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
-
-       return err;
-}
-
-static void aead_data_wakeup(struct sock *sk)
-{
-       struct alg_sock *ask = alg_sk(sk);
-       struct aead_ctx *ctx = ask->private;
-       struct socket_wq *wq;
-
-       if (ctx->more)
-               return;
-       if (!ctx->used)
-               return;
-
-       rcu_read_lock();
-       wq = rcu_dereference(sk->sk_wq);
-       if (skwq_has_sleeper(wq))
-               wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
-                                                          POLLRDNORM |
-                                                          POLLRDBAND);
-       sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
-       rcu_read_unlock();
-}
-
 static int aead_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 {
        struct sock *sk = sock->sk;
        struct alg_sock *ask = alg_sk(sk);
-       struct aead_ctx *ctx = ask->private;
-       unsigned ivsize =
-               crypto_aead_ivsize(crypto_aead_reqtfm(&ctx->aead_req));
-       struct aead_sg_list *sgl = &ctx->tsgl;
-       struct af_alg_control con = {};
-       long copied = 0;
-       bool enc = 0;
-       bool init = 0;
-       int err = -EINVAL;
-
-       if (msg->msg_controllen) {
-               err = af_alg_cmsg_send(msg, &con);
-               if (err)
-                       return err;
-
-               init = 1;
-               switch (con.op) {
-               case ALG_OP_ENCRYPT:
-                       enc = 1;
-                       break;
-               case ALG_OP_DECRYPT:
-                       enc = 0;
-                       break;
-               default:
-                       return -EINVAL;
-               }
-
-               if (con.iv && con.iv->ivlen != ivsize)
-                       return -EINVAL;
-       }
-
-       lock_sock(sk);
-       if (!ctx->more && ctx->used)
-               goto unlock;
-
-       if (init) {
-               ctx->enc = enc;
-               if (con.iv)
-                       memcpy(ctx->iv, con.iv->iv, ivsize);
-
-               ctx->aead_assoclen = con.aead_assoclen;
-       }
-
-       while (size) {
-               size_t len = size;
-               struct scatterlist *sg = NULL;
-
-               /* use the existing memory in an allocated page */
-               if (ctx->merge) {
-                       sg = sgl->sg + sgl->cur - 1;
-                       len = min_t(unsigned long, len,
-                                   PAGE_SIZE - sg->offset - sg->length);
-                       err = memcpy_from_msg(page_address(sg_page(sg)) +
-                                             sg->offset + sg->length,
-                                             msg, len);
-                       if (err)
-                               goto unlock;
-
-                       sg->length += len;
-                       ctx->merge = (sg->offset + sg->length) &
-                                    (PAGE_SIZE - 1);
-
-                       ctx->used += len;
-                       copied += len;
-                       size -= len;
-                       continue;
-               }
-
-               if (!aead_writable(sk)) {
-                       /* user space sent too much data */
-                       aead_put_sgl(sk);
-                       err = -EMSGSIZE;
-                       goto unlock;
-               }
-
-               /* allocate a new page */
-               len = min_t(unsigned long, size, aead_sndbuf(sk));
-               while (len) {
-                       size_t plen = 0;
-
-                       if (sgl->cur >= ALG_MAX_PAGES) {
-                               aead_put_sgl(sk);
-                               err = -E2BIG;
-                               goto unlock;
-                       }
-
-                       sg = sgl->sg + sgl->cur;
-                       plen = min_t(size_t, len, PAGE_SIZE);
-
-                       sg_assign_page(sg, alloc_page(GFP_KERNEL));
-                       err = -ENOMEM;
-                       if (!sg_page(sg))
-                               goto unlock;
-
-                       err = memcpy_from_msg(page_address(sg_page(sg)),
-                                             msg, plen);
-                       if (err) {
-                               __free_page(sg_page(sg));
-                               sg_assign_page(sg, NULL);
-                               goto unlock;
-                       }
-
-                       sg->offset = 0;
-                       sg->length = plen;
-                       len -= plen;
-                       ctx->used += plen;
-                       copied += plen;
-                       sgl->cur++;
-                       size -= plen;
-                       ctx->merge = plen & (PAGE_SIZE - 1);
-               }
-       }
-
-       err = 0;
-
-       ctx->more = msg->msg_flags & MSG_MORE;
-       if (!ctx->more && !aead_sufficient_data(ctx)) {
-               aead_put_sgl(sk);
-               err = -EMSGSIZE;
-       }
-
-unlock:
-       aead_data_wakeup(sk);
-       release_sock(sk);
-
-       return err ?: copied;
-}
-
-static ssize_t aead_sendpage(struct socket *sock, struct page *page,
-                            int offset, size_t size, int flags)
-{
-       struct sock *sk = sock->sk;
-       struct alg_sock *ask = alg_sk(sk);
-       struct aead_ctx *ctx = ask->private;
-       struct aead_sg_list *sgl = &ctx->tsgl;
-       int err = -EINVAL;
-
-       if (flags & MSG_SENDPAGE_NOTLAST)
-               flags |= MSG_MORE;
-
-       if (sgl->cur >= ALG_MAX_PAGES)
-               return -E2BIG;
-
-       lock_sock(sk);
-       if (!ctx->more && ctx->used)
-               goto unlock;
-
-       if (!size)
-               goto done;
-
-       if (!aead_writable(sk)) {
-               /* user space sent too much data */
-               aead_put_sgl(sk);
-               err = -EMSGSIZE;
-               goto unlock;
-       }
-
-       ctx->merge = 0;
-
-       get_page(page);
-       sg_set_page(sgl->sg + sgl->cur, page, size, offset);
-       sgl->cur++;
-       ctx->used += size;
-
-       err = 0;
-
-done:
-       ctx->more = flags & MSG_MORE;
-       if (!ctx->more && !aead_sufficient_data(ctx)) {
-               aead_put_sgl(sk);
-               err = -EMSGSIZE;
-       }
+       struct sock *psk = ask->parent;
+       struct alg_sock *pask = alg_sk(psk);
+       struct aead_tfm *aeadc = pask->private;
+       struct crypto_aead *tfm = aeadc->aead;
+       unsigned int ivsize = crypto_aead_ivsize(tfm);
 
-unlock:
-       aead_data_wakeup(sk);
-       release_sock(sk);
-
-       return err ?: size;
+       return af_alg_sendmsg(sock, msg, size, ivsize);
 }
 
-#define GET_ASYM_REQ(req, tfm) (struct aead_async_req *) \
-               ((char *)req + sizeof(struct aead_request) + \
-                crypto_aead_reqsize(tfm))
-
- #define GET_REQ_SIZE(tfm) sizeof(struct aead_async_req) + \
-       crypto_aead_reqsize(tfm) + crypto_aead_ivsize(tfm) + \
-       sizeof(struct aead_request)
-
-static void aead_async_cb(struct crypto_async_request *_req, int err)
+static int crypto_aead_copy_sgl(struct crypto_skcipher *null_tfm,
+                               struct scatterlist *src,
+                               struct scatterlist *dst, unsigned int len)
 {
-       struct aead_request *req = _req->data;
-       struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-       struct aead_async_req *areq = GET_ASYM_REQ(req, tfm);
-       struct sock *sk = areq->sk;
-       struct scatterlist *sg = areq->tsgl;
-       struct aead_async_rsgl *rsgl;
-       struct kiocb *iocb = areq->iocb;
-       unsigned int i, reqlen = GET_REQ_SIZE(tfm);
-
-       list_for_each_entry(rsgl, &areq->list, list) {
-               af_alg_free_sg(&rsgl->sgl);
-               if (rsgl != &areq->first_rsgl)
-                       sock_kfree_s(sk, rsgl, sizeof(*rsgl));
-       }
+       SKCIPHER_REQUEST_ON_STACK(skreq, null_tfm);
 
-       for (i = 0; i < areq->tsgls; i++)
-               put_page(sg_page(sg + i));
+       skcipher_request_set_tfm(skreq, null_tfm);
+       skcipher_request_set_callback(skreq, CRYPTO_TFM_REQ_MAY_BACKLOG,
+                                     NULL, NULL);
+       skcipher_request_set_crypt(skreq, src, dst, len, NULL);
 
-       sock_kfree_s(sk, areq->tsgl, sizeof(*areq->tsgl) * areq->tsgls);
-       sock_kfree_s(sk, req, reqlen);
-       __sock_put(sk);
-       iocb->ki_complete(iocb, err, err);
+       return crypto_skcipher_encrypt(skreq);
 }
 
-static int aead_recvmsg_async(struct socket *sock, struct msghdr *msg,
-                             int flags)
+static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
+                        size_t ignored, int flags)
 {
        struct sock *sk = sock->sk;
        struct alg_sock *ask = alg_sk(sk);
-       struct aead_ctx *ctx = ask->private;
-       struct crypto_aead *tfm = crypto_aead_reqtfm(&ctx->aead_req);
-       struct aead_async_req *areq;
-       struct aead_request *req = NULL;
-       struct aead_sg_list *sgl = &ctx->tsgl;
-       struct aead_async_rsgl *last_rsgl = NULL, *rsgl;
+       struct sock *psk = ask->parent;
+       struct alg_sock *pask = alg_sk(psk);
+       struct af_alg_ctx *ctx = ask->private;
+       struct aead_tfm *aeadc = pask->private;
+       struct crypto_aead *tfm = aeadc->aead;
+       struct crypto_skcipher *null_tfm = aeadc->null_tfm;
        unsigned int as = crypto_aead_authsize(tfm);
-       unsigned int i, reqlen = GET_REQ_SIZE(tfm);
-       int err = -ENOMEM;
-       unsigned long used;
-       size_t outlen = 0;
-       size_t usedpages = 0;
-
-       lock_sock(sk);
-       if (ctx->more) {
-               err = aead_wait_for_data(sk, flags);
-               if (err)
-                       goto unlock;
-       }
-
-       if (!aead_sufficient_data(ctx))
-               goto unlock;
-
-       used = ctx->used;
-       if (ctx->enc)
-               outlen = used + as;
-       else
-               outlen = used - as;
-
-       req = sock_kmalloc(sk, reqlen, GFP_KERNEL);
-       if (unlikely(!req))
-               goto unlock;
-
-       areq = GET_ASYM_REQ(req, tfm);
-       memset(&areq->first_rsgl, '\0', sizeof(areq->first_rsgl));
-       INIT_LIST_HEAD(&areq->list);
-       areq->iocb = msg->msg_iocb;
-       areq->sk = sk;
-       memcpy(areq->iv, ctx->iv, crypto_aead_ivsize(tfm));
-       aead_request_set_tfm(req, tfm);
-       aead_request_set_ad(req, ctx->aead_assoclen);
-       aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
-                                 aead_async_cb, req);
-       used -= ctx->aead_assoclen;
-
-       /* take over all tx sgls from ctx */
-       areq->tsgl = sock_kmalloc(sk,
-                                 sizeof(*areq->tsgl) * max_t(u32, sgl->cur, 1),
-                                 GFP_KERNEL);
-       if (unlikely(!areq->tsgl))
-               goto free;
-
-       sg_init_table(areq->tsgl, max_t(u32, sgl->cur, 1));
-       for (i = 0; i < sgl->cur; i++)
-               sg_set_page(&areq->tsgl[i], sg_page(&sgl->sg[i]),
-                           sgl->sg[i].length, sgl->sg[i].offset);
-
-       areq->tsgls = sgl->cur;
-
-       /* create rx sgls */
-       while (outlen > usedpages && iov_iter_count(&msg->msg_iter)) {
-               size_t seglen = min_t(size_t, iov_iter_count(&msg->msg_iter),
-                                     (outlen - usedpages));
-
-               if (list_empty(&areq->list)) {
-                       rsgl = &areq->first_rsgl;
-
-               } else {
-                       rsgl = sock_kmalloc(sk, sizeof(*rsgl), GFP_KERNEL);
-                       if (unlikely(!rsgl)) {
-                               err = -ENOMEM;
-                               goto free;
-                       }
-               }
-               rsgl->sgl.npages = 0;
-               list_add_tail(&rsgl->list, &areq->list);
-
-               /* make one iovec available as scatterlist */
-               err = af_alg_make_sg(&rsgl->sgl, &msg->msg_iter, seglen);
-               if (err < 0)
-                       goto free;
-
-               usedpages += err;
-
-               /* chain the new scatterlist with previous one */
-               if (last_rsgl)
-                       af_alg_link_sg(&last_rsgl->sgl, &rsgl->sgl);
-
-               last_rsgl = rsgl;
-
-               iov_iter_advance(&msg->msg_iter, err);
-       }
-
-       /* ensure output buffer is sufficiently large */
-       if (usedpages < outlen) {
-               err = -EINVAL;
-               goto unlock;
-       }
-
-       aead_request_set_crypt(req, areq->tsgl, areq->first_rsgl.sgl.sg, used,
-                              areq->iv);
-       err = ctx->enc ? crypto_aead_encrypt(req) : crypto_aead_decrypt(req);
-       if (err) {
-               if (err == -EINPROGRESS) {
-                       sock_hold(sk);
-                       err = -EIOCBQUEUED;
-                       aead_reset_ctx(ctx);
-                       goto unlock;
-               } else if (err == -EBADMSG) {
-                       aead_put_sgl(sk);
-               }
-               goto free;
-       }
-       aead_put_sgl(sk);
-
-free:
-       list_for_each_entry(rsgl, &areq->list, list) {
-               af_alg_free_sg(&rsgl->sgl);
-               if (rsgl != &areq->first_rsgl)
-                       sock_kfree_s(sk, rsgl, sizeof(*rsgl));
-       }
-       if (areq->tsgl)
-               sock_kfree_s(sk, areq->tsgl, sizeof(*areq->tsgl) * areq->tsgls);
-       if (req)
-               sock_kfree_s(sk, req, reqlen);
-unlock:
-       aead_wmem_wakeup(sk);
-       release_sock(sk);
-       return err ? err : outlen;
-}
-
-static int aead_recvmsg_sync(struct socket *sock, struct msghdr *msg, int flags)
-{
-       struct sock *sk = sock->sk;
-       struct alg_sock *ask = alg_sk(sk);
-       struct aead_ctx *ctx = ask->private;
-       unsigned as = crypto_aead_authsize(crypto_aead_reqtfm(&ctx->aead_req));
-       struct aead_sg_list *sgl = &ctx->tsgl;
-       struct aead_async_rsgl *last_rsgl = NULL;
-       struct aead_async_rsgl *rsgl, *tmp;
-       int err = -EINVAL;
-       unsigned long used = 0;
-       size_t outlen = 0;
-       size_t usedpages = 0;
-
-       lock_sock(sk);
+       struct af_alg_async_req *areq;
+       struct af_alg_tsgl *tsgl;
+       struct scatterlist *src;
+       int err = 0;
+       size_t used = 0;                /* [in]  TX bufs to be en/decrypted */
+       size_t outlen = 0;              /* [out] RX bufs produced by kernel */
+       size_t usedpages = 0;           /* [in]  RX bufs to be used from user */
+       size_t processed = 0;           /* [in]  TX bufs to be consumed */
 
        /*
-        * Please see documentation of aead_request_set_crypt for the
-        * description of the AEAD memory structure expected from the caller.
+        * Data length provided by caller via sendmsg/sendpage that has not
+        * yet been processed.
         */
-
-       if (ctx->more) {
-               err = aead_wait_for_data(sk, flags);
-               if (err)
-                       goto unlock;
-       }
-
-       /* data length provided by caller via sendmsg/sendpage */
        used = ctx->used;
 
        /*
@@ -584,8 +126,8 @@ static int aead_recvmsg_sync(struct socket *sock, struct msghdr *msg, int flags)
         * the error message in sendmsg/sendpage and still call recvmsg. This
         * check here protects the kernel integrity.
         */
-       if (!aead_sufficient_data(ctx))
-               goto unlock;
+       if (!aead_sufficient_data(sk))
+               return -EINVAL;
 
        /*
         * Calculate the minimum output buffer size holding the result of the
@@ -606,104 +148,191 @@ static int aead_recvmsg_sync(struct socket *sock, struct msghdr *msg, int flags)
         */
        used -= ctx->aead_assoclen;
 
-       /* convert iovecs of output buffers into scatterlists */
-       while (outlen > usedpages && iov_iter_count(&msg->msg_iter)) {
-               size_t seglen = min_t(size_t, iov_iter_count(&msg->msg_iter),
-                                     (outlen - usedpages));
-
-               if (list_empty(&ctx->list)) {
-                       rsgl = &ctx->first_rsgl;
-               } else {
-                       rsgl = sock_kmalloc(sk, sizeof(*rsgl), GFP_KERNEL);
-                       if (unlikely(!rsgl)) {
-                               err = -ENOMEM;
-                               goto unlock;
-                       }
-               }
-               rsgl->sgl.npages = 0;
-               list_add_tail(&rsgl->list, &ctx->list);
+       /* Allocate cipher request for current operation. */
+       areq = af_alg_alloc_areq(sk, sizeof(struct af_alg_async_req) +
+                                    crypto_aead_reqsize(tfm));
+       if (IS_ERR(areq))
+               return PTR_ERR(areq);
 
-               /* make one iovec available as scatterlist */
-               err = af_alg_make_sg(&rsgl->sgl, &msg->msg_iter, seglen);
-               if (err < 0)
-                       goto unlock;
-               usedpages += err;
-               /* chain the new scatterlist with previous one */
-               if (last_rsgl)
-                       af_alg_link_sg(&last_rsgl->sgl, &rsgl->sgl);
+       /* convert iovecs of output buffers into RX SGL */
+       err = af_alg_get_rsgl(sk, msg, flags, areq, outlen, &usedpages);
+       if (err)
+               goto free;
 
-               last_rsgl = rsgl;
+       /*
+        * Ensure the output buffer is sufficiently large. If the caller
+        * provides less buffer space, only process as much input data as the
+        * provided output buffer can hold. This allows AIO operation where
+        * the caller sent all data to be processed and each AIO call operates
+        * on a different chunk of the input data.
+        */
+       if (usedpages < outlen) {
+               size_t less = outlen - usedpages;
 
-               iov_iter_advance(&msg->msg_iter, err);
+               if (used < less) {
+                       err = -EINVAL;
+                       goto free;
+               }
+               used -= less;
+               outlen -= less;
        }
 
-       /* ensure output buffer is sufficiently large */
-       if (usedpages < outlen) {
-               err = -EINVAL;
-               goto unlock;
-       }
+       processed = used + ctx->aead_assoclen;
+       tsgl = list_first_entry(&ctx->tsgl_list, struct af_alg_tsgl, list);
 
-       sg_mark_end(sgl->sg + sgl->cur - 1);
-       aead_request_set_crypt(&ctx->aead_req, sgl->sg, ctx->first_rsgl.sgl.sg,
-                              used, ctx->iv);
-       aead_request_set_ad(&ctx->aead_req, ctx->aead_assoclen);
+       /*
+        * Copy of AAD from source to destination
+        *
+        * The AAD is copied to the destination buffer unchanged. Even when
+        * user space performs an in-place cipher operation, the kernel copies
+        * the data because it cannot detect whether such an in-place
+        * operation was requested.
+        *
+        * For efficiency, the following implementation ensures that the
+        * cipher is invoked to perform the crypto operation in-place. This
+        * is achieved by the memory management described below.
+        */
 
-       err = af_alg_wait_for_completion(ctx->enc ?
-                                        crypto_aead_encrypt(&ctx->aead_req) :
-                                        crypto_aead_decrypt(&ctx->aead_req),
-                                        &ctx->completion);
+       /* Use the RX SGL as source (and destination) for crypto op. */
+       src = areq->first_rsgl.sgl.sg;
+
+       if (ctx->enc) {
+               /*
+                * Encryption operation - The in-place cipher operation is
+                * achieved by the following operation:
+                *
+                * TX SGL: AAD || PT
+                *          |      |
+                *          | copy |
+                *          v      v
+                * RX SGL: AAD || PT || Tag
+                */
+               err = crypto_aead_copy_sgl(null_tfm, tsgl->sg,
+                                          areq->first_rsgl.sgl.sg, processed);
+               if (err)
+                       goto free;
+               af_alg_pull_tsgl(sk, processed, NULL, 0);
+       } else {
+               /*
+                * Decryption operation - To achieve an in-place cipher
+                * operation, the following SGL structure is used:
+                *
+                * TX SGL: AAD || CT || Tag
+                *          |      |     ^
+                *          | copy |     | Create SGL link.
+                *          v      v     |
+                * RX SGL: AAD || CT ----+
+                */
+
+                /* Copy AAD || CT to RX SGL buffer for in-place operation. */
+               err = crypto_aead_copy_sgl(null_tfm, tsgl->sg,
+                                          areq->first_rsgl.sgl.sg, outlen);
+               if (err)
+                       goto free;
 
-       if (err) {
-               /* EBADMSG implies a valid cipher operation took place */
-               if (err == -EBADMSG)
-                       aead_put_sgl(sk);
+               /* Create TX SGL for tag and chain it to RX SGL. */
+               areq->tsgl_entries = af_alg_count_tsgl(sk, processed,
+                                                      processed - as);
+               if (!areq->tsgl_entries)
+                       areq->tsgl_entries = 1;
+               areq->tsgl = sock_kmalloc(sk, sizeof(*areq->tsgl) *
+                                             areq->tsgl_entries,
+                                         GFP_KERNEL);
+               if (!areq->tsgl) {
+                       err = -ENOMEM;
+                       goto free;
+               }
+               sg_init_table(areq->tsgl, areq->tsgl_entries);
+
+               /* Release TX SGL, except for tag data and reassign tag data. */
+               af_alg_pull_tsgl(sk, processed, areq->tsgl, processed - as);
+
+               /* chain the areq TX SGL holding the tag with RX SGL */
+               if (usedpages) {
+                       /* RX SGL present */
+                       struct af_alg_sgl *sgl_prev = &areq->last_rsgl->sgl;
+
+                       sg_unmark_end(sgl_prev->sg + sgl_prev->npages - 1);
+                       sg_chain(sgl_prev->sg, sgl_prev->npages + 1,
+                                areq->tsgl);
+               } else
+                       /* no RX SGL present (e.g. authentication only) */
+                       src = areq->tsgl;
+       }
 
-               goto unlock;
+       /* Initialize the crypto operation */
+       aead_request_set_crypt(&areq->cra_u.aead_req, src,
+                              areq->first_rsgl.sgl.sg, used, ctx->iv);
+       aead_request_set_ad(&areq->cra_u.aead_req, ctx->aead_assoclen);
+       aead_request_set_tfm(&areq->cra_u.aead_req, tfm);
+
+       if (msg->msg_iocb && !is_sync_kiocb(msg->msg_iocb)) {
+               /* AIO operation */
+               areq->iocb = msg->msg_iocb;
+               aead_request_set_callback(&areq->cra_u.aead_req,
+                                         CRYPTO_TFM_REQ_MAY_BACKLOG,
+                                         af_alg_async_cb, areq);
+               err = ctx->enc ? crypto_aead_encrypt(&areq->cra_u.aead_req) :
+                                crypto_aead_decrypt(&areq->cra_u.aead_req);
+       } else {
+               /* Synchronous operation */
+               aead_request_set_callback(&areq->cra_u.aead_req,
+                                         CRYPTO_TFM_REQ_MAY_BACKLOG,
+                                         af_alg_complete, &ctx->completion);
+               err = af_alg_wait_for_completion(ctx->enc ?
+                               crypto_aead_encrypt(&areq->cra_u.aead_req) :
+                               crypto_aead_decrypt(&areq->cra_u.aead_req),
+                                                &ctx->completion);
        }
 
-       aead_put_sgl(sk);
-       err = 0;
+       /* AIO operation in progress */
+       if (err == -EINPROGRESS) {
+               sock_hold(sk);
 
-unlock:
-       list_for_each_entry_safe(rsgl, tmp, &ctx->list, list) {
-               af_alg_free_sg(&rsgl->sgl);
-               list_del(&rsgl->list);
-               if (rsgl != &ctx->first_rsgl)
-                       sock_kfree_s(sk, rsgl, sizeof(*rsgl));
+               /* Remember output size that will be generated. */
+               areq->outlen = outlen;
+
+               return -EIOCBQUEUED;
        }
-       INIT_LIST_HEAD(&ctx->list);
-       aead_wmem_wakeup(sk);
-       release_sock(sk);
 
-       return err ? err : outlen;
-}
+free:
+       af_alg_free_areq_sgls(areq);
+       sock_kfree_s(sk, areq, areq->areqlen);
 
-static int aead_recvmsg(struct socket *sock, struct msghdr *msg, size_t ignored,
-                       int flags)
-{
-       return (msg->msg_iocb && !is_sync_kiocb(msg->msg_iocb)) ?
-               aead_recvmsg_async(sock, msg, flags) :
-               aead_recvmsg_sync(sock, msg, flags);
+       return err ? err : outlen;
 }
 
-static unsigned int aead_poll(struct file *file, struct socket *sock,
-                             poll_table *wait)
+static int aead_recvmsg(struct socket *sock, struct msghdr *msg,
+                       size_t ignored, int flags)
 {
        struct sock *sk = sock->sk;
-       struct alg_sock *ask = alg_sk(sk);
-       struct aead_ctx *ctx = ask->private;
-       unsigned int mask;
-
-       sock_poll_wait(file, sk_sleep(sk), wait);
-       mask = 0;
+       int ret = 0;
 
-       if (!ctx->more)
-               mask |= POLLIN | POLLRDNORM;
+       lock_sock(sk);
+       while (msg_data_left(msg)) {
+               int err = _aead_recvmsg(sock, msg, ignored, flags);
+
+               /*
+                * This error covers -EIOCBQUEUED, which implies that we can
+                * only handle one AIO request. If the caller wants to issue
+                * multiple AIO requests in parallel, it must make multiple
+                * separate AIO calls.
+                *
+                * Also return the error if no data has been processed so far.
+                */
+               if (err <= 0) {
+                       if (err == -EIOCBQUEUED || err == -EBADMSG || !ret)
+                               ret = err;
+                       goto out;
+               }
 
-       if (aead_writable(sk))
-               mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+               ret += err;
+       }
 
-       return mask;
+out:
+       af_alg_wmem_wakeup(sk);
+       release_sock(sk);
+       return ret;
 }
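For orientation, this is how a user-space caller exercises the path above: sendmsg() only queues AAD || PT (or AAD || CT || Tag for decryption) into the TX SGL, and the subsequent recvmsg()/read() call performs the actual AEAD operation and returns AAD || CT || Tag (or AAD || PT). The sketch below is illustrative only; the key, nonce and sizes are made-up values and error handling is omitted.

/*
 * Hedged user-space sketch of the algif_aead flow for gcm(aes): sendmsg()
 * queues AAD || PT in the TX SGL, read() runs the cipher and returns
 * AAD || CT || Tag.  Key, nonce and lengths are example values only.
 */
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <linux/if_alg.h>

int main(void)
{
	struct sockaddr_alg sa = {
		.salg_family = AF_ALG,
		.salg_type   = "aead",
		.salg_name   = "gcm(aes)",
	};
	unsigned char key[16] = { 0 };		/* example key */
	unsigned char nonce[12] = { 0 };	/* example GCM nonce */
	unsigned char in[8 + 32] = { 0 };	/* AAD (8) || plaintext (32) */
	unsigned char out[8 + 32 + 16];		/* AAD || CT || 16-byte tag */
	char cbuf[CMSG_SPACE(4) + CMSG_SPACE(sizeof(struct af_alg_iv) + 12) +
		  CMSG_SPACE(4)] = { 0 };
	struct msghdr msg = { 0 };
	struct cmsghdr *cmsg;
	struct af_alg_iv *alg_iv;
	struct iovec iov = { .iov_base = in, .iov_len = sizeof(in) };
	int tfmfd, opfd;

	tfmfd = socket(AF_ALG, SOCK_SEQPACKET, 0);
	bind(tfmfd, (struct sockaddr *)&sa, sizeof(sa));
	setsockopt(tfmfd, SOL_ALG, ALG_SET_KEY, key, sizeof(key));
	opfd = accept(tfmfd, NULL, 0);

	msg.msg_control    = cbuf;
	msg.msg_controllen = sizeof(cbuf);
	msg.msg_iov        = &iov;
	msg.msg_iovlen     = 1;

	cmsg = CMSG_FIRSTHDR(&msg);
	cmsg->cmsg_level = SOL_ALG;
	cmsg->cmsg_type  = ALG_SET_OP;
	cmsg->cmsg_len   = CMSG_LEN(4);
	*(__u32 *)CMSG_DATA(cmsg) = ALG_OP_ENCRYPT;

	cmsg = CMSG_NXTHDR(&msg, cmsg);
	cmsg->cmsg_level = SOL_ALG;
	cmsg->cmsg_type  = ALG_SET_IV;
	cmsg->cmsg_len   = CMSG_LEN(sizeof(*alg_iv) + sizeof(nonce));
	alg_iv = (struct af_alg_iv *)CMSG_DATA(cmsg);
	alg_iv->ivlen = sizeof(nonce);
	memcpy(alg_iv->iv, nonce, sizeof(nonce));

	cmsg = CMSG_NXTHDR(&msg, cmsg);
	cmsg->cmsg_level = SOL_ALG;
	cmsg->cmsg_type  = ALG_SET_AEAD_ASSOCLEN;
	cmsg->cmsg_len   = CMSG_LEN(4);
	*(__u32 *)CMSG_DATA(cmsg) = 8;

	sendmsg(opfd, &msg, 0);			/* only fills the TX SGL */
	read(opfd, out, sizeof(out));		/* triggers the AEAD operation */

	close(opfd);
	close(tfmfd);
	return 0;
}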
 
 static struct proto_ops algif_aead_ops = {
@@ -723,9 +352,9 @@ static struct proto_ops algif_aead_ops = {
 
        .release        =       af_alg_release,
        .sendmsg        =       aead_sendmsg,
-       .sendpage       =       aead_sendpage,
+       .sendpage       =       af_alg_sendpage,
        .recvmsg        =       aead_recvmsg,
-       .poll           =       aead_poll,
+       .poll           =       af_alg_poll,
 };
 
 static int aead_check_key(struct socket *sock)
@@ -787,7 +416,7 @@ static ssize_t aead_sendpage_nokey(struct socket *sock, struct page *page,
        if (err)
                return err;
 
-       return aead_sendpage(sock, page, offset, size, flags);
+       return af_alg_sendpage(sock, page, offset, size, flags);
 }
 
 static int aead_recvmsg_nokey(struct socket *sock, struct msghdr *msg,
@@ -821,13 +450,14 @@ static struct proto_ops algif_aead_ops_nokey = {
        .sendmsg        =       aead_sendmsg_nokey,
        .sendpage       =       aead_sendpage_nokey,
        .recvmsg        =       aead_recvmsg_nokey,
-       .poll           =       aead_poll,
+       .poll           =       af_alg_poll,
 };
 
 static void *aead_bind(const char *name, u32 type, u32 mask)
 {
        struct aead_tfm *tfm;
        struct crypto_aead *aead;
+       struct crypto_skcipher *null_tfm;
 
        tfm = kzalloc(sizeof(*tfm), GFP_KERNEL);
        if (!tfm)
@@ -839,7 +469,15 @@ static void *aead_bind(const char *name, u32 type, u32 mask)
                return ERR_CAST(aead);
        }
 
+       null_tfm = crypto_get_default_null_skcipher2();
+       if (IS_ERR(null_tfm)) {
+               crypto_free_aead(aead);
+               kfree(tfm);
+               return ERR_CAST(null_tfm);
+       }
+
        tfm->aead = aead;
+       tfm->null_tfm = null_tfm;
 
        return tfm;
 }
@@ -873,12 +511,15 @@ static int aead_setkey(void *private, const u8 *key, unsigned int keylen)
 static void aead_sock_destruct(struct sock *sk)
 {
        struct alg_sock *ask = alg_sk(sk);
-       struct aead_ctx *ctx = ask->private;
-       unsigned int ivlen = crypto_aead_ivsize(
-                               crypto_aead_reqtfm(&ctx->aead_req));
-
-       WARN_ON(refcount_read(&sk->sk_refcnt) != 0);
-       aead_put_sgl(sk);
+       struct af_alg_ctx *ctx = ask->private;
+       struct sock *psk = ask->parent;
+       struct alg_sock *pask = alg_sk(psk);
+       struct aead_tfm *aeadc = pask->private;
+       struct crypto_aead *tfm = aeadc->aead;
+       unsigned int ivlen = crypto_aead_ivsize(tfm);
+
+       af_alg_pull_tsgl(sk, ctx->used, NULL, 0);
+       crypto_put_default_null_skcipher2();
        sock_kzfree_s(sk, ctx->iv, ivlen);
        sock_kfree_s(sk, ctx, ctx->len);
        af_alg_release_parent(sk);
@@ -886,11 +527,11 @@ static void aead_sock_destruct(struct sock *sk)
 
 static int aead_accept_parent_nokey(void *private, struct sock *sk)
 {
-       struct aead_ctx *ctx;
+       struct af_alg_ctx *ctx;
        struct alg_sock *ask = alg_sk(sk);
        struct aead_tfm *tfm = private;
        struct crypto_aead *aead = tfm->aead;
-       unsigned int len = sizeof(*ctx) + crypto_aead_reqsize(aead);
+       unsigned int len = sizeof(*ctx);
        unsigned int ivlen = crypto_aead_ivsize(aead);
 
        ctx = sock_kmalloc(sk, len, GFP_KERNEL);
@@ -905,23 +546,18 @@ static int aead_accept_parent_nokey(void *private, struct sock *sk)
        }
        memset(ctx->iv, 0, ivlen);
 
+       INIT_LIST_HEAD(&ctx->tsgl_list);
        ctx->len = len;
        ctx->used = 0;
+       ctx->rcvused = 0;
        ctx->more = 0;
        ctx->merge = 0;
        ctx->enc = 0;
-       ctx->tsgl.cur = 0;
        ctx->aead_assoclen = 0;
        af_alg_init_completion(&ctx->completion);
-       sg_init_table(ctx->tsgl.sg, ALG_MAX_PAGES);
-       INIT_LIST_HEAD(&ctx->list);
 
        ask->private = ctx;
 
-       aead_request_set_tfm(&ctx->aead_req, aead);
-       aead_request_set_callback(&ctx->aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG,
-                                 af_alg_complete, &ctx->completion);
-
        sk->sk_destruct = aead_sock_destruct;
 
        return 0;
index 903605dbc1a50282e8e3d7ced0bba148ec7ebe9c..8ae4170aaeb4f2ef533f7461b81cd8b828879dd5 100644 (file)
  * Software Foundation; either version 2 of the License, or (at your option)
  * any later version.
  *
+ * The following concept of the memory management is used:
+ *
+ * The kernel maintains two SGLs, the TX SGL and the RX SGL. The TX SGL is
+ * filled by user space with the data submitted via sendpage/sendmsg. Filling
+ * up the TX SGL does not cause a crypto operation -- the data will only be
+ * tracked by the kernel. Upon receipt of one recvmsg call, the caller must
+ * provide a buffer which is tracked with the RX SGL.
+ *
+ * During the processing of the recvmsg operation, the cipher request is
+ * allocated and prepared. As part of the recvmsg operation, the processed
+ * TX buffers are extracted from the TX SGL into a separate SGL.
+ *
+ * After the completion of the crypto operation, the RX SGL and the cipher
+ * request are released. The extracted TX SGL parts are released together
+ * with the RX SGL.
  */
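The extraction step mentioned above, carving the already-processed head of the global TX SGL out into a per-request SGL, can be pictured with the simplified sketch below. It is an illustration of the idea only, not the real af_alg_count_tsgl()/af_alg_pull_tsgl() code; the function and parameter names are assumptions for the example, and page reference accounting is left out.

/*
 * Illustrative sketch only: move up to @len bytes from the head of the
 * queued TX scatterlist @src (with @nents entries) into the per-request
 * scatterlist @dst.  Partially consumed entries keep their remainder in
 * @src; page reference accounting is omitted here.
 */
static void pull_tx_sgl_sketch(struct scatterlist *src, unsigned int nents,
			       struct scatterlist *dst, size_t len)
{
	unsigned int i, j = 0;

	for (i = 0; i < nents && len; i++) {
		size_t plen = min_t(size_t, len, src[i].length);

		if (!sg_page(&src[i]))
			continue;

		/* Hand the consumed part over to the per-request SGL. */
		sg_set_page(&dst[j++], sg_page(&src[i]), plen, src[i].offset);

		src[i].offset += plen;
		src[i].length -= plen;
		len -= plen;
	}

	if (j)
		sg_mark_end(&dst[j - 1]);
}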
 
 #include <crypto/scatterwalk.h>
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/kernel.h>
-#include <linux/sched/signal.h>
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/net.h>
 #include <net/sock.h>
 
-struct skcipher_sg_list {
-       struct list_head list;
-
-       int cur;
-
-       struct scatterlist sg[0];
-};
-
 struct skcipher_tfm {
        struct crypto_skcipher *skcipher;
        bool has_key;
 };
 
-struct skcipher_ctx {
-       struct list_head tsgl;
-       struct af_alg_sgl rsgl;
-
-       void *iv;
-
-       struct af_alg_completion completion;
-
-       atomic_t inflight;
-       size_t used;
-
-       unsigned int len;
-       bool more;
-       bool merge;
-       bool enc;
-
-       struct skcipher_request req;
-};
-
-struct skcipher_async_rsgl {
-       struct af_alg_sgl sgl;
-       struct list_head list;
-};
-
-struct skcipher_async_req {
-       struct kiocb *iocb;
-       struct skcipher_async_rsgl first_sgl;
-       struct list_head list;
-       struct scatterlist *tsg;
-       atomic_t *inflight;
-       struct skcipher_request req;
-};
-
-#define MAX_SGL_ENTS ((4096 - sizeof(struct skcipher_sg_list)) / \
-                     sizeof(struct scatterlist) - 1)
-
-static void skcipher_free_async_sgls(struct skcipher_async_req *sreq)
-{
-       struct skcipher_async_rsgl *rsgl, *tmp;
-       struct scatterlist *sgl;
-       struct scatterlist *sg;
-       int i, n;
-
-       list_for_each_entry_safe(rsgl, tmp, &sreq->list, list) {
-               af_alg_free_sg(&rsgl->sgl);
-               if (rsgl != &sreq->first_sgl)
-                       kfree(rsgl);
-       }
-       sgl = sreq->tsg;
-       n = sg_nents(sgl);
-       for_each_sg(sgl, sg, n, i) {
-               struct page *page = sg_page(sg);
-
-               /* some SGs may not have a page mapped */
-               if (page && page_ref_count(page))
-                       put_page(page);
-       }
-
-       kfree(sreq->tsg);
-}
-
-static void skcipher_async_cb(struct crypto_async_request *req, int err)
-{
-       struct skcipher_async_req *sreq = req->data;
-       struct kiocb *iocb = sreq->iocb;
-
-       atomic_dec(sreq->inflight);
-       skcipher_free_async_sgls(sreq);
-       kzfree(sreq);
-       iocb->ki_complete(iocb, err, err);
-}
-
-static inline int skcipher_sndbuf(struct sock *sk)
-{
-       struct alg_sock *ask = alg_sk(sk);
-       struct skcipher_ctx *ctx = ask->private;
-
-       return max_t(int, max_t(int, sk->sk_sndbuf & PAGE_MASK, PAGE_SIZE) -
-                         ctx->used, 0);
-}
-
-static inline bool skcipher_writable(struct sock *sk)
-{
-       return PAGE_SIZE <= skcipher_sndbuf(sk);
-}
-
-static int skcipher_alloc_sgl(struct sock *sk)
-{
-       struct alg_sock *ask = alg_sk(sk);
-       struct skcipher_ctx *ctx = ask->private;
-       struct skcipher_sg_list *sgl;
-       struct scatterlist *sg = NULL;
-
-       sgl = list_entry(ctx->tsgl.prev, struct skcipher_sg_list, list);
-       if (!list_empty(&ctx->tsgl))
-               sg = sgl->sg;
-
-       if (!sg || sgl->cur >= MAX_SGL_ENTS) {
-               sgl = sock_kmalloc(sk, sizeof(*sgl) +
-                                      sizeof(sgl->sg[0]) * (MAX_SGL_ENTS + 1),
-                                  GFP_KERNEL);
-               if (!sgl)
-                       return -ENOMEM;
-
-               sg_init_table(sgl->sg, MAX_SGL_ENTS + 1);
-               sgl->cur = 0;
-
-               if (sg)
-                       sg_chain(sg, MAX_SGL_ENTS + 1, sgl->sg);
-
-               list_add_tail(&sgl->list, &ctx->tsgl);
-       }
-
-       return 0;
-}
-
-static void skcipher_pull_sgl(struct sock *sk, size_t used, int put)
-{
-       struct alg_sock *ask = alg_sk(sk);
-       struct skcipher_ctx *ctx = ask->private;
-       struct skcipher_sg_list *sgl;
-       struct scatterlist *sg;
-       int i;
-
-       while (!list_empty(&ctx->tsgl)) {
-               sgl = list_first_entry(&ctx->tsgl, struct skcipher_sg_list,
-                                      list);
-               sg = sgl->sg;
-
-               for (i = 0; i < sgl->cur; i++) {
-                       size_t plen = min_t(size_t, used, sg[i].length);
-
-                       if (!sg_page(sg + i))
-                               continue;
-
-                       sg[i].length -= plen;
-                       sg[i].offset += plen;
-
-                       used -= plen;
-                       ctx->used -= plen;
-
-                       if (sg[i].length)
-                               return;
-                       if (put)
-                               put_page(sg_page(sg + i));
-                       sg_assign_page(sg + i, NULL);
-               }
-
-               list_del(&sgl->list);
-               sock_kfree_s(sk, sgl,
-                            sizeof(*sgl) + sizeof(sgl->sg[0]) *
-                                           (MAX_SGL_ENTS + 1));
-       }
-
-       if (!ctx->used)
-               ctx->merge = 0;
-}
-
-static void skcipher_free_sgl(struct sock *sk)
-{
-       struct alg_sock *ask = alg_sk(sk);
-       struct skcipher_ctx *ctx = ask->private;
-
-       skcipher_pull_sgl(sk, ctx->used, 1);
-}
-
-static int skcipher_wait_for_wmem(struct sock *sk, unsigned flags)
-{
-       DEFINE_WAIT_FUNC(wait, woken_wake_function);
-       int err = -ERESTARTSYS;
-       long timeout;
-
-       if (flags & MSG_DONTWAIT)
-               return -EAGAIN;
-
-       sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
-
-       add_wait_queue(sk_sleep(sk), &wait);
-       for (;;) {
-               if (signal_pending(current))
-                       break;
-               timeout = MAX_SCHEDULE_TIMEOUT;
-               if (sk_wait_event(sk, &timeout, skcipher_writable(sk), &wait)) {
-                       err = 0;
-                       break;
-               }
-       }
-       remove_wait_queue(sk_sleep(sk), &wait);
-
-       return err;
-}
-
-static void skcipher_wmem_wakeup(struct sock *sk)
-{
-       struct socket_wq *wq;
-
-       if (!skcipher_writable(sk))
-               return;
-
-       rcu_read_lock();
-       wq = rcu_dereference(sk->sk_wq);
-       if (skwq_has_sleeper(wq))
-               wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
-                                                          POLLRDNORM |
-                                                          POLLRDBAND);
-       sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
-       rcu_read_unlock();
-}
-
-static int skcipher_wait_for_data(struct sock *sk, unsigned flags)
-{
-       DEFINE_WAIT_FUNC(wait, woken_wake_function);
-       struct alg_sock *ask = alg_sk(sk);
-       struct skcipher_ctx *ctx = ask->private;
-       long timeout;
-       int err = -ERESTARTSYS;
-
-       if (flags & MSG_DONTWAIT) {
-               return -EAGAIN;
-       }
-
-       sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
-
-       add_wait_queue(sk_sleep(sk), &wait);
-       for (;;) {
-               if (signal_pending(current))
-                       break;
-               timeout = MAX_SCHEDULE_TIMEOUT;
-               if (sk_wait_event(sk, &timeout, ctx->used, &wait)) {
-                       err = 0;
-                       break;
-               }
-       }
-       remove_wait_queue(sk_sleep(sk), &wait);
-
-       sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
-
-       return err;
-}
-
-static void skcipher_data_wakeup(struct sock *sk)
-{
-       struct alg_sock *ask = alg_sk(sk);
-       struct skcipher_ctx *ctx = ask->private;
-       struct socket_wq *wq;
-
-       if (!ctx->used)
-               return;
-
-       rcu_read_lock();
-       wq = rcu_dereference(sk->sk_wq);
-       if (skwq_has_sleeper(wq))
-               wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
-                                                          POLLRDNORM |
-                                                          POLLRDBAND);
-       sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
-       rcu_read_unlock();
-}
-
 static int skcipher_sendmsg(struct socket *sock, struct msghdr *msg,
                            size_t size)
 {
@@ -303,445 +50,143 @@ static int skcipher_sendmsg(struct socket *sock, struct msghdr *msg,
        struct alg_sock *ask = alg_sk(sk);
        struct sock *psk = ask->parent;
        struct alg_sock *pask = alg_sk(psk);
-       struct skcipher_ctx *ctx = ask->private;
        struct skcipher_tfm *skc = pask->private;
        struct crypto_skcipher *tfm = skc->skcipher;
        unsigned ivsize = crypto_skcipher_ivsize(tfm);
-       struct skcipher_sg_list *sgl;
-       struct af_alg_control con = {};
-       long copied = 0;
-       bool enc = 0;
-       bool init = 0;
-       int err;
-       int i;
-
-       if (msg->msg_controllen) {
-               err = af_alg_cmsg_send(msg, &con);
-               if (err)
-                       return err;
-
-               init = 1;
-               switch (con.op) {
-               case ALG_OP_ENCRYPT:
-                       enc = 1;
-                       break;
-               case ALG_OP_DECRYPT:
-                       enc = 0;
-                       break;
-               default:
-                       return -EINVAL;
-               }
-
-               if (con.iv && con.iv->ivlen != ivsize)
-                       return -EINVAL;
-       }
-
-       err = -EINVAL;
-
-       lock_sock(sk);
-       if (!ctx->more && ctx->used)
-               goto unlock;
-
-       if (init) {
-               ctx->enc = enc;
-               if (con.iv)
-                       memcpy(ctx->iv, con.iv->iv, ivsize);
-       }
-
-       while (size) {
-               struct scatterlist *sg;
-               unsigned long len = size;
-               size_t plen;
-
-               if (ctx->merge) {
-                       sgl = list_entry(ctx->tsgl.prev,
-                                        struct skcipher_sg_list, list);
-                       sg = sgl->sg + sgl->cur - 1;
-                       len = min_t(unsigned long, len,
-                                   PAGE_SIZE - sg->offset - sg->length);
-
-                       err = memcpy_from_msg(page_address(sg_page(sg)) +
-                                             sg->offset + sg->length,
-                                             msg, len);
-                       if (err)
-                               goto unlock;
-
-                       sg->length += len;
-                       ctx->merge = (sg->offset + sg->length) &
-                                    (PAGE_SIZE - 1);
-
-                       ctx->used += len;
-                       copied += len;
-                       size -= len;
-                       continue;
-               }
 
-               if (!skcipher_writable(sk)) {
-                       err = skcipher_wait_for_wmem(sk, msg->msg_flags);
-                       if (err)
-                               goto unlock;
-               }
-
-               len = min_t(unsigned long, len, skcipher_sndbuf(sk));
-
-               err = skcipher_alloc_sgl(sk);
-               if (err)
-                       goto unlock;
-
-               sgl = list_entry(ctx->tsgl.prev, struct skcipher_sg_list, list);
-               sg = sgl->sg;
-               if (sgl->cur)
-                       sg_unmark_end(sg + sgl->cur - 1);
-               do {
-                       i = sgl->cur;
-                       plen = min_t(size_t, len, PAGE_SIZE);
-
-                       sg_assign_page(sg + i, alloc_page(GFP_KERNEL));
-                       err = -ENOMEM;
-                       if (!sg_page(sg + i))
-                               goto unlock;
-
-                       err = memcpy_from_msg(page_address(sg_page(sg + i)),
-                                             msg, plen);
-                       if (err) {
-                               __free_page(sg_page(sg + i));
-                               sg_assign_page(sg + i, NULL);
-                               goto unlock;
-                       }
-
-                       sg[i].length = plen;
-                       len -= plen;
-                       ctx->used += plen;
-                       copied += plen;
-                       size -= plen;
-                       sgl->cur++;
-               } while (len && sgl->cur < MAX_SGL_ENTS);
-
-               if (!size)
-                       sg_mark_end(sg + sgl->cur - 1);
-
-               ctx->merge = plen & (PAGE_SIZE - 1);
-       }
-
-       err = 0;
-
-       ctx->more = msg->msg_flags & MSG_MORE;
-
-unlock:
-       skcipher_data_wakeup(sk);
-       release_sock(sk);
-
-       return copied ?: err;
+       return af_alg_sendmsg(sock, msg, size, ivsize);
 }
 
-static ssize_t skcipher_sendpage(struct socket *sock, struct page *page,
-                                int offset, size_t size, int flags)
-{
-       struct sock *sk = sock->sk;
-       struct alg_sock *ask = alg_sk(sk);
-       struct skcipher_ctx *ctx = ask->private;
-       struct skcipher_sg_list *sgl;
-       int err = -EINVAL;
-
-       if (flags & MSG_SENDPAGE_NOTLAST)
-               flags |= MSG_MORE;
-
-       lock_sock(sk);
-       if (!ctx->more && ctx->used)
-               goto unlock;
-
-       if (!size)
-               goto done;
-
-       if (!skcipher_writable(sk)) {
-               err = skcipher_wait_for_wmem(sk, flags);
-               if (err)
-                       goto unlock;
-       }
-
-       err = skcipher_alloc_sgl(sk);
-       if (err)
-               goto unlock;
-
-       ctx->merge = 0;
-       sgl = list_entry(ctx->tsgl.prev, struct skcipher_sg_list, list);
-
-       if (sgl->cur)
-               sg_unmark_end(sgl->sg + sgl->cur - 1);
-
-       sg_mark_end(sgl->sg + sgl->cur);
-       get_page(page);
-       sg_set_page(sgl->sg + sgl->cur, page, size, offset);
-       sgl->cur++;
-       ctx->used += size;
-
-done:
-       ctx->more = flags & MSG_MORE;
-
-unlock:
-       skcipher_data_wakeup(sk);
-       release_sock(sk);
-
-       return err ?: size;
-}
-
-static int skcipher_all_sg_nents(struct skcipher_ctx *ctx)
-{
-       struct skcipher_sg_list *sgl;
-       struct scatterlist *sg;
-       int nents = 0;
-
-       list_for_each_entry(sgl, &ctx->tsgl, list) {
-               sg = sgl->sg;
-
-               while (!sg->length)
-                       sg++;
-
-               nents += sg_nents(sg);
-       }
-       return nents;
-}
-
-static int skcipher_recvmsg_async(struct socket *sock, struct msghdr *msg,
-                                 int flags)
+static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
+                            size_t ignored, int flags)
 {
        struct sock *sk = sock->sk;
        struct alg_sock *ask = alg_sk(sk);
        struct sock *psk = ask->parent;
        struct alg_sock *pask = alg_sk(psk);
-       struct skcipher_ctx *ctx = ask->private;
+       struct af_alg_ctx *ctx = ask->private;
        struct skcipher_tfm *skc = pask->private;
        struct crypto_skcipher *tfm = skc->skcipher;
-       struct skcipher_sg_list *sgl;
-       struct scatterlist *sg;
-       struct skcipher_async_req *sreq;
-       struct skcipher_request *req;
-       struct skcipher_async_rsgl *last_rsgl = NULL;
-       unsigned int txbufs = 0, len = 0, tx_nents;
-       unsigned int reqsize = crypto_skcipher_reqsize(tfm);
-       unsigned int ivsize = crypto_skcipher_ivsize(tfm);
-       int err = -ENOMEM;
-       bool mark = false;
-       char *iv;
-
-       sreq = kzalloc(sizeof(*sreq) + reqsize + ivsize, GFP_KERNEL);
-       if (unlikely(!sreq))
-               goto out;
-
-       req = &sreq->req;
-       iv = (char *)(req + 1) + reqsize;
-       sreq->iocb = msg->msg_iocb;
-       INIT_LIST_HEAD(&sreq->list);
-       sreq->inflight = &ctx->inflight;
+       unsigned int bs = crypto_skcipher_blocksize(tfm);
+       struct af_alg_async_req *areq;
+       int err = 0;
+       size_t len = 0;
 
-       lock_sock(sk);
-       tx_nents = skcipher_all_sg_nents(ctx);
-       sreq->tsg = kcalloc(tx_nents, sizeof(*sg), GFP_KERNEL);
-       if (unlikely(!sreq->tsg))
-               goto unlock;
-       sg_init_table(sreq->tsg, tx_nents);
-       memcpy(iv, ctx->iv, ivsize);
-       skcipher_request_set_tfm(req, tfm);
-       skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP,
-                                     skcipher_async_cb, sreq);
-
-       while (iov_iter_count(&msg->msg_iter)) {
-               struct skcipher_async_rsgl *rsgl;
-               int used;
-
-               if (!ctx->used) {
-                       err = skcipher_wait_for_data(sk, flags);
-                       if (err)
-                               goto free;
-               }
-               sgl = list_first_entry(&ctx->tsgl,
-                                      struct skcipher_sg_list, list);
-               sg = sgl->sg;
-
-               while (!sg->length)
-                       sg++;
-
-               used = min_t(unsigned long, ctx->used,
-                            iov_iter_count(&msg->msg_iter));
-               used = min_t(unsigned long, used, sg->length);
-
-               if (txbufs == tx_nents) {
-                       struct scatterlist *tmp;
-                       int x;
-                       /* Ran out of tx slots in async request
-                        * need to expand */
-                       tmp = kcalloc(tx_nents * 2, sizeof(*tmp),
-                                     GFP_KERNEL);
-                       if (!tmp) {
-                               err = -ENOMEM;
-                               goto free;
-                       }
-
-                       sg_init_table(tmp, tx_nents * 2);
-                       for (x = 0; x < tx_nents; x++)
-                               sg_set_page(&tmp[x], sg_page(&sreq->tsg[x]),
-                                           sreq->tsg[x].length,
-                                           sreq->tsg[x].offset);
-                       kfree(sreq->tsg);
-                       sreq->tsg = tmp;
-                       tx_nents *= 2;
-                       mark = true;
-               }
-               /* Need to take over the tx sgl from ctx
-                * to the asynch req - these sgls will be freed later */
-               sg_set_page(sreq->tsg + txbufs++, sg_page(sg), sg->length,
-                           sg->offset);
-
-               if (list_empty(&sreq->list)) {
-                       rsgl = &sreq->first_sgl;
-                       list_add_tail(&rsgl->list, &sreq->list);
-               } else {
-                       rsgl = kmalloc(sizeof(*rsgl), GFP_KERNEL);
-                       if (!rsgl) {
-                               err = -ENOMEM;
-                               goto free;
-                       }
-                       list_add_tail(&rsgl->list, &sreq->list);
-               }
+       /* Allocate cipher request for current operation. */
+       areq = af_alg_alloc_areq(sk, sizeof(struct af_alg_async_req) +
+                                    crypto_skcipher_reqsize(tfm));
+       if (IS_ERR(areq))
+               return PTR_ERR(areq);
 
-               used = af_alg_make_sg(&rsgl->sgl, &msg->msg_iter, used);
-               err = used;
-               if (used < 0)
-                       goto free;
-               if (last_rsgl)
-                       af_alg_link_sg(&last_rsgl->sgl, &rsgl->sgl);
-
-               last_rsgl = rsgl;
-               len += used;
-               skcipher_pull_sgl(sk, used, 0);
-               iov_iter_advance(&msg->msg_iter, used);
+       /* convert iovecs of output buffers into RX SGL */
+       err = af_alg_get_rsgl(sk, msg, flags, areq, -1, &len);
+       if (err)
+               goto free;
+
+       /* Process only as much RX buffer space as we have TX data */
+       if (len > ctx->used)
+               len = ctx->used;
+
+       /*
+        * If more data is expected to be processed, process only full
+        * block-size buffers.
+        */
+       if (ctx->more || len < ctx->used)
+               len -= len % bs;
+
+       /*
+        * Create a per-request TX SGL that tracks the SG entries taken
+        * from the global TX SGL.
+        */
+       areq->tsgl_entries = af_alg_count_tsgl(sk, len, 0);
+       if (!areq->tsgl_entries)
+               areq->tsgl_entries = 1;
+       areq->tsgl = sock_kmalloc(sk, sizeof(*areq->tsgl) * areq->tsgl_entries,
+                                 GFP_KERNEL);
+       if (!areq->tsgl) {
+               err = -ENOMEM;
+               goto free;
+       }
+       sg_init_table(areq->tsgl, areq->tsgl_entries);
+       af_alg_pull_tsgl(sk, len, areq->tsgl, 0);
+
+       /* Initialize the crypto operation */
+       skcipher_request_set_tfm(&areq->cra_u.skcipher_req, tfm);
+       skcipher_request_set_crypt(&areq->cra_u.skcipher_req, areq->tsgl,
+                                  areq->first_rsgl.sgl.sg, len, ctx->iv);
+
+       if (msg->msg_iocb && !is_sync_kiocb(msg->msg_iocb)) {
+               /* AIO operation */
+               areq->iocb = msg->msg_iocb;
+               skcipher_request_set_callback(&areq->cra_u.skcipher_req,
+                                             CRYPTO_TFM_REQ_MAY_SLEEP,
+                                             af_alg_async_cb, areq);
+               err = ctx->enc ?
+                       crypto_skcipher_encrypt(&areq->cra_u.skcipher_req) :
+                       crypto_skcipher_decrypt(&areq->cra_u.skcipher_req);
+       } else {
+               /* Synchronous operation */
+               skcipher_request_set_callback(&areq->cra_u.skcipher_req,
+                                             CRYPTO_TFM_REQ_MAY_SLEEP |
+                                             CRYPTO_TFM_REQ_MAY_BACKLOG,
+                                             af_alg_complete,
+                                             &ctx->completion);
+               err = af_alg_wait_for_completion(ctx->enc ?
+                       crypto_skcipher_encrypt(&areq->cra_u.skcipher_req) :
+                       crypto_skcipher_decrypt(&areq->cra_u.skcipher_req),
+                                                &ctx->completion);
        }
 
-       if (mark)
-               sg_mark_end(sreq->tsg + txbufs - 1);
-
-       skcipher_request_set_crypt(req, sreq->tsg, sreq->first_sgl.sgl.sg,
-                                  len, iv);
-       err = ctx->enc ? crypto_skcipher_encrypt(req) :
-                        crypto_skcipher_decrypt(req);
+       /* AIO operation in progress */
        if (err == -EINPROGRESS) {
-               atomic_inc(&ctx->inflight);
-               err = -EIOCBQUEUED;
-               sreq = NULL;
-               goto unlock;
+               sock_hold(sk);
+
+               /* Remember output size that will be generated. */
+               areq->outlen = len;
+
+               return -EIOCBQUEUED;
        }
+
 free:
-       skcipher_free_async_sgls(sreq);
-unlock:
-       skcipher_wmem_wakeup(sk);
-       release_sock(sk);
-       kzfree(sreq);
-out:
-       return err;
+       af_alg_free_areq_sgls(areq);
+       sock_kfree_s(sk, areq, areq->areqlen);
+
+       return err ? err : len;
 }
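To make the partial-block rule above concrete: with a 16-byte block size, 40 bytes of queued TX data and more data announced via MSG_MORE, only 32 bytes are processed in this pass and the remaining 8 bytes stay queued for a later recvmsg(). A standalone restatement of that rule, with names assumed and kernel context implied:

/* Illustrative only: mirrors the "len -= len % bs" handling above. */
static size_t usable_len(size_t rx_space, size_t tx_queued, bool more,
			 unsigned int bs)
{
	size_t len = min(rx_space, tx_queued);

	/* More data expected or TX not fully drained: full blocks only. */
	if (more || len < tx_queued)
		len -= len % bs;

	return len;	/* usable_len(64, 40, true, 16) == 32 */
}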
 
-static int skcipher_recvmsg_sync(struct socket *sock, struct msghdr *msg,
-                                int flags)
+static int skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
+                           size_t ignored, int flags)
 {
        struct sock *sk = sock->sk;
-       struct alg_sock *ask = alg_sk(sk);
-       struct sock *psk = ask->parent;
-       struct alg_sock *pask = alg_sk(psk);
-       struct skcipher_ctx *ctx = ask->private;
-       struct skcipher_tfm *skc = pask->private;
-       struct crypto_skcipher *tfm = skc->skcipher;
-       unsigned bs = crypto_skcipher_blocksize(tfm);
-       struct skcipher_sg_list *sgl;
-       struct scatterlist *sg;
-       int err = -EAGAIN;
-       int used;
-       long copied = 0;
+       int ret = 0;
 
        lock_sock(sk);
        while (msg_data_left(msg)) {
-               if (!ctx->used) {
-                       err = skcipher_wait_for_data(sk, flags);
-                       if (err)
-                               goto unlock;
+               int err = _skcipher_recvmsg(sock, msg, ignored, flags);
+
+               /*
+                * This error covers -EIOCBQUEUED, which implies that we can
+                * only handle one AIO request. If the caller wants to issue
+                * multiple AIO requests in parallel, it must make multiple
+                * separate AIO calls.
+                *
+                * Also return the error if no data has been processed so far.
+                */
+               if (err <= 0) {
+                       if (err == -EIOCBQUEUED || !ret)
+                               ret = err;
+                       goto out;
                }
 
-               used = min_t(unsigned long, ctx->used, msg_data_left(msg));
-
-               used = af_alg_make_sg(&ctx->rsgl, &msg->msg_iter, used);
-               err = used;
-               if (err < 0)
-                       goto unlock;
-
-               if (ctx->more || used < ctx->used)
-                       used -= used % bs;
-
-               err = -EINVAL;
-               if (!used)
-                       goto free;
-
-               sgl = list_first_entry(&ctx->tsgl,
-                                      struct skcipher_sg_list, list);
-               sg = sgl->sg;
-
-               while (!sg->length)
-                       sg++;
-
-               skcipher_request_set_crypt(&ctx->req, sg, ctx->rsgl.sg, used,
-                                          ctx->iv);
-
-               err = af_alg_wait_for_completion(
-                               ctx->enc ?
-                                       crypto_skcipher_encrypt(&ctx->req) :
-                                       crypto_skcipher_decrypt(&ctx->req),
-                               &ctx->completion);
-
-free:
-               af_alg_free_sg(&ctx->rsgl);
-
-               if (err)
-                       goto unlock;
-
-               copied += used;
-               skcipher_pull_sgl(sk, used, 1);
-               iov_iter_advance(&msg->msg_iter, used);
+               ret += err;
        }
 
-       err = 0;
-
-unlock:
-       skcipher_wmem_wakeup(sk);
+out:
+       af_alg_wmem_wakeup(sk);
        release_sock(sk);
-
-       return copied ?: err;
-}
-
-static int skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
-                           size_t ignored, int flags)
-{
-       return (msg->msg_iocb && !is_sync_kiocb(msg->msg_iocb)) ?
-               skcipher_recvmsg_async(sock, msg, flags) :
-               skcipher_recvmsg_sync(sock, msg, flags);
+       return ret;
 }
 
-static unsigned int skcipher_poll(struct file *file, struct socket *sock,
-                                 poll_table *wait)
-{
-       struct sock *sk = sock->sk;
-       struct alg_sock *ask = alg_sk(sk);
-       struct skcipher_ctx *ctx = ask->private;
-       unsigned int mask;
-
-       sock_poll_wait(file, sk_sleep(sk), wait);
-       mask = 0;
-
-       if (ctx->used)
-               mask |= POLLIN | POLLRDNORM;
-
-       if (skcipher_writable(sk))
-               mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
-
-       return mask;
-}
 
 static struct proto_ops algif_skcipher_ops = {
        .family         =       PF_ALG,
@@ -760,9 +205,9 @@ static struct proto_ops algif_skcipher_ops = {
 
        .release        =       af_alg_release,
        .sendmsg        =       skcipher_sendmsg,
-       .sendpage       =       skcipher_sendpage,
+       .sendpage       =       af_alg_sendpage,
        .recvmsg        =       skcipher_recvmsg,
-       .poll           =       skcipher_poll,
+       .poll           =       af_alg_poll,
 };
 
 static int skcipher_check_key(struct socket *sock)
@@ -824,7 +269,7 @@ static ssize_t skcipher_sendpage_nokey(struct socket *sock, struct page *page,
        if (err)
                return err;
 
-       return skcipher_sendpage(sock, page, offset, size, flags);
+       return af_alg_sendpage(sock, page, offset, size, flags);
 }
 
 static int skcipher_recvmsg_nokey(struct socket *sock, struct msghdr *msg,
@@ -858,7 +303,7 @@ static struct proto_ops algif_skcipher_ops_nokey = {
        .sendmsg        =       skcipher_sendmsg_nokey,
        .sendpage       =       skcipher_sendpage_nokey,
        .recvmsg        =       skcipher_recvmsg_nokey,
-       .poll           =       skcipher_poll,
+       .poll           =       af_alg_poll,
 };
 
 static void *skcipher_bind(const char *name, u32 type, u32 mask)
@@ -900,26 +345,16 @@ static int skcipher_setkey(void *private, const u8 *key, unsigned int keylen)
        return err;
 }
 
-static void skcipher_wait(struct sock *sk)
-{
-       struct alg_sock *ask = alg_sk(sk);
-       struct skcipher_ctx *ctx = ask->private;
-       int ctr = 0;
-
-       while (atomic_read(&ctx->inflight) && ctr++ < 100)
-               msleep(100);
-}
-
 static void skcipher_sock_destruct(struct sock *sk)
 {
        struct alg_sock *ask = alg_sk(sk);
-       struct skcipher_ctx *ctx = ask->private;
-       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(&ctx->req);
-
-       if (atomic_read(&ctx->inflight))
-               skcipher_wait(sk);
+       struct af_alg_ctx *ctx = ask->private;
+       struct sock *psk = ask->parent;
+       struct alg_sock *pask = alg_sk(psk);
+       struct skcipher_tfm *skc = pask->private;
+       struct crypto_skcipher *tfm = skc->skcipher;
 
-       skcipher_free_sgl(sk);
+       af_alg_pull_tsgl(sk, ctx->used, NULL, 0);
        sock_kzfree_s(sk, ctx->iv, crypto_skcipher_ivsize(tfm));
        sock_kfree_s(sk, ctx, ctx->len);
        af_alg_release_parent(sk);
@@ -927,11 +362,11 @@ static void skcipher_sock_destruct(struct sock *sk)
 
 static int skcipher_accept_parent_nokey(void *private, struct sock *sk)
 {
-       struct skcipher_ctx *ctx;
+       struct af_alg_ctx *ctx;
        struct alg_sock *ask = alg_sk(sk);
        struct skcipher_tfm *tfm = private;
        struct crypto_skcipher *skcipher = tfm->skcipher;
-       unsigned int len = sizeof(*ctx) + crypto_skcipher_reqsize(skcipher);
+       unsigned int len = sizeof(*ctx);
 
        ctx = sock_kmalloc(sk, len, GFP_KERNEL);
        if (!ctx)
@@ -946,22 +381,17 @@ static int skcipher_accept_parent_nokey(void *private, struct sock *sk)
 
        memset(ctx->iv, 0, crypto_skcipher_ivsize(skcipher));
 
-       INIT_LIST_HEAD(&ctx->tsgl);
+       INIT_LIST_HEAD(&ctx->tsgl_list);
        ctx->len = len;
        ctx->used = 0;
+       ctx->rcvused = 0;
        ctx->more = 0;
        ctx->merge = 0;
        ctx->enc = 0;
-       atomic_set(&ctx->inflight, 0);
        af_alg_init_completion(&ctx->completion);
 
        ask->private = ctx;
 
-       skcipher_request_set_tfm(&ctx->req, skcipher);
-       skcipher_request_set_callback(&ctx->req, CRYPTO_TFM_REQ_MAY_SLEEP |
-                                                CRYPTO_TFM_REQ_MAY_BACKLOG,
-                                     af_alg_complete, &ctx->completion);
-
        sk->sk_destruct = skcipher_sock_destruct;
 
        return 0;
index 477d9226ccaac99c087bf0df5f51597a34518100..854d924f9d8e6fad3c7653ca5ba0ea5d83a9d425 100644 (file)
@@ -65,8 +65,7 @@ static void crypto_ctr_crypt_final(struct blkcipher_walk *walk,
        unsigned int nbytes = walk->nbytes;
 
        crypto_cipher_encrypt_one(tfm, keystream, ctrblk);
-       crypto_xor(keystream, src, nbytes);
-       memcpy(dst, keystream, nbytes);
+       crypto_xor_cpy(dst, keystream, src, nbytes);
 
        crypto_inc(ctrblk, bsize);
 }
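The ctr and pcbc hunks in this series replace a crypto_xor() followed by memcpy() with a single crypto_xor_cpy(), which XORs two distinct source buffers directly into the destination so the keystream or IV copy is no longer clobbered as a side effect. Byte-wise, the helper behaves like this reference sketch (the in-kernel version is word-size optimized):

/* Reference semantics only: dst = src1 ^ src2, byte by byte. */
static inline void xor_cpy_ref(u8 *dst, const u8 *src1, const u8 *src2,
			       unsigned int size)
{
	while (size--)
		*dst++ = *src1++ ^ *src2++;
}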
index 61c7708905d01ec6745c03352579b2173fc6c87f..4271fc77d2616ef4a86d1509e8c44aeee33f4f72 100644 (file)
@@ -20,8 +20,6 @@ struct ecdh_ctx {
        unsigned int curve_id;
        unsigned int ndigits;
        u64 private_key[ECC_MAX_DIGITS];
-       u64 public_key[2 * ECC_MAX_DIGITS];
-       u64 shared_secret[ECC_MAX_DIGITS];
 };
 
 static inline struct ecdh_ctx *ecdh_get_ctx(struct crypto_kpp *tfm)
@@ -70,41 +68,58 @@ static int ecdh_set_secret(struct crypto_kpp *tfm, const void *buf,
 
 static int ecdh_compute_value(struct kpp_request *req)
 {
-       int ret = 0;
        struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
        struct ecdh_ctx *ctx = ecdh_get_ctx(tfm);
-       size_t copied, nbytes;
+       u64 *public_key;
+       u64 *shared_secret = NULL;
        void *buf;
+       size_t copied, nbytes, public_key_sz;
+       int ret = -ENOMEM;
 
        nbytes = ctx->ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
+       /* Public part is a point thus it has both coordinates */
+       public_key_sz = 2 * nbytes;
+
+       public_key = kmalloc(public_key_sz, GFP_KERNEL);
+       if (!public_key)
+               return -ENOMEM;
 
        if (req->src) {
-               copied = sg_copy_to_buffer(req->src, 1, ctx->public_key,
-                                          2 * nbytes);
-               if (copied != 2 * nbytes)
-                       return -EINVAL;
+               shared_secret = kmalloc(nbytes, GFP_KERNEL);
+               if (!shared_secret)
+                       goto free_pubkey;
+
+               copied = sg_copy_to_buffer(req->src, 1, public_key,
+                                          public_key_sz);
+               if (copied != public_key_sz) {
+                       ret = -EINVAL;
+                       goto free_all;
+               }
 
                ret = crypto_ecdh_shared_secret(ctx->curve_id, ctx->ndigits,
-                                               ctx->private_key,
-                                               ctx->public_key,
-                                               ctx->shared_secret);
+                                               ctx->private_key, public_key,
+                                               shared_secret);
 
-               buf = ctx->shared_secret;
+               buf = shared_secret;
        } else {
                ret = ecc_make_pub_key(ctx->curve_id, ctx->ndigits,
-                                      ctx->private_key, ctx->public_key);
-               buf = ctx->public_key;
-               /* Public part is a point thus it has both coordinates */
-               nbytes *= 2;
+                                      ctx->private_key, public_key);
+               buf = public_key;
+               nbytes = public_key_sz;
        }
 
        if (ret < 0)
-               return ret;
+               goto free_all;
 
        copied = sg_copy_from_buffer(req->dst, 1, buf, nbytes);
        if (copied != nbytes)
-               return -EINVAL;
+               ret = -EINVAL;
 
+       /* fall through */
+free_all:
+       kzfree(shared_secret);
+free_pubkey:
+       kfree(public_key);
        return ret;
 }
 
index 29dd2b4a3b85b4021592facbdd019cc1c14938f9..d9e45a9587201d7760084bd81a024c11d743dc75 100644 (file)
@@ -55,8 +55,7 @@ static int crypto_pcbc_encrypt_segment(struct skcipher_request *req,
        do {
                crypto_xor(iv, src, bsize);
                crypto_cipher_encrypt_one(tfm, dst, iv);
-               memcpy(iv, dst, bsize);
-               crypto_xor(iv, src, bsize);
+               crypto_xor_cpy(iv, dst, src, bsize);
 
                src += bsize;
                dst += bsize;
@@ -79,8 +78,7 @@ static int crypto_pcbc_encrypt_inplace(struct skcipher_request *req,
                memcpy(tmpbuf, src, bsize);
                crypto_xor(iv, src, bsize);
                crypto_cipher_encrypt_one(tfm, src, iv);
-               memcpy(iv, tmpbuf, bsize);
-               crypto_xor(iv, src, bsize);
+               crypto_xor_cpy(iv, tmpbuf, src, bsize);
 
                src += bsize;
        } while ((nbytes -= bsize) >= bsize);
@@ -127,8 +125,7 @@ static int crypto_pcbc_decrypt_segment(struct skcipher_request *req,
        do {
                crypto_cipher_decrypt_one(tfm, dst, src);
                crypto_xor(dst, iv, bsize);
-               memcpy(iv, src, bsize);
-               crypto_xor(iv, dst, bsize);
+               crypto_xor_cpy(iv, dst, src, bsize);
 
                src += bsize;
                dst += bsize;
@@ -153,8 +150,7 @@ static int crypto_pcbc_decrypt_inplace(struct skcipher_request *req,
                memcpy(tmpbuf, src, bsize);
                crypto_cipher_decrypt_one(tfm, src, src);
                crypto_xor(src, iv, bsize);
-               memcpy(iv, tmpbuf, bsize);
-               crypto_xor(iv, src, bsize);
+               crypto_xor_cpy(iv, src, tmpbuf, bsize);
 
                src += bsize;
        } while ((nbytes -= bsize) >= bsize);
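
Editor's note, for illustration only: all four pcbc hunks make the same substitution, replacing a memcpy() into the IV followed by an in-place crypto_xor() with a single crypto_xor_cpy() call. As those replacements imply, the helper writes the XOR of two source buffers into a destination in one pass; the self-contained sketch below shows that semantic. xor_cpy() here is an illustrative stand-in, not the kernel implementation.

/* dst[i] = src1[i] ^ src2[i] -- the semantic implied by the pcbc hunks. */
#include <stdio.h>
#include <stddef.h>

static void xor_cpy(unsigned char *dst, const unsigned char *src1,
		    const unsigned char *src2, size_t len)
{
	size_t i;

	for (i = 0; i < len; i++)
		dst[i] = src1[i] ^ src2[i];
}

int main(void)
{
	unsigned char iv[4];
	unsigned char dst[4] = { 0xde, 0xad, 0xbe, 0xef };
	unsigned char src[4] = { 0x01, 0x02, 0x03, 0x04 };

	/* Equivalent to: memcpy(iv, dst, 4); then xor iv with src in place. */
	xor_cpy(iv, dst, src, sizeof(iv));
	printf("%02x%02x%02x%02x\n", iv[0], iv[1], iv[2], iv[3]);
	return 0;
}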
index 5e8469244960f85b52640fda18b561238ccdcce3..b4a618668161dc93b0b7643393a8e558287ba39f 100644 (file)
@@ -43,12 +43,14 @@ int crypto_rng_reset(struct crypto_rng *tfm, const u8 *seed, unsigned int slen)
                if (!buf)
                        return -ENOMEM;
 
-               get_random_bytes(buf, slen);
+               err = get_random_bytes_wait(buf, slen);
+               if (err)
+                       goto out;
                seed = buf;
        }
 
        err = crypto_rng_alg(tfm)->seed(tfm, seed, slen);
-
+out:
        kzfree(buf);
        return err;
 }
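
Editor's note, for illustration only: the rng.c hunk switches seeding from get_random_bytes() to get_random_bytes_wait(), which can fail (for example if the caller is interrupted while waiting for the entropy pool), so the function now checks the return value and still releases the temporary seed buffer on every path. The userspace analogue below uses getrandom(2); seed_algorithm() is a hypothetical stand-in for the tfm seed callback, and the memset-based wipe stands in for kzfree().

#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/random.h>

static int seed_algorithm(const unsigned char *seed, size_t slen)
{
	/* hypothetical stand-in for the algorithm's ->seed() callback */
	(void)seed;
	(void)slen;
	return 0;
}

static int rng_reset(const unsigned char *seed, size_t slen)
{
	unsigned char *buf = NULL;
	int err;

	if (!seed && slen) {
		buf = malloc(slen);
		if (!buf)
			return -ENOMEM;

		/* may block until the pool is ready, and may fail */
		if (getrandom(buf, slen, 0) != (ssize_t)slen) {
			err = -errno;
			goto out;
		}
		seed = buf;
	}

	err = seed_algorithm(seed, slen);
out:
	if (buf) {
		memset(buf, 0, slen);	/* kzfree() analogue */
		free(buf);
	}
	return err;
}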
index ae1d3cf209e4836b5cdbdf0e261caa9190e59bb4..2075e2c4e7dfd5b70bafe10cc37f9571fab34468 100644 (file)
@@ -65,11 +65,6 @@ static void crypto_scomp_show(struct seq_file *m, struct crypto_alg *alg)
        seq_puts(m, "type         : scomp\n");
 }
 
-static int crypto_scomp_init_tfm(struct crypto_tfm *tfm)
-{
-       return 0;
-}
-
 static void crypto_scomp_free_scratches(void * __percpu *scratches)
 {
        int i;
@@ -125,12 +120,26 @@ static int crypto_scomp_alloc_all_scratches(void)
                if (!scomp_src_scratches)
                        return -ENOMEM;
                scomp_dst_scratches = crypto_scomp_alloc_scratches();
-               if (!scomp_dst_scratches)
+               if (!scomp_dst_scratches) {
+                       crypto_scomp_free_scratches(scomp_src_scratches);
+                       scomp_src_scratches = NULL;
                        return -ENOMEM;
+               }
        }
        return 0;
 }
 
+static int crypto_scomp_init_tfm(struct crypto_tfm *tfm)
+{
+       int ret;
+
+       mutex_lock(&scomp_lock);
+       ret = crypto_scomp_alloc_all_scratches();
+       mutex_unlock(&scomp_lock);
+
+       return ret;
+}
+
 static void crypto_scomp_sg_free(struct scatterlist *sgl)
 {
        int i, n;
@@ -211,9 +220,7 @@ static int scomp_acomp_comp_decomp(struct acomp_req *req, int dir)
                                              scratch_dst, &req->dlen, *ctx);
        if (!ret) {
                if (!req->dst) {
-                       req->dst = crypto_scomp_sg_alloc(req->dlen,
-                                  req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
-                                  GFP_KERNEL : GFP_ATOMIC);
+                       req->dst = crypto_scomp_sg_alloc(req->dlen, GFP_ATOMIC);
                        if (!req->dst)
                                goto out;
                }
@@ -240,6 +247,10 @@ static void crypto_exit_scomp_ops_async(struct crypto_tfm *tfm)
        struct crypto_scomp **ctx = crypto_tfm_ctx(tfm);
 
        crypto_free_scomp(*ctx);
+
+       mutex_lock(&scomp_lock);
+       crypto_scomp_free_all_scratches();
+       mutex_unlock(&scomp_lock);
 }
 
 int crypto_init_scomp_ops_async(struct crypto_tfm *tfm)
@@ -316,40 +327,18 @@ static const struct crypto_type crypto_scomp_type = {
 int crypto_register_scomp(struct scomp_alg *alg)
 {
        struct crypto_alg *base = &alg->base;
-       int ret = -ENOMEM;
-
-       mutex_lock(&scomp_lock);
-       if (crypto_scomp_alloc_all_scratches())
-               goto error;
 
        base->cra_type = &crypto_scomp_type;
        base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK;
        base->cra_flags |= CRYPTO_ALG_TYPE_SCOMPRESS;
 
-       ret = crypto_register_alg(base);
-       if (ret)
-               goto error;
-
-       mutex_unlock(&scomp_lock);
-       return ret;
-
-error:
-       crypto_scomp_free_all_scratches();
-       mutex_unlock(&scomp_lock);
-       return ret;
+       return crypto_register_alg(base);
 }
 EXPORT_SYMBOL_GPL(crypto_register_scomp);
 
 int crypto_unregister_scomp(struct scomp_alg *alg)
 {
-       int ret;
-
-       mutex_lock(&scomp_lock);
-       ret = crypto_unregister_alg(&alg->base);
-       crypto_scomp_free_all_scratches();
-       mutex_unlock(&scomp_lock);
-
-       return ret;
+       return crypto_unregister_alg(&alg->base);
 }
 EXPORT_SYMBOL_GPL(crypto_unregister_scomp);
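
Editor's note, for illustration only: taken together, the scompress hunks move scratch-buffer management out of crypto_register_scomp()/crypto_unregister_scomp() and into transform init/exit, still serialised by scomp_lock, so the scratches are only allocated once an algorithm is actually instantiated. The userspace sketch below shows that lock-protected allocate-on-first-use / free-on-teardown shape; the per-CPU detail and any reference counting are omitted, and all names are illustrative rather than kernel API.

#include <errno.h>
#include <pthread.h>
#include <stdlib.h>

#define SCRATCH_SIZE (128 * 1024)	/* placeholder scratch size */

static pthread_mutex_t scratch_lock = PTHREAD_MUTEX_INITIALIZER;
static void *src_scratch;
static void *dst_scratch;

static void free_scratches(void)
{
	free(src_scratch);
	free(dst_scratch);
	src_scratch = dst_scratch = NULL;
}

static int alloc_scratches(void)
{
	if (!src_scratch) {	/* first user allocates for everyone */
		src_scratch = malloc(SCRATCH_SIZE);
		if (!src_scratch)
			return -ENOMEM;
		dst_scratch = malloc(SCRATCH_SIZE);
		if (!dst_scratch) {
			free_scratches();
			return -ENOMEM;
		}
	}
	return 0;
}

static int tfm_init(void)
{
	int ret;

	pthread_mutex_lock(&scratch_lock);
	ret = alloc_scratches();
	pthread_mutex_unlock(&scratch_lock);
	return ret;
}

static void tfm_exit(void)
{
	pthread_mutex_lock(&scratch_lock);
	free_scratches();
	pthread_mutex_unlock(&scratch_lock);
}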
 
index 94970a794975ac2148fbc0d84bf2e830719070da..7c3382facc82e8bb706a48029d90875cafb6a156 100644 (file)
        x4 ^= x2;                                       \
        })
 
+static void __serpent_setkey_sbox(u32 r0, u32 r1, u32 r2, u32 r3, u32 r4, u32 *k)
+{
+       k += 100;
+       S3(r3, r4, r0, r1, r2); store_and_load_keys(r1, r2, r4, r3, 28, 24);
+       S4(r1, r2, r4, r3, r0); store_and_load_keys(r2, r4, r3, r0, 24, 20);
+       S5(r2, r4, r3, r0, r1); store_and_load_keys(r1, r2, r4, r0, 20, 16);
+       S6(r1, r2, r4, r0, r3); store_and_load_keys(r4, r3, r2, r0, 16, 12);
+       S7(r4, r3, r2, r0, r1); store_and_load_keys(r1, r2, r0, r4, 12, 8);
+       S0(r1, r2, r0, r4, r3); store_and_load_keys(r0, r2, r4, r1, 8, 4);
+       S1(r0, r2, r4, r1, r3); store_and_load_keys(r3, r4, r1, r0, 4, 0);
+       S2(r3, r4, r1, r0, r2); store_and_load_keys(r2, r4, r3, r0, 0, -4);
+       S3(r2, r4, r3, r0, r1); store_and_load_keys(r0, r1, r4, r2, -4, -8);
+       S4(r0, r1, r4, r2, r3); store_and_load_keys(r1, r4, r2, r3, -8, -12);
+       S5(r1, r4, r2, r3, r0); store_and_load_keys(r0, r1, r4, r3, -12, -16);
+       S6(r0, r1, r4, r3, r2); store_and_load_keys(r4, r2, r1, r3, -16, -20);
+       S7(r4, r2, r1, r3, r0); store_and_load_keys(r0, r1, r3, r4, -20, -24);
+       S0(r0, r1, r3, r4, r2); store_and_load_keys(r3, r1, r4, r0, -24, -28);
+       k -= 50;
+       S1(r3, r1, r4, r0, r2); store_and_load_keys(r2, r4, r0, r3, 22, 18);
+       S2(r2, r4, r0, r3, r1); store_and_load_keys(r1, r4, r2, r3, 18, 14);
+       S3(r1, r4, r2, r3, r0); store_and_load_keys(r3, r0, r4, r1, 14, 10);
+       S4(r3, r0, r4, r1, r2); store_and_load_keys(r0, r4, r1, r2, 10, 6);
+       S5(r0, r4, r1, r2, r3); store_and_load_keys(r3, r0, r4, r2, 6, 2);
+       S6(r3, r0, r4, r2, r1); store_and_load_keys(r4, r1, r0, r2, 2, -2);
+       S7(r4, r1, r0, r2, r3); store_and_load_keys(r3, r0, r2, r4, -2, -6);