Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
authorLinus Torvalds <torvalds@linux-foundation.org>
Fri, 25 Jan 2008 16:38:25 +0000 (08:38 -0800)
committerLinus Torvalds <torvalds@linux-foundation.org>
Fri, 25 Jan 2008 16:38:25 +0000 (08:38 -0800)
* git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (125 commits)
  [CRYPTO] twofish: Merge common glue code
  [CRYPTO] hifn_795x: Fixup container_of() usage
  [CRYPTO] cast6: inline bloat--
  [CRYPTO] api: Set default CRYPTO_MINALIGN to unsigned long long
  [CRYPTO] tcrypt: Make xcbc available as a standalone test
  [CRYPTO] xcbc: Remove bogus hash/cipher test
  [CRYPTO] xcbc: Fix algorithm leak when block size check fails
  [CRYPTO] tcrypt: Zero axbuf in the right function
  [CRYPTO] padlock: Only reset the key once for each CBC and ECB operation
  [CRYPTO] api: Include sched.h for cond_resched in scatterwalk.h
  [CRYPTO] salsa20-asm: Remove unnecessary dependency on CRYPTO_SALSA20
  [CRYPTO] tcrypt: Add select of AEAD
  [CRYPTO] salsa20: Add x86-64 assembly version
  [CRYPTO] salsa20_i586: Salsa20 stream cipher algorithm (i586 version)
  [CRYPTO] gcm: Introduce rfc4106
  [CRYPTO] api: Show async type
  [CRYPTO] chainiv: Avoid lock spinning where possible
  [CRYPTO] seqiv: Add select AEAD in Kconfig
  [CRYPTO] scatterwalk: Handle zero nbytes in scatterwalk_map_and_copy
  [CRYPTO] null: Allow setkey on digest_null
  ...

72 files changed:
Documentation/crypto/api-intro.txt
arch/s390/crypto/aes_s390.c
arch/x86/crypto/Makefile
arch/x86/crypto/aes-i586-asm_32.S
arch/x86/crypto/aes-x86_64-asm_64.S
arch/x86/crypto/aes_32.c [deleted file]
arch/x86/crypto/aes_64.c [deleted file]
arch/x86/crypto/aes_glue.c [new file with mode: 0644]
arch/x86/crypto/salsa20-i586-asm_32.S [new file with mode: 0644]
arch/x86/crypto/salsa20-x86_64-asm_64.S [new file with mode: 0644]
arch/x86/crypto/salsa20_glue.c [new file with mode: 0644]
arch/x86/crypto/twofish_64.c [deleted file]
arch/x86/crypto/twofish_glue.c [moved from arch/x86/crypto/twofish_32.c with 94% similarity]
crypto/Kconfig
crypto/Makefile
crypto/ablkcipher.c
crypto/aead.c
crypto/aes_generic.c
crypto/algapi.c
crypto/api.c
crypto/authenc.c
crypto/blkcipher.c
crypto/camellia.c
crypto/cast6.c
crypto/cbc.c
crypto/ccm.c [new file with mode: 0644]
crypto/chainiv.c [new file with mode: 0644]
crypto/cryptd.c
crypto/crypto_null.c
crypto/ctr.c [new file with mode: 0644]
crypto/des_generic.c
crypto/digest.c
crypto/eseqiv.c [new file with mode: 0644]
crypto/gcm.c [new file with mode: 0644]
crypto/hmac.c
crypto/internal.h
crypto/lzo.c [new file with mode: 0644]
crypto/pcbc.c
crypto/salsa20_generic.c [new file with mode: 0644]
crypto/scatterwalk.c
crypto/seqiv.c [new file with mode: 0644]
crypto/sha256_generic.c
crypto/tcrypt.c
crypto/tcrypt.h
crypto/twofish_common.c
crypto/xcbc.c
drivers/char/hw_random/amd-rng.c
drivers/char/hw_random/core.c
drivers/char/hw_random/geode-rng.c
drivers/char/hw_random/intel-rng.c
drivers/char/hw_random/omap-rng.c
drivers/char/hw_random/pasemi-rng.c
drivers/char/hw_random/via-rng.c
drivers/crypto/Kconfig
drivers/crypto/Makefile
drivers/crypto/geode-aes.c
drivers/crypto/geode-aes.h
drivers/crypto/hifn_795x.c [new file with mode: 0644]
drivers/crypto/padlock-aes.c
include/crypto/aead.h [new file with mode: 0644]
include/crypto/aes.h [new file with mode: 0644]
include/crypto/algapi.h
include/crypto/authenc.h [new file with mode: 0644]
include/crypto/ctr.h [new file with mode: 0644]
include/crypto/des.h [new file with mode: 0644]
include/crypto/internal/aead.h [new file with mode: 0644]
include/crypto/internal/skcipher.h [new file with mode: 0644]
include/crypto/scatterwalk.h [moved from crypto/scatterwalk.h with 69% similarity]
include/crypto/sha.h
include/crypto/skcipher.h [new file with mode: 0644]
include/linux/crypto.h
include/linux/hw_random.h

index a2ac6d2947932022ddd8b6d906d13bd5882a0719..8b49302712a890365ff3edaa2571971608ccd381 100644 (file)
@@ -33,9 +33,16 @@ The idea is to make the user interface and algorithm registration API
 very simple, while hiding the core logic from both.  Many good ideas
 from existing APIs such as Cryptoapi and Nettle have been adapted for this.
 
-The API currently supports three types of transforms: Ciphers, Digests and
-Compressors.  The compression algorithms especially seem to be performing
-very well so far.
+The API currently supports five main types of transforms: AEAD (Authenticated
+Encryption with Associated Data), Block Ciphers, Ciphers, Compressors and
+Hashes.
+
+Please note that Block Ciphers is somewhat of a misnomer.  It is in fact
+meant to support all ciphers including stream ciphers.  The difference
+between Block Ciphers and Ciphers is that the latter operates on exactly
+one block while the former can operate on an arbitrary amount of data,
+subject to block size requirements (i.e., non-stream ciphers can only
+process multiples of blocks).
 
 Support for hardware crypto devices via an asynchronous interface is
 under development.
@@ -69,29 +76,12 @@ Here's an example of how to use the API:
 Many real examples are available in the regression test module (tcrypt.c).
 
 
-CONFIGURATION NOTES
-
-As Triple DES is part of the DES module, for those using modular builds,
-add the following line to /etc/modprobe.conf:
-
-  alias des3_ede des
-
-The Null algorithms reside in the crypto_null module, so these lines
-should also be added:
-
-  alias cipher_null crypto_null
-  alias digest_null crypto_null
-  alias compress_null crypto_null
-
-The SHA384 algorithm shares code within the SHA512 module, so you'll
-also need:
-  alias sha384 sha512
-
-
 DEVELOPER NOTES
 
 Transforms may only be allocated in user context, and cryptographic
-methods may only be called from softirq and user contexts.
+methods may only be called from softirq and user contexts.  For
+transforms with a setkey method, the setkey call likewise should only
+be made from user context.
 
 When using the API for ciphers, performance will be optimal if each
 scatterlist contains data which is a multiple of the cipher's block
@@ -130,8 +120,9 @@ might already be working on.
 BUGS
 
 Send bug reports to:
-Herbert Xu <herbert@gondor.apana.org.au>
-Cc: David S. Miller <davem@redhat.com>
+linux-crypto@vger.kernel.org
+Cc: Herbert Xu <herbert@gondor.apana.org.au>,
+    David S. Miller <davem@redhat.com>
 
 
 FURTHER INFORMATION
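
The developer notes above state when the API may be called but no longer
carry a usage example.  As a minimal sketch of the synchronous blkcipher
interface as it stands in this tree (illustrative only, not part of the
patch; the function name example_cbc_aes is made up and error paths are
trimmed), encrypting one block with cbc(aes) looks roughly like this:

#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/scatterlist.h>

/* Encrypt 16 bytes of buf in place with cbc(aes).  Allocation and all
 * calls happen in user context, per the developer notes above. */
static int example_cbc_aes(u8 *buf, const u8 *key, u8 *iv)
{
	struct crypto_blkcipher *tfm;
	struct blkcipher_desc desc;
	struct scatterlist sg;
	int err;

	/* type 0 with mask CRYPTO_ALG_ASYNC requests a synchronous tfm */
	tfm = crypto_alloc_blkcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_blkcipher_setkey(tfm, key, 16);
	if (!err) {
		crypto_blkcipher_set_iv(tfm, iv, 16);
		sg_init_one(&sg, buf, 16);
		desc.tfm = tfm;
		desc.flags = 0;
		err = crypto_blkcipher_encrypt(&desc, &sg, &sg, 16);
	}
	crypto_free_blkcipher(tfm);
	return err;
}
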
index 512669691ad01b9513ce5c283d5d9db6e75e5aed..46c97058ebe1a2de8a0d4d2eeecc788644188f78 100644 (file)
@@ -6,6 +6,7 @@
  * s390 Version:
  *   Copyright IBM Corp. 2005,2007
  *   Author(s): Jan Glauber (jang@de.ibm.com)
+ *             Sebastian Siewior <sebastian@breakpoint.cc> SW-Fallback
  *
  * Derived from "crypto/aes_generic.c"
  *
  *
  */
 
+#include <crypto/aes.h>
 #include <crypto/algapi.h>
+#include <linux/err.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include "crypt_s390.h"
 
-#define AES_MIN_KEY_SIZE       16
-#define AES_MAX_KEY_SIZE       32
-
-/* data block size for all key lengths */
-#define AES_BLOCK_SIZE         16
-
 #define AES_KEYLEN_128         1
 #define AES_KEYLEN_192         2
 #define AES_KEYLEN_256         4
@@ -39,45 +36,89 @@ struct s390_aes_ctx {
        long enc;
        long dec;
        int key_len;
+       union {
+               struct crypto_blkcipher *blk;
+               struct crypto_cipher *cip;
+       } fallback;
 };
 
-static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
-                      unsigned int key_len)
+/*
+ * Check if the key_len is supported by the HW.
+ * Returns 0 if it is, a positive number if it is not and a software
+ * fallback is required, or a negative number if the key size is invalid
+ */
+static int need_fallback(unsigned int key_len)
 {
-       struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
-       u32 *flags = &tfm->crt_flags;
-
        switch (key_len) {
        case 16:
                if (!(keylen_flag & AES_KEYLEN_128))
-                       goto fail;
+                       return 1;
                break;
        case 24:
                if (!(keylen_flag & AES_KEYLEN_192))
-                       goto fail;
-
+                       return 1;
                break;
        case 32:
                if (!(keylen_flag & AES_KEYLEN_256))
-                       goto fail;
+                       return 1;
                break;
        default:
-               goto fail;
+               return -1;
                break;
        }
+       return 0;
+}
+
+static int setkey_fallback_cip(struct crypto_tfm *tfm, const u8 *in_key,
+               unsigned int key_len)
+{
+       struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+       int ret;
+
+       sctx->fallback.cip->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK;
+       sctx->fallback.cip->base.crt_flags |= (tfm->crt_flags &
+                       CRYPTO_TFM_REQ_MASK);
+
+       ret = crypto_cipher_setkey(sctx->fallback.cip, in_key, key_len);
+       if (ret) {
+               tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
+               tfm->crt_flags |= (sctx->fallback.cip->base.crt_flags &
+                               CRYPTO_TFM_RES_MASK);
+       }
+       return ret;
+}
+
+static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+                      unsigned int key_len)
+{
+       struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+       u32 *flags = &tfm->crt_flags;
+       int ret;
+
+       ret = need_fallback(key_len);
+       if (ret < 0) {
+               *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+               return -EINVAL;
+       }
 
        sctx->key_len = key_len;
-       memcpy(sctx->key, in_key, key_len);
-       return 0;
-fail:
-       *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-       return -EINVAL;
+       if (!ret) {
+               memcpy(sctx->key, in_key, key_len);
+               return 0;
+       }
+
+       return setkey_fallback_cip(tfm, in_key, key_len);
 }
 
 static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
        const struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
 
+       if (unlikely(need_fallback(sctx->key_len))) {
+               crypto_cipher_encrypt_one(sctx->fallback.cip, out, in);
+               return;
+       }
+
        switch (sctx->key_len) {
        case 16:
                crypt_s390_km(KM_AES_128_ENCRYPT, &sctx->key, out, in,
@@ -98,6 +139,11 @@ static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
        const struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
 
+       if (unlikely(need_fallback(sctx->key_len))) {
+               crypto_cipher_decrypt_one(sctx->fallback.cip, out, in);
+               return;
+       }
+
        switch (sctx->key_len) {
        case 16:
                crypt_s390_km(KM_AES_128_DECRYPT, &sctx->key, out, in,
@@ -114,6 +160,29 @@ static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
        }
 }
 
+static int fallback_init_cip(struct crypto_tfm *tfm)
+{
+       const char *name = tfm->__crt_alg->cra_name;
+       struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+
+       sctx->fallback.cip = crypto_alloc_cipher(name, 0,
+                       CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK);
+
+       if (IS_ERR(sctx->fallback.cip)) {
+               printk(KERN_ERR "Error allocating fallback algo %s\n", name);
+               return PTR_ERR(sctx->fallback.cip);
+       }
+
+       return 0;
+}
+
+static void fallback_exit_cip(struct crypto_tfm *tfm)
+{
+       struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+
+       crypto_free_cipher(sctx->fallback.cip);
+       sctx->fallback.cip = NULL;
+}
 
 static struct crypto_alg aes_alg = {
        .cra_name               =       "aes",
@@ -125,6 +194,8 @@ static struct crypto_alg aes_alg = {
        .cra_ctxsize            =       sizeof(struct s390_aes_ctx),
        .cra_module             =       THIS_MODULE,
        .cra_list               =       LIST_HEAD_INIT(aes_alg.cra_list),
+       .cra_init               =       fallback_init_cip,
+       .cra_exit               =       fallback_exit_cip,
        .cra_u                  =       {
                .cipher = {
                        .cia_min_keysize        =       AES_MIN_KEY_SIZE,
@@ -136,10 +207,70 @@ static struct crypto_alg aes_alg = {
        }
 };
 
+static int setkey_fallback_blk(struct crypto_tfm *tfm, const u8 *key,
+               unsigned int len)
+{
+       struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+       unsigned int ret;
+
+       sctx->fallback.blk->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK;
+       sctx->fallback.blk->base.crt_flags |= (tfm->crt_flags &
+                       CRYPTO_TFM_REQ_MASK);
+
+       ret = crypto_blkcipher_setkey(sctx->fallback.blk, key, len);
+       if (ret) {
+               tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
+               tfm->crt_flags |= (sctx->fallback.blk->base.crt_flags &
+                               CRYPTO_TFM_RES_MASK);
+       }
+       return ret;
+}
+
+static int fallback_blk_dec(struct blkcipher_desc *desc,
+               struct scatterlist *dst, struct scatterlist *src,
+               unsigned int nbytes)
+{
+       unsigned int ret;
+       struct crypto_blkcipher *tfm;
+       struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+
+       tfm = desc->tfm;
+       desc->tfm = sctx->fallback.blk;
+
+       ret = crypto_blkcipher_decrypt_iv(desc, dst, src, nbytes);
+
+       desc->tfm = tfm;
+       return ret;
+}
+
+static int fallback_blk_enc(struct blkcipher_desc *desc,
+               struct scatterlist *dst, struct scatterlist *src,
+               unsigned int nbytes)
+{
+       unsigned int ret;
+       struct crypto_blkcipher *tfm;
+       struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+
+       tfm = desc->tfm;
+       desc->tfm = sctx->fallback.blk;
+
+       ret = crypto_blkcipher_encrypt_iv(desc, dst, src, nbytes);
+
+       desc->tfm = tfm;
+       return ret;
+}
+
 static int ecb_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
                           unsigned int key_len)
 {
        struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+       int ret;
+
+       ret = need_fallback(key_len);
+       if (ret > 0) {
+               sctx->key_len = key_len;
+               return setkey_fallback_blk(tfm, in_key, key_len);
+       }
 
        switch (key_len) {
        case 16:
@@ -188,6 +319,9 @@ static int ecb_aes_encrypt(struct blkcipher_desc *desc,
        struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
        struct blkcipher_walk walk;
 
+       if (unlikely(need_fallback(sctx->key_len)))
+               return fallback_blk_enc(desc, dst, src, nbytes);
+
        blkcipher_walk_init(&walk, dst, src, nbytes);
        return ecb_aes_crypt(desc, sctx->enc, sctx->key, &walk);
 }
@@ -199,10 +333,37 @@ static int ecb_aes_decrypt(struct blkcipher_desc *desc,
        struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
        struct blkcipher_walk walk;
 
+       if (unlikely(need_fallback(sctx->key_len)))
+               return fallback_blk_dec(desc, dst, src, nbytes);
+
        blkcipher_walk_init(&walk, dst, src, nbytes);
        return ecb_aes_crypt(desc, sctx->dec, sctx->key, &walk);
 }
 
+static int fallback_init_blk(struct crypto_tfm *tfm)
+{
+       const char *name = tfm->__crt_alg->cra_name;
+       struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+
+       sctx->fallback.blk = crypto_alloc_blkcipher(name, 0,
+                       CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK);
+
+       if (IS_ERR(sctx->fallback.blk)) {
+               printk(KERN_ERR "Error allocating fallback algo %s\n", name);
+               return PTR_ERR(sctx->fallback.blk);
+       }
+
+       return 0;
+}
+
+static void fallback_exit_blk(struct crypto_tfm *tfm)
+{
+       struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+
+       crypto_free_blkcipher(sctx->fallback.blk);
+       sctx->fallback.blk = NULL;
+}
+
 static struct crypto_alg ecb_aes_alg = {
        .cra_name               =       "ecb(aes)",
        .cra_driver_name        =       "ecb-aes-s390",
@@ -214,6 +375,8 @@ static struct crypto_alg ecb_aes_alg = {
        .cra_type               =       &crypto_blkcipher_type,
        .cra_module             =       THIS_MODULE,
        .cra_list               =       LIST_HEAD_INIT(ecb_aes_alg.cra_list),
+       .cra_init               =       fallback_init_blk,
+       .cra_exit               =       fallback_exit_blk,
        .cra_u                  =       {
                .blkcipher = {
                        .min_keysize            =       AES_MIN_KEY_SIZE,
@@ -229,6 +392,13 @@ static int cbc_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
                           unsigned int key_len)
 {
        struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+       int ret;
+
+       ret = need_fallback(key_len);
+       if (ret > 0) {
+               sctx->key_len = key_len;
+               return setkey_fallback_blk(tfm, in_key, key_len);
+       }
 
        switch (key_len) {
        case 16:
@@ -283,6 +453,9 @@ static int cbc_aes_encrypt(struct blkcipher_desc *desc,
        struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
        struct blkcipher_walk walk;
 
+       if (unlikely(need_fallback(sctx->key_len)))
+               return fallback_blk_enc(desc, dst, src, nbytes);
+
        blkcipher_walk_init(&walk, dst, src, nbytes);
        return cbc_aes_crypt(desc, sctx->enc, sctx->iv, &walk);
 }
@@ -294,6 +467,9 @@ static int cbc_aes_decrypt(struct blkcipher_desc *desc,
        struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
        struct blkcipher_walk walk;
 
+       if (unlikely(need_fallback(sctx->key_len)))
+               return fallback_blk_dec(desc, dst, src, nbytes);
+
        blkcipher_walk_init(&walk, dst, src, nbytes);
        return cbc_aes_crypt(desc, sctx->dec, sctx->iv, &walk);
 }
@@ -309,6 +485,8 @@ static struct crypto_alg cbc_aes_alg = {
        .cra_type               =       &crypto_blkcipher_type,
        .cra_module             =       THIS_MODULE,
        .cra_list               =       LIST_HEAD_INIT(cbc_aes_alg.cra_list),
+       .cra_init               =       fallback_init_blk,
+       .cra_exit               =       fallback_exit_blk,
        .cra_u                  =       {
                .blkcipher = {
                        .min_keysize            =       AES_MIN_KEY_SIZE,
@@ -336,14 +514,10 @@ static int __init aes_init(void)
                return -EOPNOTSUPP;
 
        /* z9 109 and z9 BC/EC only support 128 bit key length */
-       if (keylen_flag == AES_KEYLEN_128) {
-               aes_alg.cra_u.cipher.cia_max_keysize = AES_MIN_KEY_SIZE;
-               ecb_aes_alg.cra_u.blkcipher.max_keysize = AES_MIN_KEY_SIZE;
-               cbc_aes_alg.cra_u.blkcipher.max_keysize = AES_MIN_KEY_SIZE;
+       if (keylen_flag == AES_KEYLEN_128)
                printk(KERN_INFO
                       "aes_s390: hardware acceleration only available for "
                       "128 bit keys\n");
-       }
 
        ret = crypto_register_alg(&aes_alg);
        if (ret)
@@ -382,4 +556,3 @@ MODULE_ALIAS("aes");
 
 MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm");
 MODULE_LICENSE("GPL");
-
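
The fallback pattern added to aes_s390.c above generalizes to any hardware
driver: flag the algorithm with CRYPTO_ALG_NEED_FALLBACK and, in cra_init,
allocate a software instance of the same cra_name, passing
CRYPTO_ALG_NEED_FALLBACK in the allocation mask so the lookup cannot
resolve to a driver that itself needs a fallback (including this one).
A condensed sketch of the pattern, with hypothetical mydrv_* names rather
than code from the patch:

#include <linux/crypto.h>
#include <linux/err.h>

struct mydrv_ctx {
	struct crypto_cipher *fallback;
};

static int mydrv_init(struct crypto_tfm *tfm)
{
	struct mydrv_ctx *ctx = crypto_tfm_ctx(tfm);

	/* CRYPTO_ALG_NEED_FALLBACK in the mask excludes algorithms that
	 * have the flag set, i.e. this driver itself; CRYPTO_ALG_ASYNC
	 * in the mask restricts the lookup to synchronous ciphers. */
	ctx->fallback = crypto_alloc_cipher(tfm->__crt_alg->cra_name, 0,
			CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK);
	if (IS_ERR(ctx->fallback))
		return PTR_ERR(ctx->fallback);
	return 0;
}

static void mydrv_exit(struct crypto_tfm *tfm)
{
	struct mydrv_ctx *ctx = crypto_tfm_ctx(tfm);

	crypto_free_cipher(ctx->fallback);
	ctx->fallback = NULL;
}
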
index 46bb609e2444a50cd9c5b2370d5febe0df90cbce..3874c2de54036e125a7f4772d93f37fa277e5c11 100644 (file)
@@ -4,12 +4,16 @@
 
 obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
 obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
+obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
 
 obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
+obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o
 
-aes-i586-y := aes-i586-asm_32.o aes_32.o
-twofish-i586-y := twofish-i586-asm_32.o twofish_32.o
+aes-i586-y := aes-i586-asm_32.o aes_glue.o
+twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o
+salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o
 
-aes-x86_64-y := aes-x86_64-asm_64.o aes_64.o
-twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_64.o
+aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
+twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
+salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
index f942f0c8f6306d19875b069a419743efe010c979..1093bede3e0a75378c6dde1bfa9a9f2fafce4cff 100644 (file)
@@ -46,9 +46,9 @@
 #define in_blk 16
 
 /* offsets in crypto_tfm structure */
-#define ekey (crypto_tfm_ctx_offset + 0)
-#define nrnd (crypto_tfm_ctx_offset + 256)
-#define dkey (crypto_tfm_ctx_offset + 260)
+#define klen (crypto_tfm_ctx_offset + 0)
+#define ekey (crypto_tfm_ctx_offset + 4)
+#define dkey (crypto_tfm_ctx_offset + 244)
 
 // register mapping for encrypt and decrypt subroutines
 
 
 .global  aes_enc_blk
 
-.extern  ft_tab
-.extern  fl_tab
+.extern  crypto_ft_tab
+.extern  crypto_fl_tab
 
 .align 4
 
@@ -236,7 +236,7 @@ aes_enc_blk:
 1:     push    %ebx
        mov     in_blk+4(%esp),%r2
        push    %esi
-       mov     nrnd(%ebp),%r3   // number of rounds
+       mov     klen(%ebp),%r3   // key size
        push    %edi
 #if ekey != 0
        lea     ekey(%ebp),%ebp  // key pointer
@@ -255,26 +255,26 @@ aes_enc_blk:
 
        sub     $8,%esp         // space for register saves on stack
        add     $16,%ebp        // increment to next round key
-       cmp     $12,%r3
+       cmp     $24,%r3
        jb      4f              // 10 rounds for 128-bit key
        lea     32(%ebp),%ebp
        je      3f              // 12 rounds for 192-bit key
        lea     32(%ebp),%ebp
 
-2:     fwd_rnd1( -64(%ebp) ,ft_tab)    // 14 rounds for 256-bit key
-       fwd_rnd2( -48(%ebp) ,ft_tab)
-3:     fwd_rnd1( -32(%ebp) ,ft_tab)    // 12 rounds for 192-bit key
-       fwd_rnd2( -16(%ebp) ,ft_tab)
-4:     fwd_rnd1(    (%ebp) ,ft_tab)    // 10 rounds for 128-bit key
-       fwd_rnd2( +16(%ebp) ,ft_tab)
-       fwd_rnd1( +32(%ebp) ,ft_tab)
-       fwd_rnd2( +48(%ebp) ,ft_tab)
-       fwd_rnd1( +64(%ebp) ,ft_tab)
-       fwd_rnd2( +80(%ebp) ,ft_tab)
-       fwd_rnd1( +96(%ebp) ,ft_tab)
-       fwd_rnd2(+112(%ebp) ,ft_tab)
-       fwd_rnd1(+128(%ebp) ,ft_tab)
-       fwd_rnd2(+144(%ebp) ,fl_tab)    // last round uses a different table
+2:     fwd_rnd1( -64(%ebp), crypto_ft_tab)     // 14 rounds for 256-bit key
+       fwd_rnd2( -48(%ebp), crypto_ft_tab)
+3:     fwd_rnd1( -32(%ebp), crypto_ft_tab)     // 12 rounds for 192-bit key
+       fwd_rnd2( -16(%ebp), crypto_ft_tab)
+4:     fwd_rnd1(    (%ebp), crypto_ft_tab)     // 10 rounds for 128-bit key
+       fwd_rnd2( +16(%ebp), crypto_ft_tab)
+       fwd_rnd1( +32(%ebp), crypto_ft_tab)
+       fwd_rnd2( +48(%ebp), crypto_ft_tab)
+       fwd_rnd1( +64(%ebp), crypto_ft_tab)
+       fwd_rnd2( +80(%ebp), crypto_ft_tab)
+       fwd_rnd1( +96(%ebp), crypto_ft_tab)
+       fwd_rnd2(+112(%ebp), crypto_ft_tab)
+       fwd_rnd1(+128(%ebp), crypto_ft_tab)
+       fwd_rnd2(+144(%ebp), crypto_fl_tab)     // last round uses a different table
 
 // move final values to the output array.  CAUTION: the 
 // order of these assignments relies on the register mappings
@@ -297,8 +297,8 @@ aes_enc_blk:
 
 .global  aes_dec_blk
 
-.extern  it_tab
-.extern  il_tab
+.extern  crypto_it_tab
+.extern  crypto_il_tab
 
 .align 4
 
@@ -312,14 +312,11 @@ aes_dec_blk:
 1:     push    %ebx
        mov     in_blk+4(%esp),%r2
        push    %esi
-       mov     nrnd(%ebp),%r3   // number of rounds
+       mov     klen(%ebp),%r3   // key size
        push    %edi
 #if dkey != 0
        lea     dkey(%ebp),%ebp  // key pointer
 #endif
-       mov     %r3,%r0
-       shl     $4,%r0
-       add     %r0,%ebp
        
 // input four columns and xor in first round key
 
@@ -333,27 +330,27 @@ aes_dec_blk:
        xor     12(%ebp),%r5
 
        sub     $8,%esp         // space for register saves on stack
-       sub     $16,%ebp        // increment to next round key
-       cmp     $12,%r3
+       add     $16,%ebp        // increment to next round key
+       cmp     $24,%r3
        jb      4f              // 10 rounds for 128-bit key
-       lea     -32(%ebp),%ebp
+       lea     32(%ebp),%ebp
        je      3f              // 12 rounds for 192-bit key
-       lea     -32(%ebp),%ebp
-
-2:     inv_rnd1( +64(%ebp), it_tab)    // 14 rounds for 256-bit key
-       inv_rnd2( +48(%ebp), it_tab)
-3:     inv_rnd1( +32(%ebp), it_tab)    // 12 rounds for 192-bit key
-       inv_rnd2( +16(%ebp), it_tab)
-4:     inv_rnd1(    (%ebp), it_tab)    // 10 rounds for 128-bit key
-       inv_rnd2( -16(%ebp), it_tab)
-       inv_rnd1( -32(%ebp), it_tab)
-       inv_rnd2( -48(%ebp), it_tab)
-       inv_rnd1( -64(%ebp), it_tab)
-       inv_rnd2( -80(%ebp), it_tab)
-       inv_rnd1( -96(%ebp), it_tab)
-       inv_rnd2(-112(%ebp), it_tab)
-       inv_rnd1(-128(%ebp), it_tab)
-       inv_rnd2(-144(%ebp), il_tab)    // last round uses a different table
+       lea     32(%ebp),%ebp
+
+2:     inv_rnd1( -64(%ebp), crypto_it_tab)     // 14 rounds for 256-bit key
+       inv_rnd2( -48(%ebp), crypto_it_tab)
+3:     inv_rnd1( -32(%ebp), crypto_it_tab)     // 12 rounds for 192-bit key
+       inv_rnd2( -16(%ebp), crypto_it_tab)
+4:     inv_rnd1(    (%ebp), crypto_it_tab)     // 10 rounds for 128-bit key
+       inv_rnd2( +16(%ebp), crypto_it_tab)
+       inv_rnd1( +32(%ebp), crypto_it_tab)
+       inv_rnd2( +48(%ebp), crypto_it_tab)
+       inv_rnd1( +64(%ebp), crypto_it_tab)
+       inv_rnd2( +80(%ebp), crypto_it_tab)
+       inv_rnd1( +96(%ebp), crypto_it_tab)
+       inv_rnd2(+112(%ebp), crypto_it_tab)
+       inv_rnd1(+128(%ebp), crypto_it_tab)
+       inv_rnd2(+144(%ebp), crypto_il_tab)     // last round uses a different table
 
 // move final values to the output array.  CAUTION: the 
 // order of these assignments relies on the register mappings
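
The new klen/ekey/dkey offsets in the hunk above line up with the shared
AES context that the common glue code now supplies, instead of the old
per-arch aes_ctx.  Assuming the layout below for include/crypto/aes.h
(a reconstruction, not quoted from the patch), key_length lands at offset
0, the encryption schedule at 4, and the decryption schedule at
4 + 240 = 244; storing the key length in bytes is also what lets the
round-count branches compare against $24 (a 192-bit key) rather than a
round count:

/* assumed layout; 15 round keys of 16 bytes each = 240 bytes */
#define AES_MAX_KEYLENGTH	(15 * 16)
#define AES_MAX_KEYLENGTH_U32	(AES_MAX_KEYLENGTH / sizeof(u32))

struct crypto_aes_ctx {
	u32 key_length;				/* klen: offset 0   */
	u32 key_enc[AES_MAX_KEYLENGTH_U32];	/* ekey: offset 4   */
	u32 key_dec[AES_MAX_KEYLENGTH_U32];	/* dkey: offset 244 */
};
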
index 26b40de4d0b0299939591aead1d6652e296f60f7..a120f526c3df9d9c547b996867f1b035b44b006b 100644 (file)
@@ -8,10 +8,10 @@
  * including this sentence is retained in full.
  */
 
-.extern aes_ft_tab
-.extern aes_it_tab
-.extern aes_fl_tab
-.extern aes_il_tab
+.extern crypto_ft_tab
+.extern crypto_it_tab
+.extern crypto_fl_tab
+.extern crypto_il_tab
 
 .text
 
        .align  8;                      \
 FUNC:  movq    r1,r2;                  \
        movq    r3,r4;                  \
-       leaq    BASE+KEY+52(r8),r9;     \
+       leaq    BASE+KEY+48+4(r8),r9;   \
        movq    r10,r11;                \
        movl    (r7),r5 ## E;           \
        movl    4(r7),r1 ## E;          \
        movl    8(r7),r6 ## E;          \
        movl    12(r7),r7 ## E;         \
-       movl    BASE(r8),r10 ## E;      \
+       movl    BASE+0(r8),r10 ## E;    \
        xorl    -48(r9),r5 ## E;        \
        xorl    -44(r9),r1 ## E;        \
        xorl    -40(r9),r6 ## E;        \
@@ -154,37 +154,37 @@ FUNC:     movq    r1,r2;                  \
 /* void aes_enc_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in) */
 
        entry(aes_enc_blk,0,enc128,enc192)
-       encrypt_round(aes_ft_tab,-96)
-       encrypt_round(aes_ft_tab,-80)
-enc192:        encrypt_round(aes_ft_tab,-64)
-       encrypt_round(aes_ft_tab,-48)
-enc128:        encrypt_round(aes_ft_tab,-32)
-       encrypt_round(aes_ft_tab,-16)
-       encrypt_round(aes_ft_tab,  0)
-       encrypt_round(aes_ft_tab, 16)
-       encrypt_round(aes_ft_tab, 32)
-       encrypt_round(aes_ft_tab, 48)
-       encrypt_round(aes_ft_tab, 64)
-       encrypt_round(aes_ft_tab, 80)
-       encrypt_round(aes_ft_tab, 96)
-       encrypt_final(aes_fl_tab,112)
+       encrypt_round(crypto_ft_tab,-96)
+       encrypt_round(crypto_ft_tab,-80)
+enc192:        encrypt_round(crypto_ft_tab,-64)
+       encrypt_round(crypto_ft_tab,-48)
+enc128:        encrypt_round(crypto_ft_tab,-32)
+       encrypt_round(crypto_ft_tab,-16)
+       encrypt_round(crypto_ft_tab,  0)
+       encrypt_round(crypto_ft_tab, 16)
+       encrypt_round(crypto_ft_tab, 32)
+       encrypt_round(crypto_ft_tab, 48)
+       encrypt_round(crypto_ft_tab, 64)
+       encrypt_round(crypto_ft_tab, 80)
+       encrypt_round(crypto_ft_tab, 96)
+       encrypt_final(crypto_fl_tab,112)
        return
 
 /* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in) */
 
        entry(aes_dec_blk,240,dec128,dec192)
-       decrypt_round(aes_it_tab,-96)
-       decrypt_round(aes_it_tab,-80)
-dec192:        decrypt_round(aes_it_tab,-64)
-       decrypt_round(aes_it_tab,-48)
-dec128:        decrypt_round(aes_it_tab,-32)
-       decrypt_round(aes_it_tab,-16)
-       decrypt_round(aes_it_tab,  0)
-       decrypt_round(aes_it_tab, 16)
-       decrypt_round(aes_it_tab, 32)
-       decrypt_round(aes_it_tab, 48)
-       decrypt_round(aes_it_tab, 64)
-       decrypt_round(aes_it_tab, 80)
-       decrypt_round(aes_it_tab, 96)
-       decrypt_final(aes_il_tab,112)
+       decrypt_round(crypto_it_tab,-96)
+       decrypt_round(crypto_it_tab,-80)
+dec192:        decrypt_round(crypto_it_tab,-64)
+       decrypt_round(crypto_it_tab,-48)
+dec128:        decrypt_round(crypto_it_tab,-32)
+       decrypt_round(crypto_it_tab,-16)
+       decrypt_round(crypto_it_tab,  0)
+       decrypt_round(crypto_it_tab, 16)
+       decrypt_round(crypto_it_tab, 32)
+       decrypt_round(crypto_it_tab, 48)
+       decrypt_round(crypto_it_tab, 64)
+       decrypt_round(crypto_it_tab, 80)
+       decrypt_round(crypto_it_tab, 96)
+       decrypt_final(crypto_il_tab,112)
        return
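
With both the i586 and x86_64 assembly now driven by the shared
crypto_ft_tab/crypto_it_tab tables and a common context, the two per-arch
C wrappers (aes_32.c and aes_64.c, deleted below) collapse into the single
aes_glue.c added by this merge.  A sketch of the shape of that glue file,
reconstructed rather than quoted, assuming the "aes-asm" driver name and
that crypto_aes_set_key is the setkey helper exported by aes-generic:

#include <crypto/aes.h>
#include <linux/crypto.h>
#include <linux/linkage.h>
#include <linux/module.h>

asmlinkage void aes_enc_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in);
asmlinkage void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in);

static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
	aes_enc_blk(tfm, dst, src);
}

static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
	aes_dec_blk(tfm, dst, src);
}

static struct crypto_alg aes_alg = {
	.cra_name		= "aes",
	.cra_driver_name	= "aes-asm",
	.cra_priority		= 200,
	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
	.cra_blocksize		= AES_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
	.cra_module		= THIS_MODULE,
	.cra_list		= LIST_HEAD_INIT(aes_alg.cra_list),
	.cra_u	= {
		.cipher	= {
			.cia_min_keysize	= AES_MIN_KEY_SIZE,
			.cia_max_keysize	= AES_MAX_KEY_SIZE,
			.cia_setkey		= crypto_aes_set_key,
			.cia_encrypt		= aes_encrypt,
			.cia_decrypt		= aes_decrypt
		}
	}
};

/* module_init/module_exit registration boilerplate omitted */
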
diff --git a/arch/x86/crypto/aes_32.c b/arch/x86/crypto/aes_32.c
deleted file mode 100644 (file)
index 49aad93..0000000
+++ /dev/null
@@ -1,515 +0,0 @@
-/* 
- * 
- * Glue Code for optimized 586 assembler version of AES
- *
- * Copyright (c) 2002, Dr Brian Gladman <>, Worcester, UK.
- * All rights reserved.
- *
- * LICENSE TERMS
- *
- * The free distribution and use of this software in both source and binary
- * form is allowed (with or without changes) provided that:
- *
- *   1. distributions of this source code include the above copyright
- *      notice, this list of conditions and the following disclaimer;
- *
- *   2. distributions in binary form include the above copyright
- *      notice, this list of conditions and the following disclaimer
- *      in the documentation and/or other associated materials;
- *
- *   3. the copyright holder's name is not used to endorse products
- *      built using this software without specific written permission.
- *
- * ALTERNATIVELY, provided that this notice is retained in full, this product
- * may be distributed under the terms of the GNU General Public License (GPL),
- * in which case the provisions of the GPL apply INSTEAD OF those given above.
- *
- * DISCLAIMER
- *
- * This software is provided 'as is' with no explicit or implied warranties
- * in respect of its properties, including, but not limited to, correctness
- * and/or fitness for purpose.
- *
- * Copyright (c) 2003, Adam J. Richter <adam@yggdrasil.com> (conversion to
- * 2.5 API).
- * Copyright (c) 2003, 2004 Fruhwirth Clemens <clemens@endorphin.org>
- * Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
- *
- */
-
-#include <asm/byteorder.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/crypto.h>
-#include <linux/linkage.h>
-
-asmlinkage void aes_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
-asmlinkage void aes_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
-
-#define AES_MIN_KEY_SIZE       16
-#define AES_MAX_KEY_SIZE       32
-#define AES_BLOCK_SIZE         16
-#define AES_KS_LENGTH          4 * AES_BLOCK_SIZE
-#define RC_LENGTH              29
-
-struct aes_ctx {
-       u32 ekey[AES_KS_LENGTH];
-       u32 rounds;
-       u32 dkey[AES_KS_LENGTH];
-};
-
-#define WPOLY 0x011b
-#define bytes2word(b0, b1, b2, b3)  \
-       (((u32)(b3) << 24) | ((u32)(b2) << 16) | ((u32)(b1) << 8) | (b0))
-
-/* define the finite field multiplies required for Rijndael */
-#define f2(x) ((x) ? pow[log[x] + 0x19] : 0)
-#define f3(x) ((x) ? pow[log[x] + 0x01] : 0)
-#define f9(x) ((x) ? pow[log[x] + 0xc7] : 0)
-#define fb(x) ((x) ? pow[log[x] + 0x68] : 0)
-#define fd(x) ((x) ? pow[log[x] + 0xee] : 0)
-#define fe(x) ((x) ? pow[log[x] + 0xdf] : 0)
-#define fi(x) ((x) ?   pow[255 - log[x]]: 0)
-
-static inline u32 upr(u32 x, int n)
-{
-       return (x << 8 * n) | (x >> (32 - 8 * n));
-}
-
-static inline u8 bval(u32 x, int n)
-{
-       return x >> 8 * n;
-}
-
-/* The forward and inverse affine transformations used in the S-box */
-#define fwd_affine(x) \
-       (w = (u32)x, w ^= (w<<1)^(w<<2)^(w<<3)^(w<<4), 0x63^(u8)(w^(w>>8)))
-
-#define inv_affine(x) \
-       (w = (u32)x, w = (w<<1)^(w<<3)^(w<<6), 0x05^(u8)(w^(w>>8)))
-
-static u32 rcon_tab[RC_LENGTH];
-
-u32 ft_tab[4][256];
-u32 fl_tab[4][256];
-static u32 im_tab[4][256];
-u32 il_tab[4][256];
-u32 it_tab[4][256];
-
-static void gen_tabs(void)
-{
-       u32 i, w;
-       u8 pow[512], log[256];
-
-       /*
-        * log and power tables for GF(2^8) finite field with
-        * WPOLY as modular polynomial - the simplest primitive
-        * root is 0x03, used here to generate the tables.
-        */
-       i = 0; w = 1; 
-       
-       do {
-               pow[i] = (u8)w;
-               pow[i + 255] = (u8)w;
-               log[w] = (u8)i++;
-               w ^=  (w << 1) ^ (w & 0x80 ? WPOLY : 0);
-       } while (w != 1);
-       
-       for(i = 0, w = 1; i < RC_LENGTH; ++i) {
-               rcon_tab[i] = bytes2word(w, 0, 0, 0);
-               w = f2(w);
-       }
-
-       for(i = 0; i < 256; ++i) {
-               u8 b;
-               
-               b = fwd_affine(fi((u8)i));
-               w = bytes2word(f2(b), b, b, f3(b));
-
-               /* tables for a normal encryption round */
-               ft_tab[0][i] = w;
-               ft_tab[1][i] = upr(w, 1);
-               ft_tab[2][i] = upr(w, 2);
-               ft_tab[3][i] = upr(w, 3);
-               w = bytes2word(b, 0, 0, 0);
-               
-               /*
-                * tables for last encryption round
-                * (may also be used in the key schedule)
-                */
-               fl_tab[0][i] = w;
-               fl_tab[1][i] = upr(w, 1);
-               fl_tab[2][i] = upr(w, 2);
-               fl_tab[3][i] = upr(w, 3);
-               
-               b = fi(inv_affine((u8)i));
-               w = bytes2word(fe(b), f9(b), fd(b), fb(b));
-
-               /* tables for the inverse mix column operation  */
-               im_tab[0][b] = w;
-               im_tab[1][b] = upr(w, 1);
-               im_tab[2][b] = upr(w, 2);
-               im_tab[3][b] = upr(w, 3);
-
-               /* tables for a normal decryption round */
-               it_tab[0][i] = w;
-               it_tab[1][i] = upr(w,1);
-               it_tab[2][i] = upr(w,2);
-               it_tab[3][i] = upr(w,3);
-
-               w = bytes2word(b, 0, 0, 0);
-               
-               /* tables for last decryption round */
-               il_tab[0][i] = w;
-               il_tab[1][i] = upr(w,1);
-               il_tab[2][i] = upr(w,2);
-               il_tab[3][i] = upr(w,3);
-    }
-}
-
-#define four_tables(x,tab,vf,rf,c)             \
-(      tab[0][bval(vf(x,0,c),rf(0,c))] ^       \
-       tab[1][bval(vf(x,1,c),rf(1,c))] ^       \
-       tab[2][bval(vf(x,2,c),rf(2,c))] ^       \
-       tab[3][bval(vf(x,3,c),rf(3,c))]         \
-)
-
-#define vf1(x,r,c)  (x)
-#define rf1(r,c)    (r)
-#define rf2(r,c)    ((r-c)&3)
-
-#define inv_mcol(x) four_tables(x,im_tab,vf1,rf1,0)
-#define ls_box(x,c) four_tables(x,fl_tab,vf1,rf2,c)
-
-#define ff(x) inv_mcol(x)
-
-#define ke4(k,i)                                                       \
-{                                                                      \
-       k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ rcon_tab[i];            \
-       k[4*(i)+5] = ss[1] ^= ss[0];                                    \
-       k[4*(i)+6] = ss[2] ^= ss[1];                                    \
-       k[4*(i)+7] = ss[3] ^= ss[2];                                    \
-}
-
-#define kel4(k,i)                                                      \
-{                                                                      \
-       k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ rcon_tab[i];            \
-       k[4*(i)+5] = ss[1] ^= ss[0];                                    \
-       k[4*(i)+6] = ss[2] ^= ss[1]; k[4*(i)+7] = ss[3] ^= ss[2];       \
-}
-
-#define ke6(k,i)                                                       \
-{                                                                      \
-       k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i];           \
-       k[6*(i)+ 7] = ss[1] ^= ss[0];                                   \
-       k[6*(i)+ 8] = ss[2] ^= ss[1];                                   \
-       k[6*(i)+ 9] = ss[3] ^= ss[2];                                   \
-       k[6*(i)+10] = ss[4] ^= ss[3];                                   \
-       k[6*(i)+11] = ss[5] ^= ss[4];                                   \
-}
-
-#define kel6(k,i)                                                      \
-{                                                                      \
-       k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i];           \
-       k[6*(i)+ 7] = ss[1] ^= ss[0];                                   \
-       k[6*(i)+ 8] = ss[2] ^= ss[1];                                   \
-       k[6*(i)+ 9] = ss[3] ^= ss[2];                                   \
-}
-
-#define ke8(k,i)                                                       \
-{                                                                      \
-       k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i];           \
-       k[8*(i)+ 9] = ss[1] ^= ss[0];                                   \
-       k[8*(i)+10] = ss[2] ^= ss[1];                                   \
-       k[8*(i)+11] = ss[3] ^= ss[2];                                   \
-       k[8*(i)+12] = ss[4] ^= ls_box(ss[3],0);                         \
-       k[8*(i)+13] = ss[5] ^= ss[4];                                   \
-       k[8*(i)+14] = ss[6] ^= ss[5];                                   \
-       k[8*(i)+15] = ss[7] ^= ss[6];                                   \
-}
-
-#define kel8(k,i)                                                      \
-{                                                                      \
-       k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i];           \
-       k[8*(i)+ 9] = ss[1] ^= ss[0];                                   \
-       k[8*(i)+10] = ss[2] ^= ss[1];                                   \
-       k[8*(i)+11] = ss[3] ^= ss[2];                                   \
-}
-
-#define kdf4(k,i)                                                      \
-{                                                                      \
-       ss[0] = ss[0] ^ ss[2] ^ ss[1] ^ ss[3];                          \
-       ss[1] = ss[1] ^ ss[3];                                          \
-       ss[2] = ss[2] ^ ss[3];                                          \
-       ss[3] = ss[3];                                                  \
-       ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i];                 \
-       ss[i % 4] ^= ss[4];                                             \
-       ss[4] ^= k[4*(i)];                                              \
-       k[4*(i)+4] = ff(ss[4]);                                         \
-       ss[4] ^= k[4*(i)+1];                                            \
-       k[4*(i)+5] = ff(ss[4]);                                         \
-       ss[4] ^= k[4*(i)+2];                                            \
-       k[4*(i)+6] = ff(ss[4]);                                         \
-       ss[4] ^= k[4*(i)+3];                                            \
-       k[4*(i)+7] = ff(ss[4]);                                         \
-}
-
-#define kd4(k,i)                                                       \
-{                                                                      \
-       ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i];                 \
-       ss[i % 4] ^= ss[4];                                             \
-       ss[4] = ff(ss[4]);                                              \
-       k[4*(i)+4] = ss[4] ^= k[4*(i)];                                 \
-       k[4*(i)+5] = ss[4] ^= k[4*(i)+1];                               \
-       k[4*(i)+6] = ss[4] ^= k[4*(i)+2];                               \
-       k[4*(i)+7] = ss[4] ^= k[4*(i)+3];                               \
-}
-
-#define kdl4(k,i)                                                      \
-{                                                                      \
-       ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i];                 \
-       ss[i % 4] ^= ss[4];                                             \
-       k[4*(i)+4] = (ss[0] ^= ss[1]) ^ ss[2] ^ ss[3];                  \
-       k[4*(i)+5] = ss[1] ^ ss[3];                                     \
-       k[4*(i)+6] = ss[0];                                             \
-       k[4*(i)+7] = ss[1];                                             \
-}
-
-#define kdf6(k,i)                                                      \
-{                                                                      \
-       ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i];                         \
-       k[6*(i)+ 6] = ff(ss[0]);                                        \
-       ss[1] ^= ss[0];                                                 \
-       k[6*(i)+ 7] = ff(ss[1]);                                        \
-       ss[2] ^= ss[1];                                                 \
-       k[6*(i)+ 8] = ff(ss[2]);                                        \
-       ss[3] ^= ss[2];                                                 \
-       k[6*(i)+ 9] = ff(ss[3]);                                        \
-       ss[4] ^= ss[3];                                                 \
-       k[6*(i)+10] = ff(ss[4]);                                        \
-       ss[5] ^= ss[4];                                                 \
-       k[6*(i)+11] = ff(ss[5]);                                        \
-}
-
-#define kd6(k,i)                                                       \
-{                                                                      \
-       ss[6] = ls_box(ss[5],3) ^ rcon_tab[i];                          \
-       ss[0] ^= ss[6]; ss[6] = ff(ss[6]);                              \
-       k[6*(i)+ 6] = ss[6] ^= k[6*(i)];                                \
-       ss[1] ^= ss[0];                                                 \
-       k[6*(i)+ 7] = ss[6] ^= k[6*(i)+ 1];                             \
-       ss[2] ^= ss[1];                                                 \
-       k[6*(i)+ 8] = ss[6] ^= k[6*(i)+ 2];                             \
-       ss[3] ^= ss[2];                                                 \
-       k[6*(i)+ 9] = ss[6] ^= k[6*(i)+ 3];                             \
-       ss[4] ^= ss[3];                                                 \
-       k[6*(i)+10] = ss[6] ^= k[6*(i)+ 4];                             \
-       ss[5] ^= ss[4];                                                 \
-       k[6*(i)+11] = ss[6] ^= k[6*(i)+ 5];                             \
-}
-
-#define kdl6(k,i)                                                      \
-{                                                                      \
-       ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i];                         \
-       k[6*(i)+ 6] = ss[0];                                            \
-       ss[1] ^= ss[0];                                                 \
-       k[6*(i)+ 7] = ss[1];                                            \
-       ss[2] ^= ss[1];                                                 \
-       k[6*(i)+ 8] = ss[2];                                            \
-       ss[3] ^= ss[2];                                                 \
-       k[6*(i)+ 9] = ss[3];                                            \
-}
-
-#define kdf8(k,i)                                                      \
-{                                                                      \
-       ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i];                         \
-       k[8*(i)+ 8] = ff(ss[0]);                                        \
-       ss[1] ^= ss[0];                                                 \
-       k[8*(i)+ 9] = ff(ss[1]);                                        \
-       ss[2] ^= ss[1];                                                 \
-       k[8*(i)+10] = ff(ss[2]);                                        \
-       ss[3] ^= ss[2];                                                 \
-       k[8*(i)+11] = ff(ss[3]);                                        \
-       ss[4] ^= ls_box(ss[3],0);                                       \
-       k[8*(i)+12] = ff(ss[4]);                                        \
-       ss[5] ^= ss[4];                                                 \
-       k[8*(i)+13] = ff(ss[5]);                                        \
-       ss[6] ^= ss[5];                                                 \
-       k[8*(i)+14] = ff(ss[6]);                                        \
-       ss[7] ^= ss[6];                                                 \
-       k[8*(i)+15] = ff(ss[7]);                                        \
-}
-
-#define kd8(k,i)                                                       \
-{                                                                      \
-       u32 __g = ls_box(ss[7],3) ^ rcon_tab[i];                        \
-       ss[0] ^= __g;                                                   \
-       __g = ff(__g);                                                  \
-       k[8*(i)+ 8] = __g ^= k[8*(i)];                                  \
-       ss[1] ^= ss[0];                                                 \
-       k[8*(i)+ 9] = __g ^= k[8*(i)+ 1];                               \
-       ss[2] ^= ss[1];                                                 \
-       k[8*(i)+10] = __g ^= k[8*(i)+ 2];                               \
-       ss[3] ^= ss[2];                                                 \
-       k[8*(i)+11] = __g ^= k[8*(i)+ 3];                               \
-       __g = ls_box(ss[3],0);                                          \
-       ss[4] ^= __g;                                                   \
-       __g = ff(__g);                                                  \
-       k[8*(i)+12] = __g ^= k[8*(i)+ 4];                               \
-       ss[5] ^= ss[4];                                                 \
-       k[8*(i)+13] = __g ^= k[8*(i)+ 5];                               \
-       ss[6] ^= ss[5];                                                 \
-       k[8*(i)+14] = __g ^= k[8*(i)+ 6];                               \
-       ss[7] ^= ss[6];                                                 \
-       k[8*(i)+15] = __g ^= k[8*(i)+ 7];                               \
-}
-
-#define kdl8(k,i)                                                      \
-{                                                                      \
-       ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i];                         \
-       k[8*(i)+ 8] = ss[0];                                            \
-       ss[1] ^= ss[0];                                                 \
-       k[8*(i)+ 9] = ss[1];                                            \
-       ss[2] ^= ss[1];                                                 \
-       k[8*(i)+10] = ss[2];                                            \
-       ss[3] ^= ss[2];                                                 \
-       k[8*(i)+11] = ss[3];                                            \
-}
-
-static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
-                      unsigned int key_len)
-{
-       int i;
-       u32 ss[8];
-       struct aes_ctx *ctx = crypto_tfm_ctx(tfm);
-       const __le32 *key = (const __le32 *)in_key;
-       u32 *flags = &tfm->crt_flags;
-
-       /* encryption schedule */
-       
-       ctx->ekey[0] = ss[0] = le32_to_cpu(key[0]);
-       ctx->ekey[1] = ss[1] = le32_to_cpu(key[1]);
-       ctx->ekey[2] = ss[2] = le32_to_cpu(key[2]);
-       ctx->ekey[3] = ss[3] = le32_to_cpu(key[3]);
-
-       switch(key_len) {
-       case 16:
-               for (i = 0; i < 9; i++)
-                       ke4(ctx->ekey, i);
-               kel4(ctx->ekey, 9);
-               ctx->rounds = 10;
-               break;
-               
-       case 24:
-               ctx->ekey[4] = ss[4] = le32_to_cpu(key[4]);
-               ctx->ekey[5] = ss[5] = le32_to_cpu(key[5]);
-               for (i = 0; i < 7; i++)
-                       ke6(ctx->ekey, i);
-               kel6(ctx->ekey, 7); 
-               ctx->rounds = 12;
-               break;
-
-       case 32:
-               ctx->ekey[4] = ss[4] = le32_to_cpu(key[4]);
-               ctx->ekey[5] = ss[5] = le32_to_cpu(key[5]);
-               ctx->ekey[6] = ss[6] = le32_to_cpu(key[6]);
-               ctx->ekey[7] = ss[7] = le32_to_cpu(key[7]);
-               for (i = 0; i < 6; i++)
-                       ke8(ctx->ekey, i);
-               kel8(ctx->ekey, 6);
-               ctx->rounds = 14;
-               break;
-
-       default:
-               *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-               return -EINVAL;
-       }
-       
-       /* decryption schedule */
-       
-       ctx->dkey[0] = ss[0] = le32_to_cpu(key[0]);
-       ctx->dkey[1] = ss[1] = le32_to_cpu(key[1]);
-       ctx->dkey[2] = ss[2] = le32_to_cpu(key[2]);
-       ctx->dkey[3] = ss[3] = le32_to_cpu(key[3]);
-
-       switch (key_len) {
-       case 16:
-               kdf4(ctx->dkey, 0);
-               for (i = 1; i < 9; i++)
-                       kd4(ctx->dkey, i);
-               kdl4(ctx->dkey, 9);
-               break;
-               
-       case 24:
-               ctx->dkey[4] = ff(ss[4] = le32_to_cpu(key[4]));
-               ctx->dkey[5] = ff(ss[5] = le32_to_cpu(key[5]));
-               kdf6(ctx->dkey, 0);
-               for (i = 1; i < 7; i++)
-                       kd6(ctx->dkey, i);
-               kdl6(ctx->dkey, 7);
-               break;
-
-       case 32:
-               ctx->dkey[4] = ff(ss[4] = le32_to_cpu(key[4]));
-               ctx->dkey[5] = ff(ss[5] = le32_to_cpu(key[5]));
-               ctx->dkey[6] = ff(ss[6] = le32_to_cpu(key[6]));
-               ctx->dkey[7] = ff(ss[7] = le32_to_cpu(key[7]));
-               kdf8(ctx->dkey, 0);
-               for (i = 1; i < 6; i++)
-                       kd8(ctx->dkey, i);
-               kdl8(ctx->dkey, 6);
-               break;
-       }
-       return 0;
-}
-
-static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
-       aes_enc_blk(tfm, dst, src);
-}
-
-static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
-       aes_dec_blk(tfm, dst, src);
-}
-
-static struct crypto_alg aes_alg = {
-       .cra_name               =       "aes",
-       .cra_driver_name        =       "aes-i586",
-       .cra_priority           =       200,
-       .cra_flags              =       CRYPTO_ALG_TYPE_CIPHER,
-       .cra_blocksize          =       AES_BLOCK_SIZE,
-       .cra_ctxsize            =       sizeof(struct aes_ctx),
-       .cra_module             =       THIS_MODULE,
-       .cra_list               =       LIST_HEAD_INIT(aes_alg.cra_list),
-       .cra_u                  =       {
-               .cipher = {
-                       .cia_min_keysize        =       AES_MIN_KEY_SIZE,
-                       .cia_max_keysize        =       AES_MAX_KEY_SIZE,
-                       .cia_setkey             =       aes_set_key,
-                       .cia_encrypt            =       aes_encrypt,
-                       .cia_decrypt            =       aes_decrypt
-               }
-       }
-};
-
-static int __init aes_init(void)
-{
-       gen_tabs();
-       return crypto_register_alg(&aes_alg);
-}
-
-static void __exit aes_fini(void)
-{
-       crypto_unregister_alg(&aes_alg);
-}
-
-module_init(aes_init);
-module_exit(aes_fini);
-
-MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, i586 asm optimized");
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_AUTHOR("Fruhwirth Clemens, James Morris, Brian Gladman, Adam Richter");
-MODULE_ALIAS("aes");
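
Each of the two deleted glue files carried its own GF(2^8) table
generator; after this series that logic lives once in
crypto/aes_generic.c.  For reference, the finite-field multiplication
that the pow[]/log[] tables above tabulate reduces to the following
standalone routine (an illustration in userspace C with a made-up name,
not kernel code):

#include <stdint.h>

/* Multiply a and b in GF(2^8) modulo the Rijndael polynomial
 * x^8 + x^4 + x^3 + x + 1 (0x11b): the operation the deleted
 * f_mult()/ff_mult() helpers computed via log and power tables. */
static uint8_t gf_mul(uint8_t a, uint8_t b)
{
	uint8_t r = 0;

	while (b) {
		if (b & 1)
			r ^= a;		/* add (xor in) the current multiple */
		/* multiply a by x, reducing modulo 0x11b on overflow */
		a = (a << 1) ^ ((a & 0x80) ? 0x1b : 0);
		b >>= 1;
	}
	return r;
}
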
diff --git a/arch/x86/crypto/aes_64.c b/arch/x86/crypto/aes_64.c
deleted file mode 100644 (file)
index 5cdb13e..0000000
+++ /dev/null
@@ -1,336 +0,0 @@
-/*
- * Cryptographic API.
- *
- * AES Cipher Algorithm.
- *
- * Based on Brian Gladman's code.
- *
- * Linux developers:
- *  Alexander Kjeldaas <astor@fast.no>
- *  Herbert Valerio Riedel <hvr@hvrlab.org>
- *  Kyle McMartin <kyle@debian.org>
- *  Adam J. Richter <adam@yggdrasil.com> (conversion to 2.5 API).
- *  Andreas Steinmetz <ast@domdv.de> (adapted to x86_64 assembler)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * ---------------------------------------------------------------------------
- * Copyright (c) 2002, Dr Brian Gladman <brg@gladman.me.uk>, Worcester, UK.
- * All rights reserved.
- *
- * LICENSE TERMS
- *
- * The free distribution and use of this software in both source and binary
- * form is allowed (with or without changes) provided that:
- *
- *   1. distributions of this source code include the above copyright
- *      notice, this list of conditions and the following disclaimer;
- *
- *   2. distributions in binary form include the above copyright
- *      notice, this list of conditions and the following disclaimer
- *      in the documentation and/or other associated materials;
- *
- *   3. the copyright holder's name is not used to endorse products
- *      built using this software without specific written permission.
- *
- * ALTERNATIVELY, provided that this notice is retained in full, this product
- * may be distributed under the terms of the GNU General Public License (GPL),
- * in which case the provisions of the GPL apply INSTEAD OF those given above.
- *
- * DISCLAIMER
- *
- * This software is provided 'as is' with no explicit or implied warranties
- * in respect of its properties, including, but not limited to, correctness
- * and/or fitness for purpose.
- * ---------------------------------------------------------------------------
- */
-
-/* Some changes from the Gladman version:
-    s/RIJNDAEL(e_key)/E_KEY/g
-    s/RIJNDAEL(d_key)/D_KEY/g
-*/
-
-#include <asm/byteorder.h>
-#include <linux/bitops.h>
-#include <linux/crypto.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/types.h>
-
-#define AES_MIN_KEY_SIZE       16
-#define AES_MAX_KEY_SIZE       32
-
-#define AES_BLOCK_SIZE         16
-
-/*
- * #define byte(x, nr) ((unsigned char)((x) >> (nr*8)))
- */
-static inline u8 byte(const u32 x, const unsigned n)
-{
-       return x >> (n << 3);
-}
-
-struct aes_ctx
-{
-       u32 key_length;
-       u32 buf[120];
-};
-
-#define E_KEY (&ctx->buf[0])
-#define D_KEY (&ctx->buf[60])
-
-static u8 pow_tab[256] __initdata;
-static u8 log_tab[256] __initdata;
-static u8 sbx_tab[256] __initdata;
-static u8 isb_tab[256] __initdata;
-static u32 rco_tab[10];
-u32 aes_ft_tab[4][256];
-u32 aes_it_tab[4][256];
-
-u32 aes_fl_tab[4][256];
-u32 aes_il_tab[4][256];
-
-static inline u8 f_mult(u8 a, u8 b)
-{
-       u8 aa = log_tab[a], cc = aa + log_tab[b];
-
-       return pow_tab[cc + (cc < aa ? 1 : 0)];
-}
-
-#define ff_mult(a, b) (a && b ? f_mult(a, b) : 0)
-
-#define ls_box(x)                              \
-       (aes_fl_tab[0][byte(x, 0)] ^            \
-        aes_fl_tab[1][byte(x, 1)] ^            \
-        aes_fl_tab[2][byte(x, 2)] ^            \
-        aes_fl_tab[3][byte(x, 3)])
-
-static void __init gen_tabs(void)
-{
-       u32 i, t;
-       u8 p, q;
-
-       /* log and power tables for GF(2**8) finite field with
-          0x011b as modular polynomial - the simplest primitive
-          root is 0x03, used here to generate the tables */
-
-       for (i = 0, p = 1; i < 256; ++i) {
-               pow_tab[i] = (u8)p;
-               log_tab[p] = (u8)i;
-
-               p ^= (p << 1) ^ (p & 0x80 ? 0x01b : 0);
-       }
-
-       log_tab[1] = 0;
-
-       for (i = 0, p = 1; i < 10; ++i) {
-               rco_tab[i] = p;
-
-               p = (p << 1) ^ (p & 0x80 ? 0x01b : 0);
-       }
-
-       for (i = 0; i < 256; ++i) {
-               p = (i ? pow_tab[255 - log_tab[i]] : 0);
-               q = ((p >> 7) | (p << 1)) ^ ((p >> 6) | (p << 2));
-               p ^= 0x63 ^ q ^ ((q >> 6) | (q << 2));
-               sbx_tab[i] = p;
-               isb_tab[p] = (u8)i;
-       }
-
-       for (i = 0; i < 256; ++i) {
-               p = sbx_tab[i];
-
-               t = p;
-               aes_fl_tab[0][i] = t;
-               aes_fl_tab[1][i] = rol32(t, 8);
-               aes_fl_tab[2][i] = rol32(t, 16);
-               aes_fl_tab[3][i] = rol32(t, 24);
-
-               t = ((u32)ff_mult(2, p)) |
-                   ((u32)p << 8) |
-                   ((u32)p << 16) | ((u32)ff_mult(3, p) << 24);
-
-               aes_ft_tab[0][i] = t;
-               aes_ft_tab[1][i] = rol32(t, 8);
-               aes_ft_tab[2][i] = rol32(t, 16);
-               aes_ft_tab[3][i] = rol32(t, 24);
-
-               p = isb_tab[i];
-
-               t = p;
-               aes_il_tab[0][i] = t;
-               aes_il_tab[1][i] = rol32(t, 8);
-               aes_il_tab[2][i] = rol32(t, 16);
-               aes_il_tab[3][i] = rol32(t, 24);
-
-               t = ((u32)ff_mult(14, p)) |
-                   ((u32)ff_mult(9, p) << 8) |
-                   ((u32)ff_mult(13, p) << 16) |
-                   ((u32)ff_mult(11, p) << 24);
-
-               aes_it_tab[0][i] = t;
-               aes_it_tab[1][i] = rol32(t, 8);
-               aes_it_tab[2][i] = rol32(t, 16);
-               aes_it_tab[3][i] = rol32(t, 24);
-       }
-}
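
For reference, f_mult()/ff_mult() above are the classic log/antilog trick over GF(2^8): multiply by adding discrete logarithms mod 255. A standalone, illustrative C rendering (not part of the patch; it assumes pow_tab[] and log_tab[] have been filled in by gen_tabs() as above):

    /* GF(2^8) multiply via the log/antilog tables built by gen_tabs().
     * Equivalent to ff_mult(a, b): log_tab[] is only meaningful for
     * non-zero inputs, so zero operands are handled up front.
     */
    static u8 gf256_mul(u8 a, u8 b)
    {
            unsigned int s;

            if (!a || !b)
                    return 0;
            s = log_tab[a] + log_tab[b];
            if (s >= 255)           /* reduce the exponent mod 255 */
                    s -= 255;
            return pow_tab[s];
    }

f_mult() gets the same reduction for free: the u8 sum wraps mod 256, and adding the carry (cc < aa) turns that into the required mod-255 reduction.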
-
-#define star_x(x) (((x) & 0x7f7f7f7f) << 1) ^ ((((x) & 0x80808080) >> 7) * 0x1b)
-
-#define imix_col(y, x)                 \
-       u    = star_x(x);               \
-       v    = star_x(u);               \
-       w    = star_x(v);               \
-       t    = w ^ (x);                 \
-       (y)  = u ^ v ^ w;               \
-       (y) ^= ror32(u ^ t,  8) ^       \
-              ror32(v ^ t, 16) ^       \
-              ror32(t, 24)
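
Decoding imix_col(): star_x() doubles every byte lane of a word in GF(2^8), so u, v, w hold 2x, 4x, 8x and t holds 9x; the combination therefore applies the InvMixColumns coefficients {0e, 0b, 0d, 09}, matching the ff_mult(14/9/13/11, p) lines in gen_tabs() above. A function-form sketch of the same macro (illustrative only):

    /* imix_col() as a function: InvMixColumns applied to one column word. */
    static u32 inv_mix_column(u32 x)
    {
            u32 u = star_x(x);      /* 2x in each byte lane */
            u32 v = star_x(u);      /* 4x */
            u32 w = star_x(v);      /* 8x */
            u32 t = w ^ x;          /* 9x */

            return (u ^ v ^ w) ^            /* 14x */
                   ror32(u ^ t, 8) ^        /* 11x, shifted one byte */
                   ror32(v ^ t, 16) ^       /* 13x, shifted two bytes */
                   ror32(t, 24);            /* 9x, shifted three bytes */
    }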
-
-/* initialise the key schedule from the user supplied key */
-
-#define loop4(i)                                       \
-{                                                      \
-       t = ror32(t,  8); t = ls_box(t) ^ rco_tab[i];   \
-       t ^= E_KEY[4 * i];     E_KEY[4 * i + 4] = t;    \
-       t ^= E_KEY[4 * i + 1]; E_KEY[4 * i + 5] = t;    \
-       t ^= E_KEY[4 * i + 2]; E_KEY[4 * i + 6] = t;    \
-       t ^= E_KEY[4 * i + 3]; E_KEY[4 * i + 7] = t;    \
-}
-
-#define loop6(i)                                       \
-{                                                      \
-       t = ror32(t,  8); t = ls_box(t) ^ rco_tab[i];   \
-       t ^= E_KEY[6 * i];     E_KEY[6 * i + 6] = t;    \
-       t ^= E_KEY[6 * i + 1]; E_KEY[6 * i + 7] = t;    \
-       t ^= E_KEY[6 * i + 2]; E_KEY[6 * i + 8] = t;    \
-       t ^= E_KEY[6 * i + 3]; E_KEY[6 * i + 9] = t;    \
-       t ^= E_KEY[6 * i + 4]; E_KEY[6 * i + 10] = t;   \
-       t ^= E_KEY[6 * i + 5]; E_KEY[6 * i + 11] = t;   \
-}
-
-#define loop8(i)                                       \
-{                                                      \
-       t = ror32(t,  8); t = ls_box(t) ^ rco_tab[i];  \
-       t ^= E_KEY[8 * i];     E_KEY[8 * i + 8] = t;    \
-       t ^= E_KEY[8 * i + 1]; E_KEY[8 * i + 9] = t;    \
-       t ^= E_KEY[8 * i + 2]; E_KEY[8 * i + 10] = t;   \
-       t ^= E_KEY[8 * i + 3]; E_KEY[8 * i + 11] = t;   \
-       t  = E_KEY[8 * i + 4] ^ ls_box(t);              \
-       E_KEY[8 * i + 12] = t;                          \
-       t ^= E_KEY[8 * i + 5]; E_KEY[8 * i + 13] = t;   \
-       t ^= E_KEY[8 * i + 6]; E_KEY[8 * i + 14] = t;   \
-       t ^= E_KEY[8 * i + 7]; E_KEY[8 * i + 15] = t;   \
-}
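
Each loopN() performs one round of the FIPS-197 key expansion for the corresponding key size. For readability, loop4() unrolled into a function looks like this (a hedged rendering using the same ls_box()/rco_tab[] as above; ek points at the expanded-key words):

    /* One AES-128 key-expansion round, equivalent to loop4(i) above. */
    static void key_expand_128(u32 *ek, u32 *t, unsigned int i)
    {
            /* RotWord + SubWord + Rcon on the running word */
            *t = ls_box(ror32(*t, 8)) ^ rco_tab[i];
            *t ^= ek[4 * i];     ek[4 * i + 4] = *t;
            *t ^= ek[4 * i + 1]; ek[4 * i + 5] = *t;
            *t ^= ek[4 * i + 2]; ek[4 * i + 6] = *t;
            *t ^= ek[4 * i + 3]; ek[4 * i + 7] = *t;
    }

loop8() additionally runs SubWord (ls_box() without the Rcon term) on the halfway word, as FIPS-197 requires for 256-bit keys.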
-
-static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
-                      unsigned int key_len)
-{
-       struct aes_ctx *ctx = crypto_tfm_ctx(tfm);
-       const __le32 *key = (const __le32 *)in_key;
-       u32 *flags = &tfm->crt_flags;
-       u32 i, j, t, u, v, w;
-
-       if (key_len % 8) {
-               *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-               return -EINVAL;
-       }
-
-       ctx->key_length = key_len;
-
-       D_KEY[key_len + 24] = E_KEY[0] = le32_to_cpu(key[0]);
-       D_KEY[key_len + 25] = E_KEY[1] = le32_to_cpu(key[1]);
-       D_KEY[key_len + 26] = E_KEY[2] = le32_to_cpu(key[2]);
-       D_KEY[key_len + 27] = E_KEY[3] = le32_to_cpu(key[3]);
-
-       switch (key_len) {
-       case 16:
-               t = E_KEY[3];
-               for (i = 0; i < 10; ++i)
-                       loop4(i);
-               break;
-
-       case 24:
-               E_KEY[4] = le32_to_cpu(key[4]);
-               t = E_KEY[5] = le32_to_cpu(key[5]);
-               for (i = 0; i < 8; ++i)
-                       loop6(i);
-               break;
-
-       case 32:
-               E_KEY[4] = le32_to_cpu(key[4]);
-               E_KEY[5] = le32_to_cpu(key[5]);
-               E_KEY[6] = le32_to_cpu(key[6]);
-               t = E_KEY[7] = le32_to_cpu(key[7]);
-               for (i = 0; i < 7; ++i)
-                       loop8(i);
-               break;
-       }
-
-       D_KEY[0] = E_KEY[key_len + 24];
-       D_KEY[1] = E_KEY[key_len + 25];
-       D_KEY[2] = E_KEY[key_len + 26];
-       D_KEY[3] = E_KEY[key_len + 27];
-
-       for (i = 4; i < key_len + 24; ++i) {
-               j = key_len + 24 - (i & ~3) + (i & 3);
-               imix_col(D_KEY[j], E_KEY[i]);
-       }
-
-       return 0;
-}
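
The closing loop builds the equivalent-inverse-cipher schedule: round keys are mirrored end-for-end (word order inside each round preserved) and run through imix_col(). A small self-contained check of the index arithmetic, assuming a 128-bit key (so key_len + 24 == 40):

    #include <assert.h>

    /* Verifies the j = key_len + 24 - (i & ~3) + (i & 3) mapping for
     * key_len == 16: encryption round r maps to decryption round 10 - r,
     * with the word position within the round unchanged.
     */
    static void check_dkey_index_mapping(void)
    {
            unsigned int i;

            for (i = 4; i < 40; ++i) {
                    unsigned int j = 40 - (i & ~3) + (i & 3);

                    assert(j / 4 == 10 - i / 4);
                    assert(j % 4 == i % 4);
            }
    }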
-
-asmlinkage void aes_enc_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in);
-asmlinkage void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in);
-
-static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
-       aes_enc_blk(tfm, dst, src);
-}
-
-static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
-       aes_dec_blk(tfm, dst, src);
-}
-
-static struct crypto_alg aes_alg = {
-       .cra_name               =       "aes",
-       .cra_driver_name        =       "aes-x86_64",
-       .cra_priority           =       200,
-       .cra_flags              =       CRYPTO_ALG_TYPE_CIPHER,
-       .cra_blocksize          =       AES_BLOCK_SIZE,
-       .cra_ctxsize            =       sizeof(struct aes_ctx),
-       .cra_module             =       THIS_MODULE,
-       .cra_list               =       LIST_HEAD_INIT(aes_alg.cra_list),
-       .cra_u                  =       {
-               .cipher = {
-                       .cia_min_keysize        =       AES_MIN_KEY_SIZE,
-                       .cia_max_keysize        =       AES_MAX_KEY_SIZE,
-                       .cia_setkey             =       aes_set_key,
-                       .cia_encrypt            =       aes_encrypt,
-                       .cia_decrypt            =       aes_decrypt
-               }
-       }
-};
-
-static int __init aes_init(void)
-{
-       gen_tabs();
-       return crypto_register_alg(&aes_alg);
-}
-
-static void __exit aes_fini(void)
-{
-       crypto_unregister_alg(&aes_alg);
-}
-
-module_init(aes_init);
-module_exit(aes_fini);
-
-MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("aes");
diff --git a/arch/x86/crypto/aes_glue.c b/arch/x86/crypto/aes_glue.c
new file mode 100644 (file)
index 0000000..71f4578
--- /dev/null
@@ -0,0 +1,57 @@
+/*
+ * Glue Code for the asm optimized version of the AES Cipher Algorithm
+ *
+ */
+
+#include <crypto/aes.h>
+
+asmlinkage void aes_enc_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in);
+asmlinkage void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in);
+
+static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+       aes_enc_blk(tfm, dst, src);
+}
+
+static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+       aes_dec_blk(tfm, dst, src);
+}
+
+static struct crypto_alg aes_alg = {
+       .cra_name               = "aes",
+       .cra_driver_name        = "aes-asm",
+       .cra_priority           = 200,
+       .cra_flags              = CRYPTO_ALG_TYPE_CIPHER,
+       .cra_blocksize          = AES_BLOCK_SIZE,
+       .cra_ctxsize            = sizeof(struct crypto_aes_ctx),
+       .cra_module             = THIS_MODULE,
+       .cra_list               = LIST_HEAD_INIT(aes_alg.cra_list),
+       .cra_u  = {
+               .cipher = {
+                       .cia_min_keysize        = AES_MIN_KEY_SIZE,
+                       .cia_max_keysize        = AES_MAX_KEY_SIZE,
+                       .cia_setkey             = crypto_aes_set_key,
+                       .cia_encrypt            = aes_encrypt,
+                       .cia_decrypt            = aes_decrypt
+               }
+       }
+};
+
+static int __init aes_init(void)
+{
+       return crypto_register_alg(&aes_alg);
+}
+
+static void __exit aes_fini(void)
+{
+       crypto_unregister_alg(&aes_alg);
+}
+
+module_init(aes_init);
+module_exit(aes_fini);
+
+MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, asm optimized");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("aes");
+MODULE_ALIAS("aes-asm");
diff --git a/arch/x86/crypto/salsa20-i586-asm_32.S b/arch/x86/crypto/salsa20-i586-asm_32.S
new file mode 100644 (file)
index 0000000..72eb306
--- /dev/null
@@ -0,0 +1,1114 @@
+# salsa20_pm.s version 20051229
+# D. J. Bernstein
+# Public domain.
+
+# enter ECRYPT_encrypt_bytes
+.text
+.p2align 5
+.globl ECRYPT_encrypt_bytes
+ECRYPT_encrypt_bytes:
+       mov     %esp,%eax
+       and     $31,%eax
+       add     $256,%eax
+       sub     %eax,%esp
+       # eax_stack = eax
+       movl    %eax,80(%esp)
+       # ebx_stack = ebx
+       movl    %ebx,84(%esp)
+       # esi_stack = esi
+       movl    %esi,88(%esp)
+       # edi_stack = edi
+       movl    %edi,92(%esp)
+       # ebp_stack = ebp
+       movl    %ebp,96(%esp)
+       # x = arg1
+       movl    4(%esp,%eax),%edx
+       # m = arg2
+       movl    8(%esp,%eax),%esi
+       # out = arg3
+       movl    12(%esp,%eax),%edi
+       # bytes = arg4
+       movl    16(%esp,%eax),%ebx
+       # bytes -= 0
+       sub     $0,%ebx
+       # goto done if unsigned<=
+       jbe     ._done
+._start:
+       # in0 = *(uint32 *) (x + 0)
+       movl    0(%edx),%eax
+       # in1 = *(uint32 *) (x + 4)
+       movl    4(%edx),%ecx
+       # in2 = *(uint32 *) (x + 8)
+       movl    8(%edx),%ebp
+       # j0 = in0
+       movl    %eax,164(%esp)
+       # in3 = *(uint32 *) (x + 12)
+       movl    12(%edx),%eax
+       # j1 = in1
+       movl    %ecx,168(%esp)
+       # in4 = *(uint32 *) (x + 16)
+       movl    16(%edx),%ecx
+       # j2 = in2
+       movl    %ebp,172(%esp)
+       # in5 = *(uint32 *) (x + 20)
+       movl    20(%edx),%ebp
+       # j3 = in3
+       movl    %eax,176(%esp)
+       # in6 = *(uint32 *) (x + 24)
+       movl    24(%edx),%eax
+       # j4 = in4
+       movl    %ecx,180(%esp)
+       # in7 = *(uint32 *) (x + 28)
+       movl    28(%edx),%ecx
+       # j5 = in5
+       movl    %ebp,184(%esp)
+       # in8 = *(uint32 *) (x + 32)
+       movl    32(%edx),%ebp
+       # j6 = in6
+       movl    %eax,188(%esp)
+       # in9 = *(uint32 *) (x + 36)
+       movl    36(%edx),%eax
+       # j7 = in7
+       movl    %ecx,192(%esp)
+       # in10 = *(uint32 *) (x + 40)
+       movl    40(%edx),%ecx
+       # j8 = in8
+       movl    %ebp,196(%esp)
+       # in11 = *(uint32 *) (x + 44)
+       movl    44(%edx),%ebp
+       # j9 = in9
+       movl    %eax,200(%esp)
+       # in12 = *(uint32 *) (x + 48)
+       movl    48(%edx),%eax
+       # j10 = in10
+       movl    %ecx,204(%esp)
+       # in13 = *(uint32 *) (x + 52)
+       movl    52(%edx),%ecx
+       # j11 = in11
+       movl    %ebp,208(%esp)
+       # in14 = *(uint32 *) (x + 56)
+       movl    56(%edx),%ebp
+       # j12 = in12
+       movl    %eax,212(%esp)
+       # in15 = *(uint32 *) (x + 60)
+       movl    60(%edx),%eax
+       # j13 = in13
+       movl    %ecx,216(%esp)
+       # j14 = in14
+       movl    %ebp,220(%esp)
+       # j15 = in15
+       movl    %eax,224(%esp)
+       # x_backup = x
+       movl    %edx,64(%esp)
+._bytesatleast1:
+       #   bytes - 64
+       cmp     $64,%ebx
+       #   goto nocopy if unsigned>=
+       jae     ._nocopy
+       #     ctarget = out
+       movl    %edi,228(%esp)
+       #     out = &tmp
+       leal    0(%esp),%edi
+       #     i = bytes
+       mov     %ebx,%ecx
+       #     while (i) { *out++ = *m++; --i }
+       rep     movsb
+       #     out = &tmp
+       leal    0(%esp),%edi
+       #     m = &tmp
+       leal    0(%esp),%esi
+._nocopy:
+       #   out_backup = out
+       movl    %edi,72(%esp)
+       #   m_backup = m
+       movl    %esi,68(%esp)
+       #   bytes_backup = bytes
+       movl    %ebx,76(%esp)
+       #   in0 = j0
+       movl    164(%esp),%eax
+       #   in1 = j1
+       movl    168(%esp),%ecx
+       #   in2 = j2
+       movl    172(%esp),%edx
+       #   in3 = j3
+       movl    176(%esp),%ebx
+       #   x0 = in0
+       movl    %eax,100(%esp)
+       #   x1 = in1
+       movl    %ecx,104(%esp)
+       #   x2 = in2
+       movl    %edx,108(%esp)
+       #   x3 = in3
+       movl    %ebx,112(%esp)
+       #   in4 = j4
+       movl    180(%esp),%eax
+       #   in5 = j5
+       movl    184(%esp),%ecx
+       #   in6 = j6
+       movl    188(%esp),%edx
+       #   in7 = j7
+       movl    192(%esp),%ebx
+       #   x4 = in4
+       movl    %eax,116(%esp)
+       #   x5 = in5
+       movl    %ecx,120(%esp)
+       #   x6 = in6
+       movl    %edx,124(%esp)
+       #   x7 = in7
+       movl    %ebx,128(%esp)
+       #   in8 = j8
+       movl    196(%esp),%eax
+       #   in9 = j9
+       movl    200(%esp),%ecx
+       #   in10 = j10
+       movl    204(%esp),%edx
+       #   in11 = j11
+       movl    208(%esp),%ebx
+       #   x8 = in8
+       movl    %eax,132(%esp)
+       #   x9 = in9
+       movl    %ecx,136(%esp)
+       #   x10 = in10
+       movl    %edx,140(%esp)
+       #   x11 = in11
+       movl    %ebx,144(%esp)
+       #   in12 = j12
+       movl    212(%esp),%eax
+       #   in13 = j13
+       movl    216(%esp),%ecx
+       #   in14 = j14
+       movl    220(%esp),%edx
+       #   in15 = j15
+       movl    224(%esp),%ebx
+       #   x12 = in12
+       movl    %eax,148(%esp)
+       #   x13 = in13
+       movl    %ecx,152(%esp)
+       #   x14 = in14
+       movl    %edx,156(%esp)
+       #   x15 = in15
+       movl    %ebx,160(%esp)
+       #   i = 20
+       mov     $20,%ebp
+       # p = x0
+       movl    100(%esp),%eax
+       # s = x5
+       movl    120(%esp),%ecx
+       # t = x10
+       movl    140(%esp),%edx
+       # w = x15
+       movl    160(%esp),%ebx
+._mainloop:
+       # x0 = p
+       movl    %eax,100(%esp)
+       #                               x10 = t
+       movl    %edx,140(%esp)
+       # p += x12
+       addl    148(%esp),%eax
+       #               x5 = s
+       movl    %ecx,120(%esp)
+       #                               t += x6
+       addl    124(%esp),%edx
+       #                                               x15 = w
+       movl    %ebx,160(%esp)
+       #               r = x1
+       movl    104(%esp),%esi
+       #               r += s
+       add     %ecx,%esi
+       #                                               v = x11
+       movl    144(%esp),%edi
+       #                                               v += w
+       add     %ebx,%edi
+       # p <<<= 7
+       rol     $7,%eax
+       # p ^= x4
+       xorl    116(%esp),%eax
+       #                               t <<<= 7
+       rol     $7,%edx
+       #                               t ^= x14
+       xorl    156(%esp),%edx
+       #               r <<<= 7
+       rol     $7,%esi
+       #               r ^= x9
+       xorl    136(%esp),%esi
+       #                                               v <<<= 7
+       rol     $7,%edi
+       #                                               v ^= x3
+       xorl    112(%esp),%edi
+       # x4 = p
+       movl    %eax,116(%esp)
+       #                               x14 = t
+       movl    %edx,156(%esp)
+       # p += x0
+       addl    100(%esp),%eax
+       #               x9 = r
+       movl    %esi,136(%esp)
+       #                               t += x10
+       addl    140(%esp),%edx
+       #                                               x3 = v
+       movl    %edi,112(%esp)
+       # p <<<= 9
+       rol     $9,%eax
+       # p ^= x8
+       xorl    132(%esp),%eax
+       #                               t <<<= 9
+       rol     $9,%edx
+       #                               t ^= x2
+       xorl    108(%esp),%edx
+       #               s += r
+       add     %esi,%ecx
+       #               s <<<= 9
+       rol     $9,%ecx
+       #               s ^= x13
+       xorl    152(%esp),%ecx
+       #                                               w += v
+       add     %edi,%ebx
+       #                                               w <<<= 9
+       rol     $9,%ebx
+       #                                               w ^= x7
+       xorl    128(%esp),%ebx
+       # x8 = p
+       movl    %eax,132(%esp)
+       #                               x2 = t
+       movl    %edx,108(%esp)
+       # p += x4
+       addl    116(%esp),%eax
+       #               x13 = s
+       movl    %ecx,152(%esp)
+       #                               t += x14
+       addl    156(%esp),%edx
+       #                                               x7 = w
+       movl    %ebx,128(%esp)
+       # p <<<= 13
+       rol     $13,%eax
+       # p ^= x12
+       xorl    148(%esp),%eax
+       #                               t <<<= 13
+       rol     $13,%edx
+       #                               t ^= x6
+       xorl    124(%esp),%edx
+       #               r += s
+       add     %ecx,%esi
+       #               r <<<= 13
+       rol     $13,%esi
+       #               r ^= x1
+       xorl    104(%esp),%esi
+       #                                               v += w
+       add     %ebx,%edi
+       #                                               v <<<= 13
+       rol     $13,%edi
+       #                                               v ^= x11
+       xorl    144(%esp),%edi
+       # x12 = p
+       movl    %eax,148(%esp)
+       #                               x6 = t
+       movl    %edx,124(%esp)
+       # p += x8
+       addl    132(%esp),%eax
+       #               x1 = r
+       movl    %esi,104(%esp)
+       #                               t += x2
+       addl    108(%esp),%edx
+       #                                               x11 = v
+       movl    %edi,144(%esp)
+       # p <<<= 18
+       rol     $18,%eax
+       # p ^= x0
+       xorl    100(%esp),%eax
+       #                               t <<<= 18
+       rol     $18,%edx
+       #                               t ^= x10
+       xorl    140(%esp),%edx
+       #               s += r
+       add     %esi,%ecx
+       #               s <<<= 18
+       rol     $18,%ecx
+       #               s ^= x5
+       xorl    120(%esp),%ecx
+       #                                               w += v
+       add     %edi,%ebx
+       #                                               w <<<= 18
+       rol     $18,%ebx
+       #                                               w ^= x15
+       xorl    160(%esp),%ebx
+       # x0 = p
+       movl    %eax,100(%esp)
+       #                               x10 = t
+       movl    %edx,140(%esp)
+       # p += x3
+       addl    112(%esp),%eax
+       # p <<<= 7
+       rol     $7,%eax
+       #               x5 = s
+       movl    %ecx,120(%esp)
+       #                               t += x9
+       addl    136(%esp),%edx
+       #                                               x15 = w
+       movl    %ebx,160(%esp)
+       #               r = x4
+       movl    116(%esp),%esi
+       #               r += s
+       add     %ecx,%esi
+       #                                               v = x14
+       movl    156(%esp),%edi
+       #                                               v += w
+       add     %ebx,%edi
+       # p ^= x1
+       xorl    104(%esp),%eax
+       #                               t <<<= 7
+       rol     $7,%edx
+       #                               t ^= x11
+       xorl    144(%esp),%edx
+       #               r <<<= 7
+       rol     $7,%esi
+       #               r ^= x6
+       xorl    124(%esp),%esi
+       #                                               v <<<= 7
+       rol     $7,%edi
+       #                                               v ^= x12
+       xorl    148(%esp),%edi
+       # x1 = p
+       movl    %eax,104(%esp)
+       #                               x11 = t
+       movl    %edx,144(%esp)
+       # p += x0
+       addl    100(%esp),%eax
+       #               x6 = r
+       movl    %esi,124(%esp)
+       #                               t += x10
+       addl    140(%esp),%edx
+       #                                               x12 = v
+       movl    %edi,148(%esp)
+       # p <<<= 9
+       rol     $9,%eax
+       # p ^= x2
+       xorl    108(%esp),%eax
+       #                               t <<<= 9
+       rol     $9,%edx
+       #                               t ^= x8
+       xorl    132(%esp),%edx
+       #               s += r
+       add     %esi,%ecx
+       #               s <<<= 9
+       rol     $9,%ecx
+       #               s ^= x7
+       xorl    128(%esp),%ecx
+       #                                               w += v
+       add     %edi,%ebx
+       #                                               w <<<= 9
+       rol     $9,%ebx
+       #                                               w ^= x13
+       xorl    152(%esp),%ebx
+       # x2 = p
+       movl    %eax,108(%esp)
+       #                               x8 = t
+       movl    %edx,132(%esp)
+       # p += x1
+       addl    104(%esp),%eax
+       #               x7 = s
+       movl    %ecx,128(%esp)
+       #                               t += x11
+       addl    144(%esp),%edx
+       #                                               x13 = w
+       movl    %ebx,152(%esp)
+       # p <<<= 13
+       rol     $13,%eax
+       # p ^= x3
+       xorl    112(%esp),%eax
+       #                               t <<<= 13
+       rol     $13,%edx
+       #                               t ^= x9
+       xorl    136(%esp),%edx
+       #               r += s
+       add     %ecx,%esi
+       #               r <<<= 13
+       rol     $13,%esi
+       #               r ^= x4
+       xorl    116(%esp),%esi
+       #                                               v += w
+       add     %ebx,%edi
+       #                                               v <<<= 13
+       rol     $13,%edi
+       #                                               v ^= x14
+       xorl    156(%esp),%edi
+       # x3 = p
+       movl    %eax,112(%esp)
+       #                               x9 = t
+       movl    %edx,136(%esp)
+       # p += x2
+       addl    108(%esp),%eax
+       #               x4 = r
+       movl    %esi,116(%esp)
+       #                               t += x8
+       addl    132(%esp),%edx
+       #                                               x14 = v
+       movl    %edi,156(%esp)
+       # p <<<= 18
+       rol     $18,%eax
+       # p ^= x0
+       xorl    100(%esp),%eax
+       #                               t <<<= 18
+       rol     $18,%edx
+       #                               t ^= x10
+       xorl    140(%esp),%edx
+       #               s += r
+       add     %esi,%ecx
+       #               s <<<= 18
+       rol     $18,%ecx
+       #               s ^= x5
+       xorl    120(%esp),%ecx
+       #                                               w += v
+       add     %edi,%ebx
+       #                                               w <<<= 18
+       rol     $18,%ebx
+       #                                               w ^= x15
+       xorl    160(%esp),%ebx
+       # x0 = p
+       movl    %eax,100(%esp)
+       #                               x10 = t
+       movl    %edx,140(%esp)
+       # p += x12
+       addl    148(%esp),%eax
+       #               x5 = s
+       movl    %ecx,120(%esp)
+       #                               t += x6
+       addl    124(%esp),%edx
+       #                                               x15 = w
+       movl    %ebx,160(%esp)
+       #               r = x1
+       movl    104(%esp),%esi
+       #               r += s
+       add     %ecx,%esi
+       #                                               v = x11
+       movl    144(%esp),%edi
+       #                                               v += w
+       add     %ebx,%edi
+       # p <<<= 7
+       rol     $7,%eax
+       # p ^= x4
+       xorl    116(%esp),%eax
+       #                               t <<<= 7
+       rol     $7,%edx
+       #                               t ^= x14
+       xorl    156(%esp),%edx
+       #               r <<<= 7
+       rol     $7,%esi
+       #               r ^= x9
+       xorl    136(%esp),%esi
+       #                                               v <<<= 7
+       rol     $7,%edi
+       #                                               v ^= x3
+       xorl    112(%esp),%edi
+       # x4 = p
+       movl    %eax,116(%esp)
+       #                               x14 = t
+       movl    %edx,156(%esp)
+       # p += x0
+       addl    100(%esp),%eax
+       #               x9 = r
+       movl    %esi,136(%esp)
+       #                               t += x10
+       addl    140(%esp),%edx
+       #                                               x3 = v
+       movl    %edi,112(%esp)
+       # p <<<= 9
+       rol     $9,%eax
+       # p ^= x8
+       xorl    132(%esp),%eax
+       #                               t <<<= 9
+       rol     $9,%edx
+       #                               t ^= x2
+       xorl    108(%esp),%edx
+       #               s += r
+       add     %esi,%ecx
+       #               s <<<= 9
+       rol     $9,%ecx
+       #               s ^= x13
+       xorl    152(%esp),%ecx
+       #                                               w += v
+       add     %edi,%ebx
+       #                                               w <<<= 9
+       rol     $9,%ebx
+       #                                               w ^= x7
+       xorl    128(%esp),%ebx
+       # x8 = p
+       movl    %eax,132(%esp)
+       #                               x2 = t
+       movl    %edx,108(%esp)
+       # p += x4
+       addl    116(%esp),%eax
+       #               x13 = s
+       movl    %ecx,152(%esp)
+       #                               t += x14
+       addl    156(%esp),%edx
+       #                                               x7 = w
+       movl    %ebx,128(%esp)
+       # p <<<= 13
+       rol     $13,%eax
+       # p ^= x12
+       xorl    148(%esp),%eax
+       #                               t <<<= 13
+       rol     $13,%edx
+       #                               t ^= x6
+       xorl    124(%esp),%edx
+       #               r += s
+       add     %ecx,%esi
+       #               r <<<= 13
+       rol     $13,%esi
+       #               r ^= x1
+       xorl    104(%esp),%esi
+       #                                               v += w
+       add     %ebx,%edi
+       #                                               v <<<= 13
+       rol     $13,%edi
+       #                                               v ^= x11
+       xorl    144(%esp),%edi
+       # x12 = p
+       movl    %eax,148(%esp)
+       #                               x6 = t
+       movl    %edx,124(%esp)
+       # p += x8
+       addl    132(%esp),%eax
+       #               x1 = r
+       movl    %esi,104(%esp)
+       #                               t += x2
+       addl    108(%esp),%edx
+       #                                               x11 = v
+       movl    %edi,144(%esp)
+       # p <<<= 18
+       rol     $18,%eax
+       # p ^= x0
+       xorl    100(%esp),%eax
+       #                               t <<<= 18
+       rol     $18,%edx
+       #                               t ^= x10
+       xorl    140(%esp),%edx
+       #               s += r
+       add     %esi,%ecx
+       #               s <<<= 18
+       rol     $18,%ecx
+       #               s ^= x5
+       xorl    120(%esp),%ecx
+       #                                               w += v
+       add     %edi,%ebx
+       #                                               w <<<= 18
+       rol     $18,%ebx
+       #                                               w ^= x15
+       xorl    160(%esp),%ebx
+       # x0 = p
+       movl    %eax,100(%esp)
+       #                               x10 = t
+       movl    %edx,140(%esp)
+       # p += x3
+       addl    112(%esp),%eax
+       # p <<<= 7
+       rol     $7,%eax
+       #               x5 = s
+       movl    %ecx,120(%esp)
+       #                               t += x9
+       addl    136(%esp),%edx
+       #                                               x15 = w
+       movl    %ebx,160(%esp)
+       #               r = x4
+       movl    116(%esp),%esi
+       #               r += s
+       add     %ecx,%esi
+       #                                               v = x14
+       movl    156(%esp),%edi
+       #                                               v += w
+       add     %ebx,%edi
+       # p ^= x1
+       xorl    104(%esp),%eax
+       #                               t <<<= 7
+       rol     $7,%edx
+       #                               t ^= x11
+       xorl    144(%esp),%edx
+       #               r <<<= 7
+       rol     $7,%esi
+       #               r ^= x6
+       xorl    124(%esp),%esi
+       #                                               v <<<= 7
+       rol     $7,%edi
+       #                                               v ^= x12
+       xorl    148(%esp),%edi
+       # x1 = p
+       movl    %eax,104(%esp)
+       #                               x11 = t
+       movl    %edx,144(%esp)
+       # p += x0
+       addl    100(%esp),%eax
+       #               x6 = r
+       movl    %esi,124(%esp)
+       #                               t += x10
+       addl    140(%esp),%edx
+       #                                               x12 = v
+       movl    %edi,148(%esp)
+       # p <<<= 9
+       rol     $9,%eax
+       # p ^= x2
+       xorl    108(%esp),%eax
+       #                               t <<<= 9
+       rol     $9,%edx
+       #                               t ^= x8
+       xorl    132(%esp),%edx
+       #               s += r
+       add     %esi,%ecx
+       #               s <<<= 9
+       rol     $9,%ecx
+       #               s ^= x7
+       xorl    128(%esp),%ecx
+       #                                               w += v
+       add     %edi,%ebx
+       #                                               w <<<= 9
+       rol     $9,%ebx
+       #                                               w ^= x13
+       xorl    152(%esp),%ebx
+       # x2 = p
+       movl    %eax,108(%esp)
+       #                               x8 = t
+       movl    %edx,132(%esp)
+       # p += x1
+       addl    104(%esp),%eax
+       #               x7 = s
+       movl    %ecx,128(%esp)
+       #                               t += x11
+       addl    144(%esp),%edx
+       #                                               x13 = w
+       movl    %ebx,152(%esp)
+       # p <<<= 13
+       rol     $13,%eax
+       # p ^= x3
+       xorl    112(%esp),%eax
+       #                               t <<<= 13
+       rol     $13,%edx
+       #                               t ^= x9
+       xorl    136(%esp),%edx
+       #               r += s
+       add     %ecx,%esi
+       #               r <<<= 13
+       rol     $13,%esi
+       #               r ^= x4
+       xorl    116(%esp),%esi
+       #                                               v += w
+       add     %ebx,%edi
+       #                                               v <<<= 13
+       rol     $13,%edi
+       #                                               v ^= x14
+       xorl    156(%esp),%edi
+       # x3 = p
+       movl    %eax,112(%esp)
+       #                               x9 = t
+       movl    %edx,136(%esp)
+       # p += x2
+       addl    108(%esp),%eax
+       #               x4 = r
+       movl    %esi,116(%esp)
+       #                               t += x8
+       addl    132(%esp),%edx
+       #                                               x14 = v
+       movl    %edi,156(%esp)
+       # p <<<= 18
+       rol     $18,%eax
+       # p ^= x0
+       xorl    100(%esp),%eax
+       #                               t <<<= 18
+       rol     $18,%edx
+       #                               t ^= x10
+       xorl    140(%esp),%edx
+       #               s += r
+       add     %esi,%ecx
+       #               s <<<= 18
+       rol     $18,%ecx
+       #               s ^= x5
+       xorl    120(%esp),%ecx
+       #                                               w += v
+       add     %edi,%ebx
+       #                                               w <<<= 18
+       rol     $18,%ebx
+       #                                               w ^= x15
+       xorl    160(%esp),%ebx
+       # i -= 4
+       sub     $4,%ebp
+       # goto mainloop if unsigned >
+       ja      ._mainloop
+       # x0 = p
+       movl    %eax,100(%esp)
+       # x5 = s
+       movl    %ecx,120(%esp)
+       # x10 = t
+       movl    %edx,140(%esp)
+       # x15 = w
+       movl    %ebx,160(%esp)
+       #   out = out_backup
+       movl    72(%esp),%edi
+       #   m = m_backup
+       movl    68(%esp),%esi
+       #   in0 = x0
+       movl    100(%esp),%eax
+       #   in1 = x1
+       movl    104(%esp),%ecx
+       #   in0 += j0
+       addl    164(%esp),%eax
+       #   in1 += j1
+       addl    168(%esp),%ecx
+       #   in0 ^= *(uint32 *) (m + 0)
+       xorl    0(%esi),%eax
+       #   in1 ^= *(uint32 *) (m + 4)
+       xorl    4(%esi),%ecx
+       #   *(uint32 *) (out + 0) = in0
+       movl    %eax,0(%edi)
+       #   *(uint32 *) (out + 4) = in1
+       movl    %ecx,4(%edi)
+       #   in2 = x2
+       movl    108(%esp),%eax
+       #   in3 = x3
+       movl    112(%esp),%ecx
+       #   in2 += j2
+       addl    172(%esp),%eax
+       #   in3 += j3
+       addl    176(%esp),%ecx
+       #   in2 ^= *(uint32 *) (m + 8)
+       xorl    8(%esi),%eax
+       #   in3 ^= *(uint32 *) (m + 12)
+       xorl    12(%esi),%ecx
+       #   *(uint32 *) (out + 8) = in2
+       movl    %eax,8(%edi)
+       #   *(uint32 *) (out + 12) = in3
+       movl    %ecx,12(%edi)
+       #   in4 = x4
+       movl    116(%esp),%eax
+       #   in5 = x5
+       movl    120(%esp),%ecx
+       #   in4 += j4
+       addl    180(%esp),%eax
+       #   in5 += j5
+       addl    184(%esp),%ecx
+       #   in4 ^= *(uint32 *) (m + 16)
+       xorl    16(%esi),%eax
+       #   in5 ^= *(uint32 *) (m + 20)
+       xorl    20(%esi),%ecx
+       #   *(uint32 *) (out + 16) = in4
+       movl    %eax,16(%edi)
+       #   *(uint32 *) (out + 20) = in5
+       movl    %ecx,20(%edi)
+       #   in6 = x6
+       movl    124(%esp),%eax
+       #   in7 = x7
+       movl    128(%esp),%ecx
+       #   in6 += j6
+       addl    188(%esp),%eax
+       #   in7 += j7
+       addl    192(%esp),%ecx
+       #   in6 ^= *(uint32 *) (m + 24)
+       xorl    24(%esi),%eax
+       #   in7 ^= *(uint32 *) (m + 28)
+       xorl    28(%esi),%ecx
+       #   *(uint32 *) (out + 24) = in6
+       movl    %eax,24(%edi)
+       #   *(uint32 *) (out + 28) = in7
+       movl    %ecx,28(%edi)
+       #   in8 = x8
+       movl    132(%esp),%eax
+       #   in9 = x9
+       movl    136(%esp),%ecx
+       #   in8 += j8
+       addl    196(%esp),%eax
+       #   in9 += j9
+       addl    200(%esp),%ecx
+       #   in8 ^= *(uint32 *) (m + 32)
+       xorl    32(%esi),%eax
+       #   in9 ^= *(uint32 *) (m + 36)
+       xorl    36(%esi),%ecx
+       #   *(uint32 *) (out + 32) = in8
+       movl    %eax,32(%edi)
+       #   *(uint32 *) (out + 36) = in9
+       movl    %ecx,36(%edi)
+       #   in10 = x10
+       movl    140(%esp),%eax
+       #   in11 = x11
+       movl    144(%esp),%ecx
+       #   in10 += j10
+       addl    204(%esp),%eax
+       #   in11 += j11
+       addl    208(%esp),%ecx
+       #   in10 ^= *(uint32 *) (m + 40)
+       xorl    40(%esi),%eax
+       #   in11 ^= *(uint32 *) (m + 44)
+       xorl    44(%esi),%ecx
+       #   *(uint32 *) (out + 40) = in10
+       movl    %eax,40(%edi)
+       #   *(uint32 *) (out + 44) = in11
+       movl    %ecx,44(%edi)
+       #   in12 = x12
+       movl    148(%esp),%eax
+       #   in13 = x13
+       movl    152(%esp),%ecx
+       #   in12 += j12
+       addl    212(%esp),%eax
+       #   in13 += j13
+       addl    216(%esp),%ecx
+       #   in12 ^= *(uint32 *) (m + 48)
+       xorl    48(%esi),%eax
+       #   in13 ^= *(uint32 *) (m + 52)
+       xorl    52(%esi),%ecx
+       #   *(uint32 *) (out + 48) = in12
+       movl    %eax,48(%edi)
+       #   *(uint32 *) (out + 52) = in13
+       movl    %ecx,52(%edi)
+       #   in14 = x14
+       movl    156(%esp),%eax
+       #   in15 = x15
+       movl    160(%esp),%ecx
+       #   in14 += j14
+       addl    220(%esp),%eax
+       #   in15 += j15
+       addl    224(%esp),%ecx
+       #   in14 ^= *(uint32 *) (m + 56)
+       xorl    56(%esi),%eax
+       #   in15 ^= *(uint32 *) (m + 60)
+       xorl    60(%esi),%ecx
+       #   *(uint32 *) (out + 56) = in14
+       movl    %eax,56(%edi)
+       #   *(uint32 *) (out + 60) = in15
+       movl    %ecx,60(%edi)
+       #   bytes = bytes_backup
+       movl    76(%esp),%ebx
+       #   in8 = j8
+       movl    196(%esp),%eax
+       #   in9 = j9
+       movl    200(%esp),%ecx
+       #   in8 += 1
+       add     $1,%eax
+       #   in9 += 0 + carry
+       adc     $0,%ecx
+       #   j8 = in8
+       movl    %eax,196(%esp)
+       #   j9 = in9
+       movl    %ecx,200(%esp)
+       #   bytes - 64
+       cmp     $64,%ebx
+       #   goto bytesatleast65 if unsigned>
+       ja      ._bytesatleast65
+       #     goto bytesatleast64 if unsigned>=
+       jae     ._bytesatleast64
+       #       m = out
+       mov     %edi,%esi
+       #       out = ctarget
+       movl    228(%esp),%edi
+       #       i = bytes
+       mov     %ebx,%ecx
+       #       while (i) { *out++ = *m++; --i }
+       rep     movsb
+._bytesatleast64:
+       #     x = x_backup
+       movl    64(%esp),%eax
+       #     in8 = j8
+       movl    196(%esp),%ecx
+       #     in9 = j9
+       movl    200(%esp),%edx
+       #     *(uint32 *) (x + 32) = in8
+       movl    %ecx,32(%eax)
+       #     *(uint32 *) (x + 36) = in9
+       movl    %edx,36(%eax)
+._done:
+       #     eax = eax_stack
+       movl    80(%esp),%eax
+       #     ebx = ebx_stack
+       movl    84(%esp),%ebx
+       #     esi = esi_stack
+       movl    88(%esp),%esi
+       #     edi = edi_stack
+       movl    92(%esp),%edi
+       #     ebp = ebp_stack
+       movl    96(%esp),%ebp
+       #     leave
+       add     %eax,%esp
+       ret
+._bytesatleast65:
+       #   bytes -= 64
+       sub     $64,%ebx
+       #   out += 64
+       add     $64,%edi
+       #   m += 64
+       add     $64,%esi
+       # goto bytesatleast1
+       jmp     ._bytesatleast1
+# enter ECRYPT_keysetup
+.text
+.p2align 5
+.globl ECRYPT_keysetup
+ECRYPT_keysetup:
+       mov     %esp,%eax
+       and     $31,%eax
+       add     $256,%eax
+       sub     %eax,%esp
+       #   eax_stack = eax
+       movl    %eax,64(%esp)
+       #   ebx_stack = ebx
+       movl    %ebx,68(%esp)
+       #   esi_stack = esi
+       movl    %esi,72(%esp)
+       #   edi_stack = edi
+       movl    %edi,76(%esp)
+       #   ebp_stack = ebp
+       movl    %ebp,80(%esp)
+       #   k = arg2
+       movl    8(%esp,%eax),%ecx
+       #   kbits = arg3
+       movl    12(%esp,%eax),%edx
+       #   x = arg1
+       movl    4(%esp,%eax),%eax
+       #   in1 = *(uint32 *) (k + 0)
+       movl    0(%ecx),%ebx
+       #   in2 = *(uint32 *) (k + 4)
+       movl    4(%ecx),%esi
+       #   in3 = *(uint32 *) (k + 8)
+       movl    8(%ecx),%edi
+       #   in4 = *(uint32 *) (k + 12)
+       movl    12(%ecx),%ebp
+       #   *(uint32 *) (x + 4) = in1
+       movl    %ebx,4(%eax)
+       #   *(uint32 *) (x + 8) = in2
+       movl    %esi,8(%eax)
+       #   *(uint32 *) (x + 12) = in3
+       movl    %edi,12(%eax)
+       #   *(uint32 *) (x + 16) = in4
+       movl    %ebp,16(%eax)
+       #   kbits - 256
+       cmp     $256,%edx
+       #   goto kbits128 if unsigned<
+       jb      ._kbits128
+._kbits256:
+       #     in11 = *(uint32 *) (k + 16)
+       movl    16(%ecx),%edx
+       #     in12 = *(uint32 *) (k + 20)
+       movl    20(%ecx),%ebx
+       #     in13 = *(uint32 *) (k + 24)
+       movl    24(%ecx),%esi
+       #     in14 = *(uint32 *) (k + 28)
+       movl    28(%ecx),%ecx
+       #     *(uint32 *) (x + 44) = in11
+       movl    %edx,44(%eax)
+       #     *(uint32 *) (x + 48) = in12
+       movl    %ebx,48(%eax)
+       #     *(uint32 *) (x + 52) = in13
+       movl    %esi,52(%eax)
+       #     *(uint32 *) (x + 56) = in14
+       movl    %ecx,56(%eax)
+       #     in0 = 1634760805
+       mov     $1634760805,%ecx
+       #     in5 = 857760878
+       mov     $857760878,%edx
+       #     in10 = 2036477234
+       mov     $2036477234,%ebx
+       #     in15 = 1797285236
+       mov     $1797285236,%esi
+       #     *(uint32 *) (x + 0) = in0
+       movl    %ecx,0(%eax)
+       #     *(uint32 *) (x + 20) = in5
+       movl    %edx,20(%eax)
+       #     *(uint32 *) (x + 40) = in10
+       movl    %ebx,40(%eax)
+       #     *(uint32 *) (x + 60) = in15
+       movl    %esi,60(%eax)
+       #   goto keysetupdone
+       jmp     ._keysetupdone
+._kbits128:
+       #     in11 = *(uint32 *) (k + 0)
+       movl    0(%ecx),%edx
+       #     in12 = *(uint32 *) (k + 4)
+       movl    4(%ecx),%ebx
+       #     in13 = *(uint32 *) (k + 8)
+       movl    8(%ecx),%esi
+       #     in14 = *(uint32 *) (k + 12)
+       movl    12(%ecx),%ecx
+       #     *(uint32 *) (x + 44) = in11
+       movl    %edx,44(%eax)
+       #     *(uint32 *) (x + 48) = in12
+       movl    %ebx,48(%eax)
+       #     *(uint32 *) (x + 52) = in13
+       movl    %esi,52(%eax)
+       #     *(uint32 *) (x + 56) = in14
+       movl    %ecx,56(%eax)
+       #     in0 = 1634760805
+       mov     $1634760805,%ecx
+       #     in5 = 824206446
+       mov     $824206446,%edx
+       #     in10 = 2036477238
+       mov     $2036477238,%ebx
+       #     in15 = 1797285236
+       mov     $1797285236,%esi
+       #     *(uint32 *) (x + 0) = in0
+       movl    %ecx,0(%eax)
+       #     *(uint32 *) (x + 20) = in5
+       movl    %edx,20(%eax)
+       #     *(uint32 *) (x + 40) = in10
+       movl    %ebx,40(%eax)
+       #     *(uint32 *) (x + 60) = in15
+       movl    %esi,60(%eax)
+._keysetupdone:
+       #   eax = eax_stack
+       movl    64(%esp),%eax
+       #   ebx = ebx_stack
+       movl    68(%esp),%ebx
+       #   esi = esi_stack
+       movl    72(%esp),%esi
+       #   edi = edi_stack
+       movl    76(%esp),%edi
+       #   ebp = ebp_stack
+       movl    80(%esp),%ebp
+       # leave
+       add     %eax,%esp
+       ret
+# enter ECRYPT_ivsetup
+.text
+.p2align 5
+.globl ECRYPT_ivsetup
+ECRYPT_ivsetup:
+       mov     %esp,%eax
+       and     $31,%eax
+       add     $256,%eax
+       sub     %eax,%esp
+       #   eax_stack = eax
+       movl    %eax,64(%esp)
+       #   ebx_stack = ebx
+       movl    %ebx,68(%esp)
+       #   esi_stack = esi
+       movl    %esi,72(%esp)
+       #   edi_stack = edi
+       movl    %edi,76(%esp)
+       #   ebp_stack = ebp
+       movl    %ebp,80(%esp)
+       #   iv = arg2
+       movl    8(%esp,%eax),%ecx
+       #   x = arg1
+       movl    4(%esp,%eax),%eax
+       #   in6 = *(uint32 *) (iv + 0)
+       movl    0(%ecx),%edx
+       #   in7 = *(uint32 *) (iv + 4)
+       movl    4(%ecx),%ecx
+       #   in8 = 0
+       mov     $0,%ebx
+       #   in9 = 0
+       mov     $0,%esi
+       #   *(uint32 *) (x + 24) = in6
+       movl    %edx,24(%eax)
+       #   *(uint32 *) (x + 28) = in7
+       movl    %ecx,28(%eax)
+       #   *(uint32 *) (x + 32) = in8
+       movl    %ebx,32(%eax)
+       #   *(uint32 *) (x + 36) = in9
+       movl    %esi,36(%eax)
+       #   eax = eax_stack
+       movl    64(%esp),%eax
+       #   ebx = ebx_stack
+       movl    68(%esp),%ebx
+       #   esi = esi_stack
+       movl    72(%esp),%esi
+       #   edi = edi_stack
+       movl    76(%esp),%edi
+       #   ebp = ebp_stack
+       movl    80(%esp),%ebp
+       # leave
+       add     %eax,%esp
+       ret
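
The long add/rotate/xor stream in ._mainloop above is the Salsa20 double round, column quarter-rounds followed by row quarter-rounds, with four groups interleaved across registers and stack slots to hide latency; the counter starts at 20 and drops by 4 per pass, so each iteration covers four of Salsa20/20's rounds. The primitive each register group computes is the standard quarter-round (a reference C sketch, with rol32 as in <linux/bitops.h>):

    /* Salsa20 quarter-round: the 7/9/13/18 add-rotate-xor pattern that
     * each register group in the assembly above implements.
     */
    static inline void salsa20_quarterround(u32 *a, u32 *b, u32 *c, u32 *d)
    {
            *b ^= rol32(*a + *d, 7);
            *c ^= rol32(*b + *a, 9);
            *d ^= rol32(*c + *b, 13);
            *a ^= rol32(*d + *c, 18);
    }

The magic numbers in ECRYPT_keysetup are not arbitrary: 1634760805, 857760878, 2036477234 and 1797285236 are the little-endian words of the sigma constant "expand 32-byte key", and the 128-bit path substitutes the tau constant "expand 16-byte key" (hence 824206446 and 2036477238 in the middle words).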
diff --git a/arch/x86/crypto/salsa20-x86_64-asm_64.S b/arch/x86/crypto/salsa20-x86_64-asm_64.S
new file mode 100644 (file)
index 0000000..6214a9b
--- /dev/null
@@ -0,0 +1,920 @@
+# enter ECRYPT_encrypt_bytes
+.text
+.p2align 5
+.globl ECRYPT_encrypt_bytes
+ECRYPT_encrypt_bytes:
+       mov     %rsp,%r11
+       and     $31,%r11
+       add     $256,%r11
+       sub     %r11,%rsp
+       # x = arg1
+       mov     %rdi,%r8
+       # m = arg2
+       mov     %rsi,%rsi
+       # out = arg3
+       mov     %rdx,%rdi
+       # bytes = arg4
+       mov     %rcx,%rdx
+       #               unsigned>? bytes - 0
+       cmp     $0,%rdx
+       # comment:fp stack unchanged by jump
+       # goto done if !unsigned>
+       jbe     ._done
+       # comment:fp stack unchanged by fallthrough
+# start:
+._start:
+       # r11_stack = r11
+       movq    %r11,0(%rsp)
+       # r12_stack = r12
+       movq    %r12,8(%rsp)
+       # r13_stack = r13
+       movq    %r13,16(%rsp)
+       # r14_stack = r14
+       movq    %r14,24(%rsp)
+       # r15_stack = r15
+       movq    %r15,32(%rsp)
+       # rbx_stack = rbx
+       movq    %rbx,40(%rsp)
+       # rbp_stack = rbp
+       movq    %rbp,48(%rsp)
+       # in0 = *(uint64 *) (x + 0)
+       movq    0(%r8),%rcx
+       # in2 = *(uint64 *) (x + 8)
+       movq    8(%r8),%r9
+       # in4 = *(uint64 *) (x + 16)
+       movq    16(%r8),%rax
+       # in6 = *(uint64 *) (x + 24)
+       movq    24(%r8),%r10
+       # in8 = *(uint64 *) (x + 32)
+       movq    32(%r8),%r11
+       # in10 = *(uint64 *) (x + 40)
+       movq    40(%r8),%r12
+       # in12 = *(uint64 *) (x + 48)
+       movq    48(%r8),%r13
+       # in14 = *(uint64 *) (x + 56)
+       movq    56(%r8),%r14
+       # j0 = in0
+       movq    %rcx,56(%rsp)
+       # j2 = in2
+       movq    %r9,64(%rsp)
+       # j4 = in4
+       movq    %rax,72(%rsp)
+       # j6 = in6
+       movq    %r10,80(%rsp)
+       # j8 = in8
+       movq    %r11,88(%rsp)
+       # j10 = in10
+       movq    %r12,96(%rsp)
+       # j12 = in12
+       movq    %r13,104(%rsp)
+       # j14 = in14
+       movq    %r14,112(%rsp)
+       # x_backup = x
+       movq    %r8,120(%rsp)
+# bytesatleast1:
+._bytesatleast1:
+       #                   unsigned<? bytes - 64
+       cmp     $64,%rdx
+       # comment:fp stack unchanged by jump
+       #   goto nocopy if !unsigned<
+       jae     ._nocopy
+       #     ctarget = out
+       movq    %rdi,128(%rsp)
+       #     out = &tmp
+       leaq    192(%rsp),%rdi
+       #     i = bytes
+       mov     %rdx,%rcx
+       #     while (i) { *out++ = *m++; --i }
+       rep     movsb
+       #     out = &tmp
+       leaq    192(%rsp),%rdi
+       #     m = &tmp
+       leaq    192(%rsp),%rsi
+       # comment:fp stack unchanged by fallthrough
+#   nocopy:
+._nocopy:
+       #   out_backup = out
+       movq    %rdi,136(%rsp)
+       #   m_backup = m
+       movq    %rsi,144(%rsp)
+       #   bytes_backup = bytes
+       movq    %rdx,152(%rsp)
+       #   x1 = j0
+       movq    56(%rsp),%rdi
+       #   x0 = x1
+       mov     %rdi,%rdx
+       #   (uint64) x1 >>= 32
+       shr     $32,%rdi
+       #               x3 = j2
+       movq    64(%rsp),%rsi
+       #               x2 = x3
+       mov     %rsi,%rcx
+       #               (uint64) x3 >>= 32
+       shr     $32,%rsi
+       #   x5 = j4
+       movq    72(%rsp),%r8
+       #   x4 = x5
+       mov     %r8,%r9
+       #   (uint64) x5 >>= 32
+       shr     $32,%r8
+       #   x5_stack = x5
+       movq    %r8,160(%rsp)
+       #               x7 = j6
+       movq    80(%rsp),%r8
+       #               x6 = x7
+       mov     %r8,%rax
+       #               (uint64) x7 >>= 32
+       shr     $32,%r8
+       #   x9 = j8
+       movq    88(%rsp),%r10
+       #   x8 = x9
+       mov     %r10,%r11
+       #   (uint64) x9 >>= 32
+       shr     $32,%r10
+       #               x11 = j10
+       movq    96(%rsp),%r12
+       #               x10 = x11
+       mov     %r12,%r13
+       #               x10_stack = x10
+       movq    %r13,168(%rsp)
+       #               (uint64) x11 >>= 32
+       shr     $32,%r12
+       #   x13 = j12
+       movq    104(%rsp),%r13
+       #   x12 = x13
+       mov     %r13,%r14
+       #   (uint64) x13 >>= 32
+       shr     $32,%r13
+       #               x15 = j14
+       movq    112(%rsp),%r15
+       #               x14 = x15
+       mov     %r15,%rbx
+       #               (uint64) x15 >>= 32
+       shr     $32,%r15
+       #               x15_stack = x15
+       movq    %r15,176(%rsp)
+       #   i = 20
+       mov     $20,%r15
+#   mainloop:
+._mainloop:
+       #   i_backup = i
+       movq    %r15,184(%rsp)
+       #               x5 = x5_stack
+       movq    160(%rsp),%r15
+       # a = x12 + x0
+       lea     (%r14,%rdx),%rbp
+       # (uint32) a <<<= 7
+       rol     $7,%ebp
+       # x4 ^= a
+       xor     %rbp,%r9
+       #               b = x1 + x5
+       lea     (%rdi,%r15),%rbp
+       #               (uint32) b <<<= 7
+       rol     $7,%ebp
+       #               x9 ^= b
+       xor     %rbp,%r10
+       # a = x0 + x4
+       lea     (%rdx,%r9),%rbp
+       # (uint32) a <<<= 9
+       rol     $9,%ebp
+       # x8 ^= a
+       xor     %rbp,%r11
+       #               b = x5 + x9
+       lea     (%r15,%r10),%rbp
+       #               (uint32) b <<<= 9
+       rol     $9,%ebp
+       #               x13 ^= b
+       xor     %rbp,%r13
+       # a = x4 + x8
+       lea     (%r9,%r11),%rbp
+       # (uint32) a <<<= 13
+       rol     $13,%ebp
+       # x12 ^= a
+       xor     %rbp,%r14
+       #               b = x9 + x13
+       lea     (%r10,%r13),%rbp
+       #               (uint32) b <<<= 13
+       rol     $13,%ebp
+       #               x1 ^= b
+       xor     %rbp,%rdi
+       # a = x8 + x12
+       lea     (%r11,%r14),%rbp
+       # (uint32) a <<<= 18
+       rol     $18,%ebp
+       # x0 ^= a
+       xor     %rbp,%rdx
+       #               b = x13 + x1
+       lea     (%r13,%rdi),%rbp
+       #               (uint32) b <<<= 18
+       rol     $18,%ebp
+       #               x5 ^= b
+       xor     %rbp,%r15
+       #                               x10 = x10_stack
+       movq    168(%rsp),%rbp
+       #               x5_stack = x5
+       movq    %r15,160(%rsp)
+       #                               c = x6 + x10
+       lea     (%rax,%rbp),%r15
+       #                               (uint32) c <<<= 7
+       rol     $7,%r15d
+       #                               x14 ^= c
+       xor     %r15,%rbx
+       #                               c = x10 + x14
+       lea     (%rbp,%rbx),%r15
+       #                               (uint32) c <<<= 9
+       rol     $9,%r15d
+       #                               x2 ^= c
+       xor     %r15,%rcx
+       #                               c = x14 + x2
+       lea     (%rbx,%rcx),%r15
+       #                               (uint32) c <<<= 13
+       rol     $13,%r15d
+       #                               x6 ^= c
+       xor     %r15,%rax
+       #                               c = x2 + x6
+       lea     (%rcx,%rax),%r15
+       #                               (uint32) c <<<= 18
+       rol     $18,%r15d
+       #                               x10 ^= c
+       xor     %r15,%rbp
+       #                                               x15 = x15_stack
+       movq    176(%rsp),%r15
+       #                               x10_stack = x10
+       movq    %rbp,168(%rsp)
+       #                                               d = x11 + x15
+       lea     (%r12,%r15),%rbp
+       #                                               (uint32) d <<<= 7
+       rol     $7,%ebp
+       #                                               x3 ^= d
+       xor     %rbp,%rsi
+       #                                               d = x15 + x3
+       lea     (%r15,%rsi),%rbp
+       #                                               (uint32) d <<<= 9
+       rol     $9,%ebp
+       #                                               x7 ^= d
+       xor     %rbp,%r8
+       #                                               d = x3 + x7
+       lea     (%rsi,%r8),%rbp
+       #                                               (uint32) d <<<= 13
+       rol     $13,%ebp
+       #                                               x11 ^= d
+       xor     %rbp,%r12
+       #                                               d = x7 + x11
+       lea     (%r8,%r12),%rbp
+       #                                               (uint32) d <<<= 18
+       rol     $18,%ebp
+       #                                               x15 ^= d
+       xor     %rbp,%r15
+       #                                               x15_stack = x15
+       movq    %r15,176(%rsp)
+       #               x5 = x5_stack
+       movq    160(%rsp),%r15
+       # a = x3 + x0
+       lea     (%rsi,%rdx),%rbp
+       # (uint32) a <<<= 7
+       rol     $7,%ebp
+       # x1 ^= a
+       xor     %rbp,%rdi
+       #               b = x4 + x5
+       lea     (%r9,%r15),%rbp
+       #               (uint32) b <<<= 7
+       rol     $7,%ebp
+       #               x6 ^= b
+       xor     %rbp,%rax
+       # a = x0 + x1
+       lea     (%rdx,%rdi),%rbp
+       # (uint32) a <<<= 9
+       rol     $9,%ebp
+       # x2 ^= a
+       xor     %rbp,%rcx
+       #               b = x5 + x6
+       lea     (%r15,%rax),%rbp
+       #               (uint32) b <<<= 9
+       rol     $9,%ebp
+       #               x7 ^= b
+       xor     %rbp,%r8
+       # a = x1 + x2
+       lea     (%rdi,%rcx),%rbp
+       # (uint32) a <<<= 13
+       rol     $13,%ebp
+       # x3 ^= a
+       xor     %rbp,%rsi
+       #               b = x6 + x7
+       lea     (%rax,%r8),%rbp
+       #               (uint32) b <<<= 13
+       rol     $13,%ebp
+       #               x4 ^= b
+       xor     %rbp,%r9
+       # a = x2 + x3
+       lea     (%rcx,%rsi),%rbp
+       # (uint32) a <<<= 18
+       rol     $18,%ebp
+       # x0 ^= a
+       xor     %rbp,%rdx
+       #               b = x7 + x4
+       lea     (%r8,%r9),%rbp
+       #               (uint32) b <<<= 18
+       rol     $18,%ebp
+       #               x5 ^= b
+       xor     %rbp,%r15
+       #                               x10 = x10_stack
+       movq    168(%rsp),%rbp
+       #               x5_stack = x5
+       movq    %r15,160(%rsp)
+       #                               c = x9 + x10
+       lea     (%r10,%rbp),%r15
+       #                               (uint32) c <<<= 7
+       rol     $7,%r15d
+       #                               x11 ^= c
+       xor     %r15,%r12
+       #                               c = x10 + x11
+       lea     (%rbp,%r12),%r15
+       #                               (uint32) c <<<= 9
+       rol     $9,%r15d
+       #                               x8 ^= c
+       xor     %r15,%r11
+       #                               c = x11 + x8
+       lea     (%r12,%r11),%r15
+       #                               (uint32) c <<<= 13
+       rol     $13,%r15d
+       #                               x9 ^= c
+       xor     %r15,%r10
+       #                               c = x8 + x9
+       lea     (%r11,%r10),%r15
+       #                               (uint32) c <<<= 18
+       rol     $18,%r15d
+       #                               x10 ^= c
+       xor     %r15,%rbp
+       #                                               x15 = x15_stack
+       movq    176(%rsp),%r15
+       #                               x10_stack = x10
+       movq    %rbp,168(%rsp)
+       #                                               d = x14 + x15
+       lea     (%rbx,%r15),%rbp
+       #                                               (uint32) d <<<= 7
+       rol     $7,%ebp
+       #                                               x12 ^= d
+       xor     %rbp,%r14
+       #                                               d = x15 + x12
+       lea     (%r15,%r14),%rbp
+       #                                               (uint32) d <<<= 9
+       rol     $9,%ebp
+       #                                               x13 ^= d
+       xor     %rbp,%r13
+       #                                               d = x12 + x13
+       lea     (%r14,%r13),%rbp
+       #                                               (uint32) d <<<= 13
+       rol     $13,%ebp
+       #                                               x14 ^= d
+       xor     %rbp,%rbx
+       #                                               d = x13 + x14
+       lea     (%r13,%rbx),%rbp
+       #                                               (uint32) d <<<= 18
+       rol     $18,%ebp
+       #                                               x15 ^= d
+       xor     %rbp,%r15
+       #                                               x15_stack = x15
+       movq    %r15,176(%rsp)
+       #               x5 = x5_stack
+       movq    160(%rsp),%r15
+       # a = x12 + x0
+       lea     (%r14,%rdx),%rbp
+       # (uint32) a <<<= 7
+       rol     $7,%ebp
+       # x4 ^= a
+       xor     %rbp,%r9
+       #               b = x1 + x5
+       lea     (%rdi,%r15),%rbp
+       #               (uint32) b <<<= 7
+       rol     $7,%ebp
+       #               x9 ^= b
+       xor     %rbp,%r10
+       # a = x0 + x4
+       lea     (%rdx,%r9),%rbp
+       # (uint32) a <<<= 9
+       rol     $9,%ebp
+       # x8 ^= a
+       xor     %rbp,%r11
+       #               b = x5 + x9
+       lea     (%r15,%r10),%rbp
+       #               (uint32) b <<<= 9
+       rol     $9,%ebp
+       #               x13 ^= b
+       xor     %rbp,%r13
+       # a = x4 + x8
+       lea     (%r9,%r11),%rbp
+       # (uint32) a <<<= 13
+       rol     $13,%ebp
+       # x12 ^= a
+       xor     %rbp,%r14
+       #               b = x9 + x13
+       lea     (%r10,%r13),%rbp
+       #               (uint32) b <<<= 13
+       rol     $13,%ebp
+       #               x1 ^= b
+       xor     %rbp,%rdi
+       # a = x8 + x12
+       lea     (%r11,%r14),%rbp
+       # (uint32) a <<<= 18
+       rol     $18,%ebp
+       # x0 ^= a
+       xor     %rbp,%rdx
+       #               b = x13 + x1
+       lea     (%r13,%rdi),%rbp
+       #               (uint32) b <<<= 18
+       rol     $18,%ebp
+       #               x5 ^= b
+       xor     %rbp,%r15
+       #                               x10 = x10_stack
+       movq    168(%rsp),%rbp
+       #               x5_stack = x5
+       movq    %r15,160(%rsp)
+       #                               c = x6 + x10
+       lea     (%rax,%rbp),%r15
+       #                               (uint32) c <<<= 7
+       rol     $7,%r15d
+       #                               x14 ^= c
+       xor     %r15,%rbx
+       #                               c = x10 + x14
+       lea     (%rbp,%rbx),%r15
+       #                               (uint32) c <<<= 9
+       rol     $9,%r15d
+       #                               x2 ^= c
+       xor     %r15,%rcx
+       #                               c = x14 + x2
+       lea     (%rbx,%rcx),%r15
+       #                               (uint32) c <<<= 13
+       rol     $13,%r15d
+       #                               x6 ^= c
+       xor     %r15,%rax
+       #                               c = x2 + x6
+       lea     (%rcx,%rax),%r15
+       #                               (uint32) c <<<= 18
+       rol     $18,%r15d
+       #                               x10 ^= c
+       xor     %r15,%rbp
+       #                                               x15 = x15_stack
+       movq    176(%rsp),%r15
+       #                               x10_stack = x10
+       movq    %rbp,168(%rsp)
+       #                                               d = x11 + x15
+       lea     (%r12,%r15),%rbp
+       #                                               (uint32) d <<<= 7
+       rol     $7,%ebp
+       #                                               x3 ^= d
+       xor     %rbp,%rsi
+       #                                               d = x15 + x3
+       lea     (%r15,%rsi),%rbp
+       #                                               (uint32) d <<<= 9
+       rol     $9,%ebp
+       #                                               x7 ^= d
+       xor     %rbp,%r8
+       #                                               d = x3 + x7
+       lea     (%rsi,%r8),%rbp
+       #                                               (uint32) d <<<= 13
+       rol     $13,%ebp
+       #                                               x11 ^= d
+       xor     %rbp,%r12
+       #                                               d = x7 + x11
+       lea     (%r8,%r12),%rbp
+       #                                               (uint32) d <<<= 18
+       rol     $18,%ebp
+       #                                               x15 ^= d
+       xor     %rbp,%r15
+       #                                               x15_stack = x15
+       movq    %r15,176(%rsp)
+       #               x5 = x5_stack
+       movq    160(%rsp),%r15
+       # a = x3 + x0
+       lea     (%rsi,%rdx),%rbp
+       # (uint32) a <<<= 7
+       rol     $7,%ebp
+       # x1 ^= a
+       xor     %rbp,%rdi
+       #               b = x4 + x5
+       lea     (%r9,%r15),%rbp
+       #               (uint32) b <<<= 7
+       rol     $7,%ebp
+       #               x6 ^= b
+       xor     %rbp,%rax
+       # a = x0 + x1
+       lea     (%rdx,%rdi),%rbp
+       # (uint32) a <<<= 9
+       rol     $9,%ebp
+       # x2 ^= a
+       xor     %rbp,%rcx
+       #               b = x5 + x6
+       lea     (%r15,%rax),%rbp
+       #               (uint32) b <<<= 9
+       rol     $9,%ebp
+       #               x7 ^= b
+       xor     %rbp,%r8
+       # a = x1 + x2
+       lea     (%rdi,%rcx),%rbp
+       # (uint32) a <<<= 13
+       rol     $13,%ebp
+       # x3 ^= a
+       xor     %rbp,%rsi
+       #               b = x6 + x7
+       lea     (%rax,%r8),%rbp
+       #               (uint32) b <<<= 13
+       rol     $13,%ebp
+       #               x4 ^= b
+       xor     %rbp,%r9
+       # a = x2 + x3
+       lea     (%rcx,%rsi),%rbp
+       # (uint32) a <<<= 18
+       rol     $18,%ebp
+       # x0 ^= a
+       xor     %rbp,%rdx
+       #               b = x7 + x4
+       lea     (%r8,%r9),%rbp
+       #               (uint32) b <<<= 18
+       rol     $18,%ebp
+       #               x5 ^= b
+       xor     %rbp,%r15
+       #                               x10 = x10_stack
+       movq    168(%rsp),%rbp
+       #               x5_stack = x5
+       movq    %r15,160(%rsp)
+       #                               c = x9 + x10
+       lea     (%r10,%rbp),%r15
+       #                               (uint32) c <<<= 7
+       rol     $7,%r15d
+       #                               x11 ^= c
+       xor     %r15,%r12
+       #                               c = x10 + x11
+       lea     (%rbp,%r12),%r15
+       #                               (uint32) c <<<= 9
+       rol     $9,%r15d
+       #                               x8 ^= c
+       xor     %r15,%r11
+       #                               c = x11 + x8
+       lea     (%r12,%r11),%r15
+       #                               (uint32) c <<<= 13
+       rol     $13,%r15d
+       #                               x9 ^= c
+       xor     %r15,%r10
+       #                               c = x8 + x9
+       lea     (%r11,%r10),%r15
+       #                               (uint32) c <<<= 18
+       rol     $18,%r15d
+       #                               x10 ^= c
+       xor     %r15,%rbp
+       #                                               x15 = x15_stack
+       movq    176(%rsp),%r15
+       #                               x10_stack = x10
+       movq    %rbp,168(%rsp)
+       #                                               d = x14 + x15
+       lea     (%rbx,%r15),%rbp
+       #                                               (uint32) d <<<= 7
+       rol     $7,%ebp
+       #                                               x12 ^= d
+       xor     %rbp,%r14
+       #                                               d = x15 + x12
+       lea     (%r15,%r14),%rbp
+       #                                               (uint32) d <<<= 9
+       rol     $9,%ebp
+       #                                               x13 ^= d
+       xor     %rbp,%r13
+       #                                               d = x12 + x13
+       lea     (%r14,%r13),%rbp
+       #                                               (uint32) d <<<= 13
+       rol     $13,%ebp
+       #                                               x14 ^= d
+       xor     %rbp,%rbx
+       #                                               d = x13 + x14
+       lea     (%r13,%rbx),%rbp
+       #                                               (uint32) d <<<= 18
+       rol     $18,%ebp
+       #                                               x15 ^= d
+       xor     %rbp,%r15
+       #                                               x15_stack = x15
+       movq    %r15,176(%rsp)
+       #   i = i_backup
+       movq    184(%rsp),%r15
+       #                  unsigned>? i -= 4
+       sub     $4,%r15
+       # comment:fp stack unchanged by jump
+       # goto mainloop if unsigned>
+       ja      ._mainloop
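+       # (The loop above performs four rounds per pass, hence i -= 4:
+       #  a column round and a row round, each done twice.  Every
+       #  quarter-round is the same three-instruction pattern: lea for
+       #  the 32-bit add, rol on the low doubleword for the rotate,
+       #  and xor to fold the result into the target word.)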
+       #   (uint32) x2 += j2
+       addl    64(%rsp),%ecx
+       #   x3 <<= 32
+       shl     $32,%rsi
+       #   x3 += j2
+       addq    64(%rsp),%rsi
+       #   (uint64) x3 >>= 32
+       shr     $32,%rsi
+       #   x3 <<= 32
+       shl     $32,%rsi
+       #   x2 += x3
+       add     %rsi,%rcx
+       #   (uint32) x6 += j6
+       addl    80(%rsp),%eax
+       #   x7 <<= 32
+       shl     $32,%r8
+       #   x7 += j6
+       addq    80(%rsp),%r8
+       #   (uint64) x7 >>= 32
+       shr     $32,%r8
+       #   x7 <<= 32
+       shl     $32,%r8
+       #   x6 += x7
+       add     %r8,%rax
+       #   (uint32) x8 += j8
+       addl    88(%rsp),%r11d
+       #   x9 <<= 32
+       shl     $32,%r10
+       #   x9 += j8
+       addq    88(%rsp),%r10
+       #   (uint64) x9 >>= 32
+       shr     $32,%r10
+       #   x9 <<= 32
+       shl     $32,%r10
+       #   x8 += x9
+       add     %r10,%r11
+       #   (uint32) x12 += j12
+       addl    104(%rsp),%r14d
+       #   x13 <<= 32
+       shl     $32,%r13
+       #   x13 += j12
+       addq    104(%rsp),%r13
+       #   (uint64) x13 >>= 32
+       shr     $32,%r13
+       #   x13 <<= 32
+       shl     $32,%r13
+       #   x12 += x13
+       add     %r13,%r14
+       #   (uint32) x0 += j0
+       addl    56(%rsp),%edx
+       #   x1 <<= 32
+       shl     $32,%rdi
+       #   x1 += j0
+       addq    56(%rsp),%rdi
+       #   (uint64) x1 >>= 32
+       shr     $32,%rdi
+       #   x1 <<= 32
+       shl     $32,%rdi
+       #   x0 += x1
+       add     %rdi,%rdx
+       #   x5 = x5_stack
+       movq    160(%rsp),%rdi
+       #   (uint32) x4 += j4
+       addl    72(%rsp),%r9d
+       #   x5 <<= 32
+       shl     $32,%rdi
+       #   x5 += j4
+       addq    72(%rsp),%rdi
+       #   (uint64) x5 >>= 32
+       shr     $32,%rdi
+       #   x5 <<= 32
+       shl     $32,%rdi
+       #   x4 += x5
+       add     %rdi,%r9
+       #   x10 = x10_stack
+       movq    168(%rsp),%r8
+       #   (uint32) x10 += j10
+       addl    96(%rsp),%r8d
+       #   x11 <<= 32
+       shl     $32,%r12
+       #   x11 += j10
+       addq    96(%rsp),%r12
+       #   (uint64) x11 >>= 32
+       shr     $32,%r12
+       #   x11 <<= 32
+       shl     $32,%r12
+       #   x10 += x11
+       add     %r12,%r8
+       #   x15 = x15_stack
+       movq    176(%rsp),%rdi
+       #   (uint32) x14 += j14
+       addl    112(%rsp),%ebx
+       #   x15 <<= 32
+       shl     $32,%rdi
+       #   x15 += j14
+       addq    112(%rsp),%rdi
+       #   (uint64) x15 >>= 32
+       shr     $32,%rdi
+       #   x15 <<= 32
+       shl     $32,%rdi
+       #   x14 += x15
+       add     %rdi,%rbx
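+       # (Each addl/shl/addq/shr/shl/add sequence above adds a saved
+       #  pair of j words to a pair of x words and repacks the two
+       #  32-bit sums into one 64-bit register, even-numbered word in
+       #  the low half and odd-numbered word in the high half, ready
+       #  for the 64-bit XORs with the message below.)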
+       #   out = out_backup
+       movq    136(%rsp),%rdi
+       #   m = m_backup
+       movq    144(%rsp),%rsi
+       #   x0 ^= *(uint64 *) (m + 0)
+       xorq    0(%rsi),%rdx
+       #   *(uint64 *) (out + 0) = x0
+       movq    %rdx,0(%rdi)
+       #   x2 ^= *(uint64 *) (m + 8)
+       xorq    8(%rsi),%rcx
+       #   *(uint64 *) (out + 8) = x2
+       movq    %rcx,8(%rdi)
+       #   x4 ^= *(uint64 *) (m + 16)
+       xorq    16(%rsi),%r9
+       #   *(uint64 *) (out + 16) = x4
+       movq    %r9,16(%rdi)
+       #   x6 ^= *(uint64 *) (m + 24)
+       xorq    24(%rsi),%rax
+       #   *(uint64 *) (out + 24) = x6
+       movq    %rax,24(%rdi)
+       #   x8 ^= *(uint64 *) (m + 32)
+       xorq    32(%rsi),%r11
+       #   *(uint64 *) (out + 32) = x8
+       movq    %r11,32(%rdi)
+       #   x10 ^= *(uint64 *) (m + 40)
+       xorq    40(%rsi),%r8
+       #   *(uint64 *) (out + 40) = x10
+       movq    %r8,40(%rdi)
+       #   x12 ^= *(uint64 *) (m + 48)
+       xorq    48(%rsi),%r14
+       #   *(uint64 *) (out + 48) = x12
+       movq    %r14,48(%rdi)
+       #   x14 ^= *(uint64 *) (m + 56)
+       xorq    56(%rsi),%rbx
+       #   *(uint64 *) (out + 56) = x14
+       movq    %rbx,56(%rdi)
+       #   bytes = bytes_backup
+       movq    152(%rsp),%rdx
+       #   in8 = j8
+       movq    88(%rsp),%rcx
+       #   in8 += 1
+       add     $1,%rcx
+       #   j8 = in8
+       movq    %rcx,88(%rsp)
+       #                          unsigned>? unsigned<? bytes - 64
+       cmp     $64,%rdx
+       # comment:fp stack unchanged by jump
+       #   goto bytesatleast65 if unsigned>
+       ja      ._bytesatleast65
+       # comment:fp stack unchanged by jump
+       #     goto bytesatleast64 if !unsigned<
+       jae     ._bytesatleast64
+       #       m = out
+       mov     %rdi,%rsi
+       #       out = ctarget
+       movq    128(%rsp),%rdi
+       #       i = bytes
+       mov     %rdx,%rcx
+       #       while (i) { *out++ = *m++; --i }
+       rep     movsb
+       # comment:fp stack unchanged by fallthrough
+#     bytesatleast64:
+._bytesatleast64:
+       #     x = x_backup
+       movq    120(%rsp),%rdi
+       #     in8 = j8
+       movq    88(%rsp),%rsi
+       #     *(uint64 *) (x + 32) = in8
+       movq    %rsi,32(%rdi)
+       #     r11 = r11_stack
+       movq    0(%rsp),%r11
+       #     r12 = r12_stack
+       movq    8(%rsp),%r12
+       #     r13 = r13_stack
+       movq    16(%rsp),%r13
+       #     r14 = r14_stack
+       movq    24(%rsp),%r14
+       #     r15 = r15_stack
+       movq    32(%rsp),%r15
+       #     rbx = rbx_stack
+       movq    40(%rsp),%rbx
+       #     rbp = rbp_stack
+       movq    48(%rsp),%rbp
+       # comment:fp stack unchanged by fallthrough
+#     done:
+._done:
+       #     leave
+       add     %r11,%rsp
+       mov     %rdi,%rax
+       mov     %rsi,%rdx
+       ret
+#   bytesatleast65:
+._bytesatleast65:
+       #   bytes -= 64
+       sub     $64,%rdx
+       #   out += 64
+       add     $64,%rdi
+       #   m += 64
+       add     $64,%rsi
+       # comment:fp stack unchanged by jump
+       # goto bytesatleast1
+       jmp     ._bytesatleast1
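+# (The tail above splits three ways: more than 64 bytes remaining
+#  loops back for another block; exactly 64 stores the incremented
+#  block counter and restores the callee-saved registers; fewer than
+#  64 means the block was generated into a stack buffer, which
+#  rep movsb copies out to the real destination ctarget.)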
+# enter ECRYPT_keysetup
+.text
+.p2align 5
+.globl ECRYPT_keysetup
+ECRYPT_keysetup:
+       mov     %rsp,%r11
+       and     $31,%r11
+       add     $256,%r11
+       sub     %r11,%rsp
+       #   k = arg2
+       mov     %rsi,%rsi
+       #   kbits = arg3
+       mov     %rdx,%rdx
+       #   x = arg1
+       mov     %rdi,%rdi
+       #   in0 = *(uint64 *) (k + 0)
+       movq    0(%rsi),%r8
+       #   in2 = *(uint64 *) (k + 8)
+       movq    8(%rsi),%r9
+       #   *(uint64 *) (x + 4) = in0
+       movq    %r8,4(%rdi)
+       #   *(uint64 *) (x + 12) = in2
+       movq    %r9,12(%rdi)
+       #                    unsigned<? kbits - 256
+       cmp     $256,%rdx
+       # comment:fp stack unchanged by jump
+       #   goto kbits128 if unsigned<
+       jb      ._kbits128
+#   kbits256:
+._kbits256:
+       #     in10 = *(uint64 *) (k + 16)
+       movq    16(%rsi),%rdx
+       #     in12 = *(uint64 *) (k + 24)
+       movq    24(%rsi),%rsi
+       #     *(uint64 *) (x + 44) = in10
+       movq    %rdx,44(%rdi)
+       #     *(uint64 *) (x + 52) = in12
+       movq    %rsi,52(%rdi)
+       #     in0 = 1634760805
+       mov     $1634760805,%rsi
+       #     in4 = 857760878
+       mov     $857760878,%rdx
+       #     in10 = 2036477234
+       mov     $2036477234,%rcx
+       #     in14 = 1797285236
+       mov     $1797285236,%r8
+       #     *(uint32 *) (x + 0) = in0
+       movl    %esi,0(%rdi)
+       #     *(uint32 *) (x + 20) = in4
+       movl    %edx,20(%rdi)
+       #     *(uint32 *) (x + 40) = in10
+       movl    %ecx,40(%rdi)
+       #     *(uint32 *) (x + 60) = in14
+       movl    %r8d,60(%rdi)
+       # comment:fp stack unchanged by jump
+       #   goto keysetupdone
+       jmp     ._keysetupdone
+#   kbits128:
+._kbits128:
+       #     in10 = *(uint64 *) (k + 0)
+       movq    0(%rsi),%rdx
+       #     in12 = *(uint64 *) (k + 8)
+       movq    8(%rsi),%rsi
+       #     *(uint64 *) (x + 44) = in10
+       movq    %rdx,44(%rdi)
+       #     *(uint64 *) (x + 52) = in12
+       movq    %rsi,52(%rdi)
+       #     in0 = 1634760805
+       mov     $1634760805,%rsi
+       #     in4 = 824206446
+       mov     $824206446,%rdx
+       #     in10 = 2036477238
+       mov     $2036477238,%rcx
+       #     in14 = 1797285236
+       mov     $1797285236,%r8
+       #     *(uint32 *) (x + 0) = in0
+       movl    %esi,0(%rdi)
+       #     *(uint32 *) (x + 20) = in4
+       movl    %edx,20(%rdi)
+       #     *(uint32 *) (x + 40) = in10
+       movl    %ecx,40(%rdi)
+       #     *(uint32 *) (x + 60) = in14
+       movl    %r8d,60(%rdi)
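+       #     (for either key size the four constants are the
+       #      little-endian ASCII words of Salsa20's diagonal input:
+       #      "expand 32-byte k" for 256-bit keys, "expand 16-byte k"
+       #      for 128-bit keys)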
+#   keysetupdone:
+._keysetupdone:
+       # leave
+       add     %r11,%rsp
+       mov     %rdi,%rax
+       mov     %rsi,%rdx
+       ret
+# enter ECRYPT_ivsetup
+.text
+.p2align 5
+.globl ECRYPT_ivsetup
+ECRYPT_ivsetup:
+       mov     %rsp,%r11
+       and     $31,%r11
+       add     $256,%r11
+       sub     %r11,%rsp
+       #   iv = arg2
+       mov     %rsi,%rsi
+       #   x = arg1
+       mov     %rdi,%rdi
+       #   in6 = *(uint64 *) (iv + 0)
+       movq    0(%rsi),%rsi
+       #   in8 = 0
+       mov     $0,%r8
+       #   *(uint64 *) (x + 24) = in6
+       movq    %rsi,24(%rdi)
+       #   *(uint64 *) (x + 32) = in8
+       movq    %r8,32(%rdi)
+       # leave
+       add     %r11,%rsp
+       mov     %rdi,%rax
+       mov     %rsi,%rdx
+       ret
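
For orientation, the qhasm-generated code above computes the standard Salsa20 core: twenty rounds (ten double rounds) over a 4x4 matrix of 32-bit words, followed by a feedforward addition of the input words. A minimal C sketch of that core, following Bernstein's reference description rather than this file's register allocation:

	#include <stdint.h>
	#include <string.h>

	#define ROTL32(v, n) (((v) << (n)) | ((v) >> (32 - (n))))

	/* One Salsa20 double round: a column round then a row round.
	 * Each line is one quarter-round, the add/rotate/xor pattern
	 * seen in the assembly above. */
	static void salsa20_doubleround(uint32_t x[16])
	{
		x[ 4] ^= ROTL32(x[ 0] + x[12],  7);	/* column round */
		x[ 8] ^= ROTL32(x[ 4] + x[ 0],  9);
		x[12] ^= ROTL32(x[ 8] + x[ 4], 13);
		x[ 0] ^= ROTL32(x[12] + x[ 8], 18);
		x[ 9] ^= ROTL32(x[ 5] + x[ 1],  7);
		x[13] ^= ROTL32(x[ 9] + x[ 5],  9);
		x[ 1] ^= ROTL32(x[13] + x[ 9], 13);
		x[ 5] ^= ROTL32(x[ 1] + x[13], 18);
		x[14] ^= ROTL32(x[10] + x[ 6],  7);
		x[ 2] ^= ROTL32(x[14] + x[10],  9);
		x[ 6] ^= ROTL32(x[ 2] + x[14], 13);
		x[10] ^= ROTL32(x[ 6] + x[ 2], 18);
		x[ 3] ^= ROTL32(x[15] + x[11],  7);
		x[ 7] ^= ROTL32(x[ 3] + x[15],  9);
		x[11] ^= ROTL32(x[ 7] + x[ 3], 13);
		x[15] ^= ROTL32(x[11] + x[ 7], 18);

		x[ 1] ^= ROTL32(x[ 0] + x[ 3],  7);	/* row round */
		x[ 2] ^= ROTL32(x[ 1] + x[ 0],  9);
		x[ 3] ^= ROTL32(x[ 2] + x[ 1], 13);
		x[ 0] ^= ROTL32(x[ 3] + x[ 2], 18);
		x[ 6] ^= ROTL32(x[ 5] + x[ 4],  7);
		x[ 7] ^= ROTL32(x[ 6] + x[ 5],  9);
		x[ 4] ^= ROTL32(x[ 7] + x[ 6], 13);
		x[ 5] ^= ROTL32(x[ 4] + x[ 7], 18);
		x[11] ^= ROTL32(x[10] + x[ 9],  7);
		x[ 8] ^= ROTL32(x[11] + x[10],  9);
		x[ 9] ^= ROTL32(x[ 8] + x[11], 13);
		x[10] ^= ROTL32(x[ 9] + x[ 8], 18);
		x[12] ^= ROTL32(x[15] + x[14],  7);
		x[13] ^= ROTL32(x[12] + x[15],  9);
		x[14] ^= ROTL32(x[13] + x[12], 13);
		x[15] ^= ROTL32(x[14] + x[13], 18);
	}

	/* The hash: 10 double rounds, then add the original input back
	 * in (the "x += j" feedforward in the assembly above). */
	static void salsa20_core(uint32_t out[16], const uint32_t in[16])
	{
		uint32_t x[16];
		int i;

		memcpy(x, in, sizeof(x));
		for (i = 0; i < 10; i++)
			salsa20_doubleround(x);
		for (i = 0; i < 16; i++)
			out[i] = x[i] + in[i];
	}
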
diff --git a/arch/x86/crypto/salsa20_glue.c b/arch/x86/crypto/salsa20_glue.c
new file mode 100644
index 0000000..bccb76d
--- /dev/null
@@ -0,0 +1,129 @@
+/*
+ * Glue code for the optimized assembly version of Salsa20.
+ *
+ * Copyright (c) 2007 Tan Swee Heng <thesweeheng@gmail.com>
+ *
+ * The assembly code is public domain, written by Daniel J. Bernstein
+ * <djb@cr.yp.to>.  It has been modified to add indentation and to
+ * remove extraneous comments and functions that are not needed.
+ * - i586 version, renamed as salsa20-i586-asm_32.S
+ *   available from <http://cr.yp.to/snuffle/salsa20/x86-pm/salsa20.s>
+ * - x86-64 version, renamed as salsa20-x86_64-asm_64.S
+ *   available from <http://cr.yp.to/snuffle/salsa20/amd64-3/salsa20.s>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <crypto/algapi.h>
+#include <linux/module.h>
+#include <linux/crypto.h>
+
+#define SALSA20_IV_SIZE        8U
+#define SALSA20_MIN_KEY_SIZE  16U
+#define SALSA20_MAX_KEY_SIZE  32U
+
+/* use the ECRYPT_* function names */
+#define salsa20_keysetup        ECRYPT_keysetup
+#define salsa20_ivsetup         ECRYPT_ivsetup
+#define salsa20_encrypt_bytes   ECRYPT_encrypt_bytes
+
+struct salsa20_ctx
+{
+       u32 input[16];
+};
+
+asmlinkage void salsa20_keysetup(struct salsa20_ctx *ctx, const u8 *k,
+                                u32 keysize, u32 ivsize);
+asmlinkage void salsa20_ivsetup(struct salsa20_ctx *ctx, const u8 *iv);
+asmlinkage void salsa20_encrypt_bytes(struct salsa20_ctx *ctx,
+                                     const u8 *src, u8 *dst, u32 bytes);
+
+static int setkey(struct crypto_tfm *tfm, const u8 *key,
+                 unsigned int keysize)
+{
+       struct salsa20_ctx *ctx = crypto_tfm_ctx(tfm);
+       salsa20_keysetup(ctx, key, keysize*8, SALSA20_IV_SIZE*8);
+       return 0;
+}
+
+static int encrypt(struct blkcipher_desc *desc,
+                  struct scatterlist *dst, struct scatterlist *src,
+                  unsigned int nbytes)
+{
+       struct blkcipher_walk walk;
+       struct crypto_blkcipher *tfm = desc->tfm;
+       struct salsa20_ctx *ctx = crypto_blkcipher_ctx(tfm);
+       int err;
+
+       blkcipher_walk_init(&walk, dst, src, nbytes);
+       err = blkcipher_walk_virt_block(desc, &walk, 64);
+
+       salsa20_ivsetup(ctx, walk.iv);
+
+       if (likely(walk.nbytes == nbytes))
+       {
+               salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
+                                     walk.dst.virt.addr, nbytes);
+               return blkcipher_walk_done(desc, &walk, 0);
+       }
+
+       while (walk.nbytes >= 64) {
+               salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
+                                     walk.dst.virt.addr,
+                                     walk.nbytes - (walk.nbytes % 64));
+               err = blkcipher_walk_done(desc, &walk, walk.nbytes % 64);
+       }
+
+       if (walk.nbytes) {
+               salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
+                                     walk.dst.virt.addr, walk.nbytes);
+               err = blkcipher_walk_done(desc, &walk, 0);
+       }
+
+       return err;
+}
+
+static struct crypto_alg alg = {
+       .cra_name           =   "salsa20",
+       .cra_driver_name    =   "salsa20-asm",
+       .cra_priority       =   200,
+       .cra_flags          =   CRYPTO_ALG_TYPE_BLKCIPHER,
+       .cra_type           =   &crypto_blkcipher_type,
+       .cra_blocksize      =   1,
+       .cra_ctxsize        =   sizeof(struct salsa20_ctx),
+       .cra_alignmask      =   3,
+       .cra_module         =   THIS_MODULE,
+       .cra_list           =   LIST_HEAD_INIT(alg.cra_list),
+       .cra_u              =   {
+               .blkcipher = {
+                       .setkey         =   setkey,
+                       .encrypt        =   encrypt,
+                       .decrypt        =   encrypt,
+                       .min_keysize    =   SALSA20_MIN_KEY_SIZE,
+                       .max_keysize    =   SALSA20_MAX_KEY_SIZE,
+                       .ivsize         =   SALSA20_IV_SIZE,
+               }
+       }
+};
+
+static int __init init(void)
+{
+       return crypto_register_alg(&alg);
+}
+
+static void __exit fini(void)
+{
+       crypto_unregister_alg(&alg);
+}
+
+module_init(init);
+module_exit(fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION ("Salsa20 stream cipher algorithm (optimized assembly version)");
+MODULE_ALIAS("salsa20");
+MODULE_ALIAS("salsa20-asm");
diff --git a/arch/x86/crypto/twofish_64.c b/arch/x86/crypto/twofish_64.c
deleted file mode 100644
index 182d91d..0000000
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Glue Code for optimized x86_64 assembler version of TWOFISH
- *
- * Originally Twofish for GPG
- * By Matthew Skala <mskala@ansuz.sooke.bc.ca>, July 26, 1998
- * 256-bit key length added March 20, 1999
- * Some modifications to reduce the text size by Werner Koch, April, 1998
- * Ported to the kerneli patch by Marc Mutz <Marc@Mutz.com>
- * Ported to CryptoAPI by Colin Slater <hoho@tacomeat.net>
- *
- * The original author has disclaimed all copyright interest in this
- * code and thus put it in the public domain. The subsequent authors
- * have put this under the GNU General Public License.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
- * USA
- *
- * This code is a "clean room" implementation, written from the paper
- * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey,
- * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available
- * through http://www.counterpane.com/twofish.html
- *
- * For background information on multiplication in finite fields, used for
- * the matrix operations in the key schedule, see the book _Contemporary
- * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the
- * Third Edition.
- */
-
-#include <crypto/twofish.h>
-#include <linux/crypto.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/types.h>
-
-asmlinkage void twofish_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
-asmlinkage void twofish_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
-
-static void twofish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
-       twofish_enc_blk(tfm, dst, src);
-}
-
-static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
-       twofish_dec_blk(tfm, dst, src);
-}
-
-static struct crypto_alg alg = {
-       .cra_name               =       "twofish",
-       .cra_driver_name        =       "twofish-x86_64",
-       .cra_priority           =       200,
-       .cra_flags              =       CRYPTO_ALG_TYPE_CIPHER,
-       .cra_blocksize          =       TF_BLOCK_SIZE,
-       .cra_ctxsize            =       sizeof(struct twofish_ctx),
-       .cra_alignmask          =       3,
-       .cra_module             =       THIS_MODULE,
-       .cra_list               =       LIST_HEAD_INIT(alg.cra_list),
-       .cra_u                  =       {
-               .cipher = {
-                       .cia_min_keysize        =       TF_MIN_KEY_SIZE,
-                       .cia_max_keysize        =       TF_MAX_KEY_SIZE,
-                       .cia_setkey             =       twofish_setkey,
-                       .cia_encrypt            =       twofish_encrypt,
-                       .cia_decrypt            =       twofish_decrypt
-               }
-       }
-};
-
-static int __init init(void)
-{
-       return crypto_register_alg(&alg);
-}
-
-static void __exit fini(void)
-{
-       crypto_unregister_alg(&alg);
-}
-
-module_init(init);
-module_exit(fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION ("Twofish Cipher Algorithm, x86_64 asm optimized");
-MODULE_ALIAS("twofish");
similarity index 94%
rename from arch/x86/crypto/twofish_32.c
rename to arch/x86/crypto/twofish_glue.c
index e3004dfe9c7abdeba724d84ca99973e8d26fa78b..cefaf8b9aa18142ff6bb6586f572a6209519d86f 100644
@@ -1,5 +1,5 @@
 /*
- *  Glue Code for optimized 586 assembler version of TWOFISH
+ * Glue Code for assembler optimized version of TWOFISH
  *
  * Originally Twofish for GPG
  * By Matthew Skala <mskala@ansuz.sooke.bc.ca>, July 26, 1998
@@ -44,7 +44,6 @@
 #include <linux/module.h>
 #include <linux/types.h>
 
-
 asmlinkage void twofish_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
 asmlinkage void twofish_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
 
@@ -60,7 +59,7 @@ static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 
 static struct crypto_alg alg = {
        .cra_name               =       "twofish",
-       .cra_driver_name        =       "twofish-i586",
+       .cra_driver_name        =       "twofish-asm",
        .cra_priority           =       200,
        .cra_flags              =       CRYPTO_ALG_TYPE_CIPHER,
        .cra_blocksize          =       TF_BLOCK_SIZE,
@@ -93,5 +92,6 @@ module_init(init);
 module_exit(fini);
 
 MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION ("Twofish Cipher Algorithm, i586 asm optimized");
+MODULE_DESCRIPTION ("Twofish Cipher Algorithm, asm optimized");
 MODULE_ALIAS("twofish");
+MODULE_ALIAS("twofish-asm");
index 083d2e1dfc21640e67f6e6f85671323b2f954a97..c3166a1a5bb6da630f7861581b96a6a5f5a19073 100644
@@ -24,10 +24,6 @@ config CRYPTO_ALGAPI
        help
          This option provides the API for cryptographic algorithms.
 
-config CRYPTO_ABLKCIPHER
-       tristate
-       select CRYPTO_BLKCIPHER
-
 config CRYPTO_AEAD
        tristate
        select CRYPTO_ALGAPI
@@ -36,6 +32,15 @@ config CRYPTO_BLKCIPHER
        tristate
        select CRYPTO_ALGAPI
 
+config CRYPTO_SEQIV
+       tristate "Sequence Number IV Generator"
+       select CRYPTO_AEAD
+       select CRYPTO_BLKCIPHER
+       help
+         This IV generator generates an IV based on a sequence number by
+         xoring it with a salt.  This algorithm is mainly useful for CTR
+         and similar modes.
+
 config CRYPTO_HASH
        tristate
        select CRYPTO_ALGAPI
@@ -91,7 +96,7 @@ config CRYPTO_SHA1
          SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2).
 
 config CRYPTO_SHA256
-       tristate "SHA256 digest algorithm"
+       tristate "SHA224 and SHA256 digest algorithm"
        select CRYPTO_ALGAPI
        help
          SHA256 secure hash standard (DFIPS 180-2).
@@ -99,6 +104,9 @@ config CRYPTO_SHA256
          This version of SHA implements a 256 bit hash with 128 bits of
          security against collision attacks.
 
+          This code also includes SHA-224, a 224 bit hash with 112 bits
+          of security against collision attacks.
+
 config CRYPTO_SHA512
        tristate "SHA384 and SHA512 digest algorithms"
        select CRYPTO_ALGAPI
@@ -195,9 +203,34 @@ config CRYPTO_XTS
          key size 256, 384 or 512 bits. This implementation currently
          can't handle a sectorsize which is not a multiple of 16 bytes.
 
+config CRYPTO_CTR
+       tristate "CTR support"
+       select CRYPTO_BLKCIPHER
+       select CRYPTO_SEQIV
+       select CRYPTO_MANAGER
+       help
+         CTR: Counter mode
+         This block cipher mode is required for IPsec.
+
+config CRYPTO_GCM
+       tristate "GCM/GMAC support"
+       select CRYPTO_CTR
+       select CRYPTO_AEAD
+       select CRYPTO_GF128MUL
+       help
+         Support for Galois/Counter Mode (GCM) and Galois Message
+         Authentication Code (GMAC). Required for IPsec.
+
+config CRYPTO_CCM
+       tristate "CCM support"
+       select CRYPTO_CTR
+       select CRYPTO_AEAD
+       help
+         Support for Counter with CBC MAC. Required for IPsec.
+
 config CRYPTO_CRYPTD
        tristate "Software async crypto daemon"
-       select CRYPTO_ABLKCIPHER
+       select CRYPTO_BLKCIPHER
        select CRYPTO_MANAGER
        help
          This is a generic software asynchronous crypto daemon that
@@ -320,6 +353,7 @@ config CRYPTO_AES_586
        tristate "AES cipher algorithms (i586)"
        depends on (X86 || UML_X86) && !64BIT
        select CRYPTO_ALGAPI
+       select CRYPTO_AES
        help
          AES cipher algorithms (FIPS-197). AES uses the Rijndael 
          algorithm.
@@ -341,6 +375,7 @@ config CRYPTO_AES_X86_64
        tristate "AES cipher algorithms (x86_64)"
        depends on (X86 || UML_X86) && 64BIT
        select CRYPTO_ALGAPI
+       select CRYPTO_AES
        help
          AES cipher algorithms (FIPS-197). AES uses the Rijndael 
          algorithm.
@@ -441,6 +476,46 @@ config CRYPTO_SEED
          See also:
          <http://www.kisa.or.kr/kisa/seed/jsp/seed_eng.jsp>
 
+config CRYPTO_SALSA20
+       tristate "Salsa20 stream cipher algorithm (EXPERIMENTAL)"
+       depends on EXPERIMENTAL
+       select CRYPTO_BLKCIPHER
+       help
+         Salsa20 stream cipher algorithm.
+
+         Salsa20 is a stream cipher submitted to eSTREAM, the ECRYPT
+         Stream Cipher Project. See <http://www.ecrypt.eu.org/stream/>
+
+         The Salsa20 stream cipher algorithm is designed by Daniel J.
+         Bernstein <djb@cr.yp.to>. See <http://cr.yp.to/snuffle.html>
+
+config CRYPTO_SALSA20_586
+       tristate "Salsa20 stream cipher algorithm (i586) (EXPERIMENTAL)"
+       depends on (X86 || UML_X86) && !64BIT
+       depends on EXPERIMENTAL
+       select CRYPTO_BLKCIPHER
+       help
+         Salsa20 stream cipher algorithm.
+
+         Salsa20 is a stream cipher submitted to eSTREAM, the ECRYPT
+         Stream Cipher Project. See <http://www.ecrypt.eu.org/stream/>
+
+         The Salsa20 stream cipher algorithm is designed by Daniel J.
+         Bernstein <djb@cr.yp.to>. See <http://cr.yp.to/snuffle.html>
+
+config CRYPTO_SALSA20_X86_64
+       tristate "Salsa20 stream cipher algorithm (x86_64) (EXPERIMENTAL)"
+       depends on (X86 || UML_X86) && 64BIT
+       depends on EXPERIMENTAL
+       select CRYPTO_BLKCIPHER
+       help
+         Salsa20 stream cipher algorithm.
+
+         Salsa20 is a stream cipher submitted to eSTREAM, the ECRYPT
+         Stream Cipher Project. See <http://www.ecrypt.eu.org/stream/>
+
+         The Salsa20 stream cipher algorithm is designed by Daniel J.
+         Bernstein <djb@cr.yp.to>. See <http://cr.yp.to/snuffle.html>
 
 config CRYPTO_DEFLATE
        tristate "Deflate compression algorithm"
@@ -491,6 +566,7 @@ config CRYPTO_TEST
        tristate "Testing module"
        depends on m
        select CRYPTO_ALGAPI
+       select CRYPTO_AEAD
        help
          Quick & dirty crypto test module.
 
@@ -498,10 +574,19 @@ config CRYPTO_AUTHENC
        tristate "Authenc support"
        select CRYPTO_AEAD
        select CRYPTO_MANAGER
+       select CRYPTO_HASH
        help
          Authenc: Combined mode wrapper for IPsec.
          This is required for IPsec.
 
+config CRYPTO_LZO
+       tristate "LZO compression algorithm"
+       select CRYPTO_ALGAPI
+       select LZO_COMPRESS
+       select LZO_DECOMPRESS
+       help
+         This is the LZO compression algorithm.
+
 source "drivers/crypto/Kconfig"
 
 endif  # if CRYPTO
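
The CRYPTO_SEQIV help text above compresses the idea considerably; as a standalone sketch (illustrative only, not the kernel's seqiv.c):

	#include <stdint.h>
	#include <string.h>

	/*
	 * Derive a per-message IV by XOR-ing a random per-key salt with
	 * the message's sequence number: unique, non-repeating IVs with
	 * no per-packet randomness, which is all CTR-like modes need.
	 */
	static void seqiv_sketch(uint8_t *iv, const uint8_t *salt,
				 size_t ivsize, uint64_t seq)
	{
		size_t i;

		memcpy(iv, salt, ivsize);
		/* fold the sequence number into the trailing bytes,
		 * big-endian */
		for (i = 0; i < sizeof(seq) && i < ivsize; i++)
			iv[ivsize - 1 - i] ^= (uint8_t)(seq >> (8 * i));
	}
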
index 43c2a0dc99365faf07d960dfe6ffbf4cc1c816b1..48c7583799541045751655aa6de6af80fed70ae9 100644
@@ -8,9 +8,14 @@ crypto_algapi-$(CONFIG_PROC_FS) += proc.o
 crypto_algapi-objs := algapi.o scatterwalk.o $(crypto_algapi-y)
 obj-$(CONFIG_CRYPTO_ALGAPI) += crypto_algapi.o
 
-obj-$(CONFIG_CRYPTO_ABLKCIPHER) += ablkcipher.o
 obj-$(CONFIG_CRYPTO_AEAD) += aead.o
-obj-$(CONFIG_CRYPTO_BLKCIPHER) += blkcipher.o
+
+crypto_blkcipher-objs := ablkcipher.o
+crypto_blkcipher-objs += blkcipher.o
+obj-$(CONFIG_CRYPTO_BLKCIPHER) += crypto_blkcipher.o
+obj-$(CONFIG_CRYPTO_BLKCIPHER) += chainiv.o
+obj-$(CONFIG_CRYPTO_BLKCIPHER) += eseqiv.o
+obj-$(CONFIG_CRYPTO_SEQIV) += seqiv.o
 
 crypto_hash-objs := hash.o
 obj-$(CONFIG_CRYPTO_HASH) += crypto_hash.o
@@ -32,6 +37,9 @@ obj-$(CONFIG_CRYPTO_CBC) += cbc.o
 obj-$(CONFIG_CRYPTO_PCBC) += pcbc.o
 obj-$(CONFIG_CRYPTO_LRW) += lrw.o
 obj-$(CONFIG_CRYPTO_XTS) += xts.o
+obj-$(CONFIG_CRYPTO_CTR) += ctr.o
+obj-$(CONFIG_CRYPTO_GCM) += gcm.o
+obj-$(CONFIG_CRYPTO_CCM) += ccm.o
 obj-$(CONFIG_CRYPTO_CRYPTD) += cryptd.o
 obj-$(CONFIG_CRYPTO_DES) += des_generic.o
 obj-$(CONFIG_CRYPTO_FCRYPT) += fcrypt.o
@@ -48,10 +56,12 @@ obj-$(CONFIG_CRYPTO_TEA) += tea.o
 obj-$(CONFIG_CRYPTO_KHAZAD) += khazad.o
 obj-$(CONFIG_CRYPTO_ANUBIS) += anubis.o
 obj-$(CONFIG_CRYPTO_SEED) += seed.o
+obj-$(CONFIG_CRYPTO_SALSA20) += salsa20_generic.o
 obj-$(CONFIG_CRYPTO_DEFLATE) += deflate.o
 obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o
 obj-$(CONFIG_CRYPTO_CRC32C) += crc32c.o
 obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o
+obj-$(CONFIG_CRYPTO_LZO) += lzo.o
 
 obj-$(CONFIG_CRYPTO_TEST) += tcrypt.o
 
index 2731acb86e7d53755bbfd3d7cdc98926844ca887..3bcb099b4a85bd779d752f7d42868c48cb07d87a 100644
  *
  */
 
-#include <crypto/algapi.h>
-#include <linux/errno.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/err.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/rtnetlink.h>
+#include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/seq_file.h>
 
+#include "internal.h"
+
 static int setkey_unaligned(struct crypto_ablkcipher *tfm, const u8 *key,
                            unsigned int keylen)
 {
@@ -66,6 +70,16 @@ static unsigned int crypto_ablkcipher_ctxsize(struct crypto_alg *alg, u32 type,
        return alg->cra_ctxsize;
 }
 
+int skcipher_null_givencrypt(struct skcipher_givcrypt_request *req)
+{
+       return crypto_ablkcipher_encrypt(&req->creq);
+}
+
+int skcipher_null_givdecrypt(struct skcipher_givcrypt_request *req)
+{
+       return crypto_ablkcipher_decrypt(&req->creq);
+}
+
 static int crypto_init_ablkcipher_ops(struct crypto_tfm *tfm, u32 type,
                                      u32 mask)
 {
@@ -78,6 +92,11 @@ static int crypto_init_ablkcipher_ops(struct crypto_tfm *tfm, u32 type,
        crt->setkey = setkey;
        crt->encrypt = alg->encrypt;
        crt->decrypt = alg->decrypt;
+       if (!alg->ivsize) {
+               crt->givencrypt = skcipher_null_givencrypt;
+               crt->givdecrypt = skcipher_null_givdecrypt;
+       }
+       crt->base = __crypto_ablkcipher_cast(tfm);
        crt->ivsize = alg->ivsize;
 
        return 0;
@@ -90,10 +109,13 @@ static void crypto_ablkcipher_show(struct seq_file *m, struct crypto_alg *alg)
        struct ablkcipher_alg *ablkcipher = &alg->cra_ablkcipher;
 
        seq_printf(m, "type         : ablkcipher\n");
+       seq_printf(m, "async        : %s\n", alg->cra_flags & CRYPTO_ALG_ASYNC ?
+                                            "yes" : "no");
        seq_printf(m, "blocksize    : %u\n", alg->cra_blocksize);
        seq_printf(m, "min keysize  : %u\n", ablkcipher->min_keysize);
        seq_printf(m, "max keysize  : %u\n", ablkcipher->max_keysize);
        seq_printf(m, "ivsize       : %u\n", ablkcipher->ivsize);
+       seq_printf(m, "geniv        : %s\n", ablkcipher->geniv ?: "<default>");
 }
 
 const struct crypto_type crypto_ablkcipher_type = {
@@ -105,5 +127,220 @@ const struct crypto_type crypto_ablkcipher_type = {
 };
 EXPORT_SYMBOL_GPL(crypto_ablkcipher_type);
 
+static int no_givdecrypt(struct skcipher_givcrypt_request *req)
+{
+       return -ENOSYS;
+}
+
+static int crypto_init_givcipher_ops(struct crypto_tfm *tfm, u32 type,
+                                     u32 mask)
+{
+       struct ablkcipher_alg *alg = &tfm->__crt_alg->cra_ablkcipher;
+       struct ablkcipher_tfm *crt = &tfm->crt_ablkcipher;
+
+       if (alg->ivsize > PAGE_SIZE / 8)
+               return -EINVAL;
+
+       crt->setkey = tfm->__crt_alg->cra_flags & CRYPTO_ALG_GENIV ?
+                     alg->setkey : setkey;
+       crt->encrypt = alg->encrypt;
+       crt->decrypt = alg->decrypt;
+       crt->givencrypt = alg->givencrypt;
+       crt->givdecrypt = alg->givdecrypt ?: no_givdecrypt;
+       crt->base = __crypto_ablkcipher_cast(tfm);
+       crt->ivsize = alg->ivsize;
+
+       return 0;
+}
+
+static void crypto_givcipher_show(struct seq_file *m, struct crypto_alg *alg)
+       __attribute__ ((unused));
+static void crypto_givcipher_show(struct seq_file *m, struct crypto_alg *alg)
+{
+       struct ablkcipher_alg *ablkcipher = &alg->cra_ablkcipher;
+
+       seq_printf(m, "type         : givcipher\n");
+       seq_printf(m, "async        : %s\n", alg->cra_flags & CRYPTO_ALG_ASYNC ?
+                                            "yes" : "no");
+       seq_printf(m, "blocksize    : %u\n", alg->cra_blocksize);
+       seq_printf(m, "min keysize  : %u\n", ablkcipher->min_keysize);
+       seq_printf(m, "max keysize  : %u\n", ablkcipher->max_keysize);
+       seq_printf(m, "ivsize       : %u\n", ablkcipher->ivsize);
+       seq_printf(m, "geniv        : %s\n", ablkcipher->geniv ?: "<built-in>");
+}
+
+const struct crypto_type crypto_givcipher_type = {
+       .ctxsize = crypto_ablkcipher_ctxsize,
+       .init = crypto_init_givcipher_ops,
+#ifdef CONFIG_PROC_FS
+       .show = crypto_givcipher_show,
+#endif
+};
+EXPORT_SYMBOL_GPL(crypto_givcipher_type);
+
+const char *crypto_default_geniv(const struct crypto_alg *alg)
+{
+       return alg->cra_flags & CRYPTO_ALG_ASYNC ? "eseqiv" : "chainiv";
+}
+
+static int crypto_givcipher_default(struct crypto_alg *alg, u32 type, u32 mask)
+{
+       struct rtattr *tb[3];
+       struct {
+               struct rtattr attr;
+               struct crypto_attr_type data;
+       } ptype;
+       struct {
+               struct rtattr attr;
+               struct crypto_attr_alg data;
+       } palg;
+       struct crypto_template *tmpl;
+       struct crypto_instance *inst;
+       struct crypto_alg *larval;
+       const char *geniv;
+       int err;
+
+       larval = crypto_larval_lookup(alg->cra_driver_name,
+                                     CRYPTO_ALG_TYPE_GIVCIPHER,
+                                     CRYPTO_ALG_TYPE_MASK);
+       err = PTR_ERR(larval);
+       if (IS_ERR(larval))
+               goto out;
+
+       err = -EAGAIN;
+       if (!crypto_is_larval(larval))
+               goto drop_larval;
+
+       ptype.attr.rta_len = sizeof(ptype);
+       ptype.attr.rta_type = CRYPTOA_TYPE;
+       ptype.data.type = type | CRYPTO_ALG_GENIV;
+       /* GENIV tells the template that we're making a default geniv. */
+       ptype.data.mask = mask | CRYPTO_ALG_GENIV;
+       tb[0] = &ptype.attr;
+
+       palg.attr.rta_len = sizeof(palg);
+       palg.attr.rta_type = CRYPTOA_ALG;
+       /* Must use the exact name to locate ourselves. */
+       memcpy(palg.data.name, alg->cra_driver_name, CRYPTO_MAX_ALG_NAME);
+       tb[1] = &palg.attr;
+
+       tb[2] = NULL;
+
+       if ((alg->cra_flags & CRYPTO_ALG_TYPE_MASK) ==
+           CRYPTO_ALG_TYPE_BLKCIPHER)
+               geniv = alg->cra_blkcipher.geniv;
+       else
+               geniv = alg->cra_ablkcipher.geniv;
+
+       if (!geniv)
+               geniv = crypto_default_geniv(alg);
+
+       tmpl = crypto_lookup_template(geniv);
+       err = -ENOENT;
+       if (!tmpl)
+               goto kill_larval;
+
+       inst = tmpl->alloc(tb);
+       err = PTR_ERR(inst);
+       if (IS_ERR(inst))
+               goto put_tmpl;
+
+       if ((err = crypto_register_instance(tmpl, inst))) {
+               tmpl->free(inst);
+               goto put_tmpl;
+       }
+
+       /* Redo the lookup to use the instance we just registered. */
+       err = -EAGAIN;
+
+put_tmpl:
+       crypto_tmpl_put(tmpl);
+kill_larval:
+       crypto_larval_kill(larval);
+drop_larval:
+       crypto_mod_put(larval);
+out:
+       crypto_mod_put(alg);
+       return err;
+}
+
+static struct crypto_alg *crypto_lookup_skcipher(const char *name, u32 type,
+                                                u32 mask)
+{
+       struct crypto_alg *alg;
+
+       alg = crypto_alg_mod_lookup(name, type, mask);
+       if (IS_ERR(alg))
+               return alg;
+
+       if ((alg->cra_flags & CRYPTO_ALG_TYPE_MASK) ==
+           CRYPTO_ALG_TYPE_GIVCIPHER)
+               return alg;
+
+       if (!((alg->cra_flags & CRYPTO_ALG_TYPE_MASK) ==
+             CRYPTO_ALG_TYPE_BLKCIPHER ? alg->cra_blkcipher.ivsize :
+                                         alg->cra_ablkcipher.ivsize))
+               return alg;
+
+       return ERR_PTR(crypto_givcipher_default(alg, type, mask));
+}
+
+int crypto_grab_skcipher(struct crypto_skcipher_spawn *spawn, const char *name,
+                        u32 type, u32 mask)
+{
+       struct crypto_alg *alg;
+       int err;
+
+       type = crypto_skcipher_type(type);
+       mask = crypto_skcipher_mask(mask);
+
+       alg = crypto_lookup_skcipher(name, type, mask);
+       if (IS_ERR(alg))
+               return PTR_ERR(alg);
+
+       err = crypto_init_spawn(&spawn->base, alg, spawn->base.inst, mask);
+       crypto_mod_put(alg);
+       return err;
+}
+EXPORT_SYMBOL_GPL(crypto_grab_skcipher);
+
+struct crypto_ablkcipher *crypto_alloc_ablkcipher(const char *alg_name,
+                                                 u32 type, u32 mask)
+{
+       struct crypto_tfm *tfm;
+       int err;
+
+       type = crypto_skcipher_type(type);
+       mask = crypto_skcipher_mask(mask);
+
+       for (;;) {
+               struct crypto_alg *alg;
+
+               alg = crypto_lookup_skcipher(alg_name, type, mask);
+               if (IS_ERR(alg)) {
+                       err = PTR_ERR(alg);
+                       goto err;
+               }
+
+               tfm = __crypto_alloc_tfm(alg, type, mask);
+               if (!IS_ERR(tfm))
+                       return __crypto_ablkcipher_cast(tfm);
+
+               crypto_mod_put(alg);
+               err = PTR_ERR(tfm);
+
+err:
+               if (err != -EAGAIN)
+                       break;
+               if (signal_pending(current)) {
+                       err = -EINTR;
+                       break;
+               }
+       }
+
+       return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(crypto_alloc_ablkcipher);
+
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Asynchronous block chaining cipher type");
index 84a3501fb478bf8bb9d12c2586e925c59eb46135..3a6f3f52c7c71911ac6cd7506493fb24ccd8eb50 100644 (file)
  *
  */
 
-#include <crypto/algapi.h>
-#include <linux/errno.h>
+#include <crypto/internal/aead.h>
+#include <linux/err.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/rtnetlink.h>
 #include <linux/slab.h>
 #include <linux/seq_file.h>
 
+#include "internal.h"
+
 static int setkey_unaligned(struct crypto_aead *tfm, const u8 *key,
                            unsigned int keylen)
 {
@@ -53,25 +56,54 @@ static int setkey(struct crypto_aead *tfm, const u8 *key, unsigned int keylen)
        return aead->setkey(tfm, key, keylen);
 }
 
+int crypto_aead_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
+{
+       struct aead_tfm *crt = crypto_aead_crt(tfm);
+       int err;
+
+       if (authsize > crypto_aead_alg(tfm)->maxauthsize)
+               return -EINVAL;
+
+       if (crypto_aead_alg(tfm)->setauthsize) {
+               err = crypto_aead_alg(tfm)->setauthsize(crt->base, authsize);
+               if (err)
+                       return err;
+       }
+
+       crypto_aead_crt(crt->base)->authsize = authsize;
+       crt->authsize = authsize;
+       return 0;
+}
+EXPORT_SYMBOL_GPL(crypto_aead_setauthsize);
+
 static unsigned int crypto_aead_ctxsize(struct crypto_alg *alg, u32 type,
                                        u32 mask)
 {
        return alg->cra_ctxsize;
 }
 
+static int no_givcrypt(struct aead_givcrypt_request *req)
+{
+       return -ENOSYS;
+}
+
 static int crypto_init_aead_ops(struct crypto_tfm *tfm, u32 type, u32 mask)
 {
        struct aead_alg *alg = &tfm->__crt_alg->cra_aead;
        struct aead_tfm *crt = &tfm->crt_aead;
 
-       if (max(alg->authsize, alg->ivsize) > PAGE_SIZE / 8)
+       if (max(alg->maxauthsize, alg->ivsize) > PAGE_SIZE / 8)
                return -EINVAL;
 
-       crt->setkey = setkey;
+       crt->setkey = tfm->__crt_alg->cra_flags & CRYPTO_ALG_GENIV ?
+                     alg->setkey : setkey;
        crt->encrypt = alg->encrypt;
        crt->decrypt = alg->decrypt;
+       crt->givencrypt = alg->givencrypt ?: no_givcrypt;
+       crt->givdecrypt = alg->givdecrypt ?: no_givcrypt;
+       crt->base = __crypto_aead_cast(tfm);
        crt->ivsize = alg->ivsize;
-       crt->authsize = alg->authsize;
+       crt->authsize = alg->maxauthsize;
 
        return 0;
 }
@@ -83,9 +115,12 @@ static void crypto_aead_show(struct seq_file *m, struct crypto_alg *alg)
        struct aead_alg *aead = &alg->cra_aead;
 
        seq_printf(m, "type         : aead\n");
+       seq_printf(m, "async        : %s\n", alg->cra_flags & CRYPTO_ALG_ASYNC ?
+                                            "yes" : "no");
        seq_printf(m, "blocksize    : %u\n", alg->cra_blocksize);
        seq_printf(m, "ivsize       : %u\n", aead->ivsize);
-       seq_printf(m, "authsize     : %u\n", aead->authsize);
+       seq_printf(m, "maxauthsize  : %u\n", aead->maxauthsize);
+       seq_printf(m, "geniv        : %s\n", aead->geniv ?: "<built-in>");
 }
 
 const struct crypto_type crypto_aead_type = {
@@ -97,5 +132,358 @@ const struct crypto_type crypto_aead_type = {
 };
 EXPORT_SYMBOL_GPL(crypto_aead_type);
 
+static int aead_null_givencrypt(struct aead_givcrypt_request *req)
+{
+       return crypto_aead_encrypt(&req->areq);
+}
+
+static int aead_null_givdecrypt(struct aead_givcrypt_request *req)
+{
+       return crypto_aead_decrypt(&req->areq);
+}
+
+static int crypto_init_nivaead_ops(struct crypto_tfm *tfm, u32 type, u32 mask)
+{
+       struct aead_alg *alg = &tfm->__crt_alg->cra_aead;
+       struct aead_tfm *crt = &tfm->crt_aead;
+
+       if (max(alg->maxauthsize, alg->ivsize) > PAGE_SIZE / 8)
+               return -EINVAL;
+
+       crt->setkey = setkey;
+       crt->encrypt = alg->encrypt;
+       crt->decrypt = alg->decrypt;
+       if (!alg->ivsize) {
+               crt->givencrypt = aead_null_givencrypt;
+               crt->givdecrypt = aead_null_givdecrypt;
+       }
+       crt->base = __crypto_aead_cast(tfm);
+       crt->ivsize = alg->ivsize;
+       crt->authsize = alg->maxauthsize;
+
+       return 0;
+}
+
+static void crypto_nivaead_show(struct seq_file *m, struct crypto_alg *alg)
+       __attribute__ ((unused));
+static void crypto_nivaead_show(struct seq_file *m, struct crypto_alg *alg)
+{
+       struct aead_alg *aead = &alg->cra_aead;
+
+       seq_printf(m, "type         : nivaead\n");
+       seq_printf(m, "async        : %s\n", alg->cra_flags & CRYPTO_ALG_ASYNC ?
+                                            "yes" : "no");
+       seq_printf(m, "blocksize    : %u\n", alg->cra_blocksize);
+       seq_printf(m, "ivsize       : %u\n", aead->ivsize);
+       seq_printf(m, "maxauthsize  : %u\n", aead->maxauthsize);
+       seq_printf(m, "geniv        : %s\n", aead->geniv);
+}
+
+const struct crypto_type crypto_nivaead_type = {
+       .ctxsize = crypto_aead_ctxsize,
+       .init = crypto_init_nivaead_ops,
+#ifdef CONFIG_PROC_FS
+       .show = crypto_nivaead_show,
+#endif
+};
+EXPORT_SYMBOL_GPL(crypto_nivaead_type);
+
+static int crypto_grab_nivaead(struct crypto_aead_spawn *spawn,
+                              const char *name, u32 type, u32 mask)
+{
+       struct crypto_alg *alg;
+       int err;
+
+       type &= ~(CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV);
+       type |= CRYPTO_ALG_TYPE_AEAD;
+       mask |= CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV;
+
+       alg = crypto_alg_mod_lookup(name, type, mask);
+       if (IS_ERR(alg))
+               return PTR_ERR(alg);
+
+       err = crypto_init_spawn(&spawn->base, alg, spawn->base.inst, mask);
+       crypto_mod_put(alg);
+       return err;
+}
+
+struct crypto_instance *aead_geniv_alloc(struct crypto_template *tmpl,
+                                        struct rtattr **tb, u32 type,
+                                        u32 mask)
+{
+       const char *name;
+       struct crypto_aead_spawn *spawn;
+       struct crypto_attr_type *algt;
+       struct crypto_instance *inst;
+       struct crypto_alg *alg;
+       int err;
+
+       algt = crypto_get_attr_type(tb);
+       err = PTR_ERR(algt);
+       if (IS_ERR(algt))
+               return ERR_PTR(err);
+
+       if ((algt->type ^ (CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_GENIV)) &
+           algt->mask)
+               return ERR_PTR(-EINVAL);
+
+       name = crypto_attr_alg_name(tb[1]);
+       err = PTR_ERR(name);
+       if (IS_ERR(name))
+               return ERR_PTR(err);
+
+       inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
+       if (!inst)
+               return ERR_PTR(-ENOMEM);
+
+       spawn = crypto_instance_ctx(inst);
+
+       /* Ignore async algorithms if necessary. */
+       mask |= crypto_requires_sync(algt->type, algt->mask);
+
+       crypto_set_aead_spawn(spawn, inst);
+       err = crypto_grab_nivaead(spawn, name, type, mask);
+       if (err)
+               goto err_free_inst;
+
+       alg = crypto_aead_spawn_alg(spawn);
+
+       err = -EINVAL;
+       if (!alg->cra_aead.ivsize)
+               goto err_drop_alg;
+
+       /*
+        * This is only true if we're constructing an algorithm with its
+        * default IV generator.  For the default generator we elide the
+        * template name and double-check the IV generator.
+        */
+       if (algt->mask & CRYPTO_ALG_GENIV) {
+               if (strcmp(tmpl->name, alg->cra_aead.geniv))
+                       goto err_drop_alg;
+
+               memcpy(inst->alg.cra_name, alg->cra_name, CRYPTO_MAX_ALG_NAME);
+               memcpy(inst->alg.cra_driver_name, alg->cra_driver_name,
+                      CRYPTO_MAX_ALG_NAME);
+       } else {
+               err = -ENAMETOOLONG;
+               if (snprintf(inst->alg.cra_name, CRYPTO_MAX_ALG_NAME,
+                            "%s(%s)", tmpl->name, alg->cra_name) >=
+                   CRYPTO_MAX_ALG_NAME)
+                       goto err_drop_alg;
+               if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME,
+                            "%s(%s)", tmpl->name, alg->cra_driver_name) >=
+                   CRYPTO_MAX_ALG_NAME)
+                       goto err_drop_alg;
+       }
+
+       inst->alg.cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_GENIV;
+       inst->alg.cra_flags |= alg->cra_flags & CRYPTO_ALG_ASYNC;
+       inst->alg.cra_priority = alg->cra_priority;
+       inst->alg.cra_blocksize = alg->cra_blocksize;
+       inst->alg.cra_alignmask = alg->cra_alignmask;
+       inst->alg.cra_type = &crypto_aead_type;
+
+       inst->alg.cra_aead.ivsize = alg->cra_aead.ivsize;
+       inst->alg.cra_aead.maxauthsize = alg->cra_aead.maxauthsize;
+       inst->alg.cra_aead.geniv = alg->cra_aead.geniv;
+
+       inst->alg.cra_aead.setkey = alg->cra_aead.setkey;
+       inst->alg.cra_aead.setauthsize = alg->cra_aead.setauthsize;
+       inst->alg.cra_aead.encrypt = alg->cra_aead.encrypt;
+       inst->alg.cra_aead.decrypt = alg->cra_aead.decrypt;
+
+out:
+       return inst;
+
+err_drop_alg:
+       crypto_drop_aead(spawn);
+err_free_inst:
+       kfree(inst);
+       inst = ERR_PTR(err);
+       goto out;
+}
+EXPORT_SYMBOL_GPL(aead_geniv_alloc);
+
+void aead_geniv_free(struct crypto_instance *inst)
+{
+       crypto_drop_aead(crypto_instance_ctx(inst));
+       kfree(inst);
+}
+EXPORT_SYMBOL_GPL(aead_geniv_free);
+
+int aead_geniv_init(struct crypto_tfm *tfm)
+{
+       struct crypto_instance *inst = (void *)tfm->__crt_alg;
+       struct crypto_aead *aead;
+
+       aead = crypto_spawn_aead(crypto_instance_ctx(inst));
+       if (IS_ERR(aead))
+               return PTR_ERR(aead);
+
+       tfm->crt_aead.base = aead;
+       tfm->crt_aead.reqsize += crypto_aead_reqsize(aead);
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(aead_geniv_init);
+
+void aead_geniv_exit(struct crypto_tfm *tfm)
+{
+       crypto_free_aead(tfm->crt_aead.base);
+}
+EXPORT_SYMBOL_GPL(aead_geniv_exit);
+
+static int crypto_nivaead_default(struct crypto_alg *alg, u32 type, u32 mask)
+{
+       struct rtattr *tb[3];
+       struct {
+               struct rtattr attr;
+               struct crypto_attr_type data;
+       } ptype;
+       struct {
+               struct rtattr attr;
+               struct crypto_attr_alg data;
+       } palg;
+       struct crypto_template *tmpl;
+       struct crypto_instance *inst;
+       struct crypto_alg *larval;
+       const char *geniv;
+       int err;
+
+       larval = crypto_larval_lookup(alg->cra_driver_name,
+                                     CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_GENIV,
+                                     CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV);
+       err = PTR_ERR(larval);
+       if (IS_ERR(larval))
+               goto out;
+
+       err = -EAGAIN;
+       if (!crypto_is_larval(larval))
+               goto drop_larval;
+
+       ptype.attr.rta_len = sizeof(ptype);
+       ptype.attr.rta_type = CRYPTOA_TYPE;
+       ptype.data.type = type | CRYPTO_ALG_GENIV;
+       /* GENIV tells the template that we're making a default geniv. */
+       ptype.data.mask = mask | CRYPTO_ALG_GENIV;
+       tb[0] = &ptype.attr;
+
+       palg.attr.rta_len = sizeof(palg);
+       palg.attr.rta_type = CRYPTOA_ALG;
+       /* Must use the exact name to locate ourselves. */
+       memcpy(palg.data.name, alg->cra_driver_name, CRYPTO_MAX_ALG_NAME);
+       tb[1] = &palg.attr;
+
+       tb[2] = NULL;
+
+       geniv = alg->cra_aead.geniv;
+
+       tmpl = crypto_lookup_template(geniv);
+       err = -ENOENT;
+       if (!tmpl)
+               goto kill_larval;
+
+       inst = tmpl->alloc(tb);
+       err = PTR_ERR(inst);
+       if (IS_ERR(inst))
+               goto put_tmpl;
+
+       if ((err = crypto_register_instance(tmpl, inst))) {
+               tmpl->free(inst);
+               goto put_tmpl;
+       }
+
+       /* Redo the lookup to use the instance we just registered. */
+       err = -EAGAIN;
+
+put_tmpl:
+       crypto_tmpl_put(tmpl);
+kill_larval:
+       crypto_larval_kill(larval);
+drop_larval:
+       crypto_mod_put(larval);
+out:
+       crypto_mod_put(alg);
+       return err;
+}
+
+static struct crypto_alg *crypto_lookup_aead(const char *name, u32 type,
+                                            u32 mask)
+{
+       struct crypto_alg *alg;
+
+       alg = crypto_alg_mod_lookup(name, type, mask);
+       if (IS_ERR(alg))
+               return alg;
+
+       if (alg->cra_type == &crypto_aead_type)
+               return alg;
+
+       if (!alg->cra_aead.ivsize)
+               return alg;
+
+       return ERR_PTR(crypto_nivaead_default(alg, type, mask));
+}
+
+int crypto_grab_aead(struct crypto_aead_spawn *spawn, const char *name,
+                    u32 type, u32 mask)
+{
+       struct crypto_alg *alg;
+       int err;
+
+       type &= ~(CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV);
+       type |= CRYPTO_ALG_TYPE_AEAD;
+       mask &= ~(CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV);
+       mask |= CRYPTO_ALG_TYPE_MASK;
+
+       alg = crypto_lookup_aead(name, type, mask);
+       if (IS_ERR(alg))
+               return PTR_ERR(alg);
+
+       err = crypto_init_spawn(&spawn->base, alg, spawn->base.inst, mask);
+       crypto_mod_put(alg);
+       return err;
+}
+EXPORT_SYMBOL_GPL(crypto_grab_aead);
+
+struct crypto_aead *crypto_alloc_aead(const char *alg_name, u32 type, u32 mask)
+{
+       struct crypto_tfm *tfm;
+       int err;
+
+       type &= ~(CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV);
+       type |= CRYPTO_ALG_TYPE_AEAD;
+       mask &= ~(CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV);
+       mask |= CRYPTO_ALG_TYPE_MASK;
+
+       for (;;) {
+               struct crypto_alg *alg;
+
+               alg = crypto_lookup_aead(alg_name, type, mask);
+               if (IS_ERR(alg)) {
+                       err = PTR_ERR(alg);
+                       goto err;
+               }
+
+               tfm = __crypto_alloc_tfm(alg, type, mask);
+               if (!IS_ERR(tfm))
+                       return __crypto_aead_cast(tfm);
+
+               crypto_mod_put(alg);
+               err = PTR_ERR(tfm);
+
+err:
+               if (err != -EAGAIN)
+                       break;
+               if (signal_pending(current)) {
+                       err = -EINTR;
+                       break;
+               }
+       }
+
+       return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(crypto_alloc_aead);
+
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Authenticated Encryption with Associated Data (AEAD)");
index 9401dca85e87c9a5c3908ef6cfbd78fa9a20db89..cf30af74480f4519b50e3b1fcf0e9f7ebb21ac34 100644 (file)
  * ---------------------------------------------------------------------------
  */
 
-/* Some changes from the Gladman version:
-    s/RIJNDAEL(e_key)/E_KEY/g
-    s/RIJNDAEL(d_key)/D_KEY/g
-*/
-
+#include <crypto/aes.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/types.h>
 #include <linux/crypto.h>
 #include <asm/byteorder.h>
 
-#define AES_MIN_KEY_SIZE       16
-#define AES_MAX_KEY_SIZE       32
-
-#define AES_BLOCK_SIZE         16
-
-/*
- * #define byte(x, nr) ((unsigned char)((x) >> (nr*8))) 
- */
-static inline u8
-byte(const u32 x, const unsigned n)
+static inline u8 byte(const u32 x, const unsigned n)
 {
        return x >> (n << 3);
 }
 
-struct aes_ctx {
-       int key_length;
-       u32 buf[120];
-};
-
-#define E_KEY (&ctx->buf[0])
-#define D_KEY (&ctx->buf[60])
-
 static u8 pow_tab[256] __initdata;
 static u8 log_tab[256] __initdata;
 static u8 sbx_tab[256] __initdata;
 static u8 isb_tab[256] __initdata;
 static u32 rco_tab[10];
-static u32 ft_tab[4][256];
-static u32 it_tab[4][256];
 
-static u32 fl_tab[4][256];
-static u32 il_tab[4][256];
+u32 crypto_ft_tab[4][256];
+u32 crypto_fl_tab[4][256];
+u32 crypto_it_tab[4][256];
+u32 crypto_il_tab[4][256];
 
-static inline u8 __init
-f_mult (u8 a, u8 b)
+EXPORT_SYMBOL_GPL(crypto_ft_tab);
+EXPORT_SYMBOL_GPL(crypto_fl_tab);
+EXPORT_SYMBOL_GPL(crypto_it_tab);
+EXPORT_SYMBOL_GPL(crypto_il_tab);
+
+static inline u8 __init f_mult(u8 a, u8 b)
 {
        u8 aa = log_tab[a], cc = aa + log_tab[b];
 
        return pow_tab[cc + (cc < aa ? 1 : 0)];
 }
 
-#define ff_mult(a,b)    (a && b ? f_mult(a, b) : 0)
-
-#define f_rn(bo, bi, n, k)                                     \
-    bo[n] =  ft_tab[0][byte(bi[n],0)] ^                                \
-             ft_tab[1][byte(bi[(n + 1) & 3],1)] ^              \
-             ft_tab[2][byte(bi[(n + 2) & 3],2)] ^              \
-             ft_tab[3][byte(bi[(n + 3) & 3],3)] ^ *(k + n)
-
-#define i_rn(bo, bi, n, k)                                     \
-    bo[n] =  it_tab[0][byte(bi[n],0)] ^                                \
-             it_tab[1][byte(bi[(n + 3) & 3],1)] ^              \
-             it_tab[2][byte(bi[(n + 2) & 3],2)] ^              \
-             it_tab[3][byte(bi[(n + 1) & 3],3)] ^ *(k + n)
-
-#define ls_box(x)                              \
-    ( fl_tab[0][byte(x, 0)] ^                  \
-      fl_tab[1][byte(x, 1)] ^                  \
-      fl_tab[2][byte(x, 2)] ^                  \
-      fl_tab[3][byte(x, 3)] )
-
-#define f_rl(bo, bi, n, k)                                     \
-    bo[n] =  fl_tab[0][byte(bi[n],0)] ^                                \
-             fl_tab[1][byte(bi[(n + 1) & 3],1)] ^              \
-             fl_tab[2][byte(bi[(n + 2) & 3],2)] ^              \
-             fl_tab[3][byte(bi[(n + 3) & 3],3)] ^ *(k + n)
-
-#define i_rl(bo, bi, n, k)                                     \
-    bo[n] =  il_tab[0][byte(bi[n],0)] ^                                \
-             il_tab[1][byte(bi[(n + 3) & 3],1)] ^              \
-             il_tab[2][byte(bi[(n + 2) & 3],2)] ^              \
-             il_tab[3][byte(bi[(n + 1) & 3],3)] ^ *(k + n)
-
-static void __init
-gen_tabs (void)
+#define ff_mult(a, b)  (a && b ? f_mult(a, b) : 0)
+
+static void __init gen_tabs(void)
 {
        u32 i, t;
        u8 p, q;
 
-       /* log and power tables for GF(2**8) finite field with
-          0x011b as modular polynomial - the simplest primitive
-          root is 0x03, used here to generate the tables */
+       /*
+        * log and power tables for GF(2**8) finite field with
+        * 0x011b as modular polynomial - the simplest primitive
+        * root is 0x03, used here to generate the tables
+        */
 
        for (i = 0, p = 1; i < 256; ++i) {
                pow_tab[i] = (u8) p;
@@ -169,92 +123,119 @@ gen_tabs (void)
                p = sbx_tab[i];
 
                t = p;
-               fl_tab[0][i] = t;
-               fl_tab[1][i] = rol32(t, 8);
-               fl_tab[2][i] = rol32(t, 16);
-               fl_tab[3][i] = rol32(t, 24);
+               crypto_fl_tab[0][i] = t;
+               crypto_fl_tab[1][i] = rol32(t, 8);
+               crypto_fl_tab[2][i] = rol32(t, 16);
+               crypto_fl_tab[3][i] = rol32(t, 24);
 
-               t = ((u32) ff_mult (2, p)) |
+               t = ((u32) ff_mult(2, p)) |
                    ((u32) p << 8) |
-                   ((u32) p << 16) | ((u32) ff_mult (3, p) << 24);
+                   ((u32) p << 16) | ((u32) ff_mult(3, p) << 24);
 
-               ft_tab[0][i] = t;
-               ft_tab[1][i] = rol32(t, 8);
-               ft_tab[2][i] = rol32(t, 16);
-               ft_tab[3][i] = rol32(t, 24);
+               crypto_ft_tab[0][i] = t;
+               crypto_ft_tab[1][i] = rol32(t, 8);
+               crypto_ft_tab[2][i] = rol32(t, 16);
+               crypto_ft_tab[3][i] = rol32(t, 24);
 
                p = isb_tab[i];
 
                t = p;
-               il_tab[0][i] = t;
-               il_tab[1][i] = rol32(t, 8);
-               il_tab[2][i] = rol32(t, 16);
-               il_tab[3][i] = rol32(t, 24);
-
-               t = ((u32) ff_mult (14, p)) |
-                   ((u32) ff_mult (9, p) << 8) |
-                   ((u32) ff_mult (13, p) << 16) |
-                   ((u32) ff_mult (11, p) << 24);
-
-               it_tab[0][i] = t;
-               it_tab[1][i] = rol32(t, 8);
-               it_tab[2][i] = rol32(t, 16);
-               it_tab[3][i] = rol32(t, 24);
+               crypto_il_tab[0][i] = t;
+               crypto_il_tab[1][i] = rol32(t, 8);
+               crypto_il_tab[2][i] = rol32(t, 16);
+               crypto_il_tab[3][i] = rol32(t, 24);
+
+               t = ((u32) ff_mult(14, p)) |
+                   ((u32) ff_mult(9, p) << 8) |
+                   ((u32) ff_mult(13, p) << 16) |
+                   ((u32) ff_mult(11, p) << 24);
+
+               crypto_it_tab[0][i] = t;
+               crypto_it_tab[1][i] = rol32(t, 8);
+               crypto_it_tab[2][i] = rol32(t, 16);
+               crypto_it_tab[3][i] = rol32(t, 24);
        }
 }
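
A worked check of the table arithmetic, using the standard FIPS-197 example values in GF(2^8) mod 0x11b: {02}·{57} = 0x57 << 1 = {ae} (no reduction, since the top bit of 0x57 is clear), and {03}·{57} = {02}·{57} ^ {57} = 0xae ^ 0x57 = {f9}. f_mult() reaches the same results through the tables because log(a) + log(b) indexes pow_tab modulo 255 — the `cc < aa` test folds the mod-255 end-around carry into an 8-bit add. Illustrative only, as the tables and helpers are local and __init here:

    /* after gen_tabs(): */
    BUG_ON(ff_mult(0x02, 0x57) != 0xae);
    BUG_ON(ff_mult(0x03, 0x57) != 0xf9);
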
 
-#define star_x(x) (((x) & 0x7f7f7f7f) << 1) ^ ((((x) & 0x80808080) >> 7) * 0x1b)
-
-#define imix_col(y,x)       \
-    u   = star_x(x);        \
-    v   = star_x(u);        \
-    w   = star_x(v);        \
-    t   = w ^ (x);          \
-   (y)  = u ^ v ^ w;        \
-   (y) ^= ror32(u ^ t,  8) ^ \
-          ror32(v ^ t, 16) ^ \
-          ror32(t,24)
-
 /* initialise the key schedule from the user supplied key */
 
-#define loop4(i)                                    \
-{   t = ror32(t,  8); t = ls_box(t) ^ rco_tab[i];    \
-    t ^= E_KEY[4 * i];     E_KEY[4 * i + 4] = t;    \
-    t ^= E_KEY[4 * i + 1]; E_KEY[4 * i + 5] = t;    \
-    t ^= E_KEY[4 * i + 2]; E_KEY[4 * i + 6] = t;    \
-    t ^= E_KEY[4 * i + 3]; E_KEY[4 * i + 7] = t;    \
-}
-
-#define loop6(i)                                    \
-{   t = ror32(t,  8); t = ls_box(t) ^ rco_tab[i];    \
-    t ^= E_KEY[6 * i];     E_KEY[6 * i + 6] = t;    \
-    t ^= E_KEY[6 * i + 1]; E_KEY[6 * i + 7] = t;    \
-    t ^= E_KEY[6 * i + 2]; E_KEY[6 * i + 8] = t;    \
-    t ^= E_KEY[6 * i + 3]; E_KEY[6 * i + 9] = t;    \
-    t ^= E_KEY[6 * i + 4]; E_KEY[6 * i + 10] = t;   \
-    t ^= E_KEY[6 * i + 5]; E_KEY[6 * i + 11] = t;   \
-}
-
-#define loop8(i)                                    \
-{   t = ror32(t,  8); ; t = ls_box(t) ^ rco_tab[i];  \
-    t ^= E_KEY[8 * i];     E_KEY[8 * i + 8] = t;    \
-    t ^= E_KEY[8 * i + 1]; E_KEY[8 * i + 9] = t;    \
-    t ^= E_KEY[8 * i + 2]; E_KEY[8 * i + 10] = t;   \
-    t ^= E_KEY[8 * i + 3]; E_KEY[8 * i + 11] = t;   \
-    t  = E_KEY[8 * i + 4] ^ ls_box(t);    \
-    E_KEY[8 * i + 12] = t;                \
-    t ^= E_KEY[8 * i + 5]; E_KEY[8 * i + 13] = t;   \
-    t ^= E_KEY[8 * i + 6]; E_KEY[8 * i + 14] = t;   \
-    t ^= E_KEY[8 * i + 7]; E_KEY[8 * i + 15] = t;   \
-}
+#define star_x(x) (((x) & 0x7f7f7f7f) << 1) ^ ((((x) & 0x80808080) >> 7) * 0x1b)
 
-static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
-                      unsigned int key_len)
+#define imix_col(y, x) do {             \
+       u       = star_x(x);            \
+       v       = star_x(u);            \
+       w       = star_x(v);            \
+       t       = w ^ (x);              \
+       (y)     = u ^ v ^ w;            \
+       (y)     ^= ror32(u ^ t, 8) ^    \
+               ror32(v ^ t, 16) ^      \
+               ror32(t, 24);           \
+} while (0)
+
+#define ls_box(x)              \
+       crypto_fl_tab[0][byte(x, 0)] ^  \
+       crypto_fl_tab[1][byte(x, 1)] ^  \
+       crypto_fl_tab[2][byte(x, 2)] ^  \
+       crypto_fl_tab[3][byte(x, 3)]
+
+#define loop4(i)       do {            \
+       t = ror32(t, 8);                \
+       t = ls_box(t) ^ rco_tab[i];     \
+       t ^= ctx->key_enc[4 * i];               \
+       ctx->key_enc[4 * i + 4] = t;            \
+       t ^= ctx->key_enc[4 * i + 1];           \
+       ctx->key_enc[4 * i + 5] = t;            \
+       t ^= ctx->key_enc[4 * i + 2];           \
+       ctx->key_enc[4 * i + 6] = t;            \
+       t ^= ctx->key_enc[4 * i + 3];           \
+       ctx->key_enc[4 * i + 7] = t;            \
+} while (0)
+
+#define loop6(i)       do {            \
+       t = ror32(t, 8);                \
+       t = ls_box(t) ^ rco_tab[i];     \
+       t ^= ctx->key_enc[6 * i];               \
+       ctx->key_enc[6 * i + 6] = t;            \
+       t ^= ctx->key_enc[6 * i + 1];           \
+       ctx->key_enc[6 * i + 7] = t;            \
+       t ^= ctx->key_enc[6 * i + 2];           \
+       ctx->key_enc[6 * i + 8] = t;            \
+       t ^= ctx->key_enc[6 * i + 3];           \
+       ctx->key_enc[6 * i + 9] = t;            \
+       t ^= ctx->key_enc[6 * i + 4];           \
+       ctx->key_enc[6 * i + 10] = t;           \
+       t ^= ctx->key_enc[6 * i + 5];           \
+       ctx->key_enc[6 * i + 11] = t;           \
+} while (0)
+
+#define loop8(i)       do {                    \
+       t = ror32(t, 8);                        \
+       t = ls_box(t) ^ rco_tab[i];             \
+       t ^= ctx->key_enc[8 * i];                       \
+       ctx->key_enc[8 * i + 8] = t;                    \
+       t ^= ctx->key_enc[8 * i + 1];                   \
+       ctx->key_enc[8 * i + 9] = t;                    \
+       t ^= ctx->key_enc[8 * i + 2];                   \
+       ctx->key_enc[8 * i + 10] = t;                   \
+       t ^= ctx->key_enc[8 * i + 3];                   \
+       ctx->key_enc[8 * i + 11] = t;                   \
+       t  = ctx->key_enc[8 * i + 4] ^ ls_box(t);       \
+       ctx->key_enc[8 * i + 12] = t;                   \
+       t ^= ctx->key_enc[8 * i + 5];                   \
+       ctx->key_enc[8 * i + 13] = t;                   \
+       t ^= ctx->key_enc[8 * i + 6];                   \
+       ctx->key_enc[8 * i + 14] = t;                   \
+       t ^= ctx->key_enc[8 * i + 7];                   \
+       ctx->key_enc[8 * i + 15] = t;                   \
+} while (0)
+
+int crypto_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+               unsigned int key_len)
 {
-       struct aes_ctx *ctx = crypto_tfm_ctx(tfm);
+       struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
        const __le32 *key = (const __le32 *)in_key;
        u32 *flags = &tfm->crt_flags;
-       u32 i, t, u, v, w;
+       u32 i, t, u, v, w, j;
 
        if (key_len % 8) {
                *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
@@ -263,95 +244,113 @@ static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 
        ctx->key_length = key_len;
 
-       E_KEY[0] = le32_to_cpu(key[0]);
-       E_KEY[1] = le32_to_cpu(key[1]);
-       E_KEY[2] = le32_to_cpu(key[2]);
-       E_KEY[3] = le32_to_cpu(key[3]);
+       ctx->key_dec[key_len + 24] = ctx->key_enc[0] = le32_to_cpu(key[0]);
+       ctx->key_dec[key_len + 25] = ctx->key_enc[1] = le32_to_cpu(key[1]);
+       ctx->key_dec[key_len + 26] = ctx->key_enc[2] = le32_to_cpu(key[2]);
+       ctx->key_dec[key_len + 27] = ctx->key_enc[3] = le32_to_cpu(key[3]);
 
        switch (key_len) {
        case 16:
-               t = E_KEY[3];
+               t = ctx->key_enc[3];
                for (i = 0; i < 10; ++i)
-                       loop4 (i);
+                       loop4(i);
                break;
 
        case 24:
-               E_KEY[4] = le32_to_cpu(key[4]);
-               t = E_KEY[5] = le32_to_cpu(key[5]);
+               ctx->key_enc[4] = le32_to_cpu(key[4]);
+               t = ctx->key_enc[5] = le32_to_cpu(key[5]);
                for (i = 0; i < 8; ++i)
-                       loop6 (i);
+                       loop6(i);
                break;
 
        case 32:
-               E_KEY[4] = le32_to_cpu(key[4]);
-               E_KEY[5] = le32_to_cpu(key[5]);
-               E_KEY[6] = le32_to_cpu(key[6]);
-               t = E_KEY[7] = le32_to_cpu(key[7]);
+               ctx->key_enc[4] = le32_to_cpu(key[4]);
+               ctx->key_enc[5] = le32_to_cpu(key[5]);
+               ctx->key_enc[6] = le32_to_cpu(key[6]);
+               t = ctx->key_enc[7] = le32_to_cpu(key[7]);
                for (i = 0; i < 7; ++i)
-                       loop8 (i);
+                       loop8(i);
                break;
        }
 
-       D_KEY[0] = E_KEY[0];
-       D_KEY[1] = E_KEY[1];
-       D_KEY[2] = E_KEY[2];
-       D_KEY[3] = E_KEY[3];
+       ctx->key_dec[0] = ctx->key_enc[key_len + 24];
+       ctx->key_dec[1] = ctx->key_enc[key_len + 25];
+       ctx->key_dec[2] = ctx->key_enc[key_len + 26];
+       ctx->key_dec[3] = ctx->key_enc[key_len + 27];
 
        for (i = 4; i < key_len + 24; ++i) {
-               imix_col (D_KEY[i], E_KEY[i]);
+               j = key_len + 24 - (i & ~3) + (i & 3);
+               imix_col(ctx->key_dec[j], ctx->key_enc[i]);
        }
-
        return 0;
 }
+EXPORT_SYMBOL_GPL(crypto_aes_set_key);
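
The new key_dec layout is what lets decryption walk its schedule forward. The final loop reverses the encryption schedule in 4-word groups while imix_col() applies the inverse MixColumns; concretely, for AES-128 (key_len = 16, so key_len + 24 = 40):

    /*
     * j = 40 - (i & ~3) + (i & 3):
     *   i =  4..7   ->  j = 36..39
     *   i =  8..11  ->  j = 32..35
     *   ...
     *   i = 36..39  ->  j =  4..7
     *
     * key_dec[0..3] and key_dec[40..43] take the last and first
     * encryption round keys verbatim (the outer whitening keys are
     * not column-mixed), so aes_decrypt() below can consume key_dec
     * with k += 4 instead of the old k -= 4.
     */
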
 
 /* encrypt a block of text */
 
-#define f_nround(bo, bi, k) \
-    f_rn(bo, bi, 0, k);     \
-    f_rn(bo, bi, 1, k);     \
-    f_rn(bo, bi, 2, k);     \
-    f_rn(bo, bi, 3, k);     \
-    k += 4
-
-#define f_lround(bo, bi, k) \
-    f_rl(bo, bi, 0, k);     \
-    f_rl(bo, bi, 1, k);     \
-    f_rl(bo, bi, 2, k);     \
-    f_rl(bo, bi, 3, k)
+#define f_rn(bo, bi, n, k)     do {                            \
+       bo[n] = crypto_ft_tab[0][byte(bi[n], 0)] ^                      \
+               crypto_ft_tab[1][byte(bi[(n + 1) & 3], 1)] ^            \
+               crypto_ft_tab[2][byte(bi[(n + 2) & 3], 2)] ^            \
+               crypto_ft_tab[3][byte(bi[(n + 3) & 3], 3)] ^ *(k + n);  \
+} while (0)
+
+#define f_nround(bo, bi, k)    do {\
+       f_rn(bo, bi, 0, k);     \
+       f_rn(bo, bi, 1, k);     \
+       f_rn(bo, bi, 2, k);     \
+       f_rn(bo, bi, 3, k);     \
+       k += 4;                 \
+} while (0)
+
+#define f_rl(bo, bi, n, k)     do {                            \
+       bo[n] = crypto_fl_tab[0][byte(bi[n], 0)] ^                      \
+               crypto_fl_tab[1][byte(bi[(n + 1) & 3], 1)] ^            \
+               crypto_fl_tab[2][byte(bi[(n + 2) & 3], 2)] ^            \
+               crypto_fl_tab[3][byte(bi[(n + 3) & 3], 3)] ^ *(k + n);  \
+} while (0)
+
+#define f_lround(bo, bi, k)    do {\
+       f_rl(bo, bi, 0, k);     \
+       f_rl(bo, bi, 1, k);     \
+       f_rl(bo, bi, 2, k);     \
+       f_rl(bo, bi, 3, k);     \
+} while (0)
 
 static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
-       const struct aes_ctx *ctx = crypto_tfm_ctx(tfm);
+       const struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
        const __le32 *src = (const __le32 *)in;
        __le32 *dst = (__le32 *)out;
        u32 b0[4], b1[4];
-       const u32 *kp = E_KEY + 4;
+       const u32 *kp = ctx->key_enc + 4;
+       const int key_len = ctx->key_length;
 
-       b0[0] = le32_to_cpu(src[0]) ^ E_KEY[0];
-       b0[1] = le32_to_cpu(src[1]) ^ E_KEY[1];
-       b0[2] = le32_to_cpu(src[2]) ^ E_KEY[2];
-       b0[3] = le32_to_cpu(src[3]) ^ E_KEY[3];
+       b0[0] = le32_to_cpu(src[0]) ^ ctx->key_enc[0];
+       b0[1] = le32_to_cpu(src[1]) ^ ctx->key_enc[1];
+       b0[2] = le32_to_cpu(src[2]) ^ ctx->key_enc[2];
+       b0[3] = le32_to_cpu(src[3]) ^ ctx->key_enc[3];
 
-       if (ctx->key_length > 24) {
-               f_nround (b1, b0, kp);
-               f_nround (b0, b1, kp);
+       if (key_len > 24) {
+               f_nround(b1, b0, kp);
+               f_nround(b0, b1, kp);
        }
 
-       if (ctx->key_length > 16) {
-               f_nround (b1, b0, kp);
-               f_nround (b0, b1, kp);
+       if (key_len > 16) {
+               f_nround(b1, b0, kp);
+               f_nround(b0, b1, kp);
        }
 
-       f_nround (b1, b0, kp);
-       f_nround (b0, b1, kp);
-       f_nround (b1, b0, kp);
-       f_nround (b0, b1, kp);
-       f_nround (b1, b0, kp);
-       f_nround (b0, b1, kp);
-       f_nround (b1, b0, kp);
-       f_nround (b0, b1, kp);
-       f_nround (b1, b0, kp);
-       f_lround (b0, b1, kp);
+       f_nround(b1, b0, kp);
+       f_nround(b0, b1, kp);
+       f_nround(b1, b0, kp);
+       f_nround(b0, b1, kp);
+       f_nround(b1, b0, kp);
+       f_nround(b0, b1, kp);
+       f_nround(b1, b0, kp);
+       f_nround(b0, b1, kp);
+       f_nround(b1, b0, kp);
+       f_lround(b0, b1, kp);
 
        dst[0] = cpu_to_le32(b0[0]);
        dst[1] = cpu_to_le32(b0[1]);
@@ -361,53 +360,69 @@ static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 
 /* decrypt a block of text */
 
-#define i_nround(bo, bi, k) \
-    i_rn(bo, bi, 0, k);     \
-    i_rn(bo, bi, 1, k);     \
-    i_rn(bo, bi, 2, k);     \
-    i_rn(bo, bi, 3, k);     \
-    k -= 4
-
-#define i_lround(bo, bi, k) \
-    i_rl(bo, bi, 0, k);     \
-    i_rl(bo, bi, 1, k);     \
-    i_rl(bo, bi, 2, k);     \
-    i_rl(bo, bi, 3, k)
+#define i_rn(bo, bi, n, k)     do {                            \
+       bo[n] = crypto_it_tab[0][byte(bi[n], 0)] ^                      \
+               crypto_it_tab[1][byte(bi[(n + 3) & 3], 1)] ^            \
+               crypto_it_tab[2][byte(bi[(n + 2) & 3], 2)] ^            \
+               crypto_it_tab[3][byte(bi[(n + 1) & 3], 3)] ^ *(k + n);  \
+} while (0)
+
+#define i_nround(bo, bi, k)    do {\
+       i_rn(bo, bi, 0, k);     \
+       i_rn(bo, bi, 1, k);     \
+       i_rn(bo, bi, 2, k);     \
+       i_rn(bo, bi, 3, k);     \
+       k += 4;                 \
+} while (0)
+
+#define i_rl(bo, bi, n, k)     do {                            \
+       bo[n] = crypto_il_tab[0][byte(bi[n], 0)] ^                      \
+               crypto_il_tab[1][byte(bi[(n + 3) & 3], 1)] ^            \
+               crypto_il_tab[2][byte(bi[(n + 2) & 3], 2)] ^            \
+               crypto_il_tab[3][byte(bi[(n + 1) & 3], 3)] ^ *(k + n);  \
+} while (0)
+
+#define i_lround(bo, bi, k)    do {\
+       i_rl(bo, bi, 0, k);     \
+       i_rl(bo, bi, 1, k);     \
+       i_rl(bo, bi, 2, k);     \
+       i_rl(bo, bi, 3, k);     \
+} while (0)
 
 static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
-       const struct aes_ctx *ctx = crypto_tfm_ctx(tfm);
+       const struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
        const __le32 *src = (const __le32 *)in;
        __le32 *dst = (__le32 *)out;
        u32 b0[4], b1[4];
        const int key_len = ctx->key_length;
-       const u32 *kp = D_KEY + key_len + 20;
+       const u32 *kp = ctx->key_dec + 4;
 
-       b0[0] = le32_to_cpu(src[0]) ^ E_KEY[key_len + 24];
-       b0[1] = le32_to_cpu(src[1]) ^ E_KEY[key_len + 25];
-       b0[2] = le32_to_cpu(src[2]) ^ E_KEY[key_len + 26];
-       b0[3] = le32_to_cpu(src[3]) ^ E_KEY[key_len + 27];
+       b0[0] = le32_to_cpu(src[0]) ^ ctx->key_dec[0];
+       b0[1] = le32_to_cpu(src[1]) ^ ctx->key_dec[1];
+       b0[2] = le32_to_cpu(src[2]) ^ ctx->key_dec[2];
+       b0[3] = le32_to_cpu(src[3]) ^ ctx->key_dec[3];
 
        if (key_len > 24) {
-               i_nround (b1, b0, kp);
-               i_nround (b0, b1, kp);
+               i_nround(b1, b0, kp);
+               i_nround(b0, b1, kp);
        }
 
        if (key_len > 16) {
-               i_nround (b1, b0, kp);
-               i_nround (b0, b1, kp);
+               i_nround(b1, b0, kp);
+               i_nround(b0, b1, kp);
        }
 
-       i_nround (b1, b0, kp);
-       i_nround (b0, b1, kp);
-       i_nround (b1, b0, kp);
-       i_nround (b0, b1, kp);
-       i_nround (b1, b0, kp);
-       i_nround (b0, b1, kp);
-       i_nround (b1, b0, kp);
-       i_nround (b0, b1, kp);
-       i_nround (b1, b0, kp);
-       i_lround (b0, b1, kp);
+       i_nround(b1, b0, kp);
+       i_nround(b0, b1, kp);
+       i_nround(b1, b0, kp);
+       i_nround(b0, b1, kp);
+       i_nround(b1, b0, kp);
+       i_nround(b0, b1, kp);
+       i_nround(b1, b0, kp);
+       i_nround(b0, b1, kp);
+       i_nround(b1, b0, kp);
+       i_lround(b0, b1, kp);
 
        dst[0] = cpu_to_le32(b0[0]);
        dst[1] = cpu_to_le32(b0[1]);
@@ -415,14 +430,13 @@ static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
        dst[3] = cpu_to_le32(b0[3]);
 }
 
-
 static struct crypto_alg aes_alg = {
        .cra_name               =       "aes",
        .cra_driver_name        =       "aes-generic",
        .cra_priority           =       100,
        .cra_flags              =       CRYPTO_ALG_TYPE_CIPHER,
        .cra_blocksize          =       AES_BLOCK_SIZE,
-       .cra_ctxsize            =       sizeof(struct aes_ctx),
+       .cra_ctxsize            =       sizeof(struct crypto_aes_ctx),
        .cra_alignmask          =       3,
        .cra_module             =       THIS_MODULE,
        .cra_list               =       LIST_HEAD_INIT(aes_alg.cra_list),
@@ -430,9 +444,9 @@ static struct crypto_alg aes_alg = {
                .cipher = {
                        .cia_min_keysize        =       AES_MIN_KEY_SIZE,
                        .cia_max_keysize        =       AES_MAX_KEY_SIZE,
-                       .cia_setkey             =       aes_set_key,
-                       .cia_encrypt            =       aes_encrypt,
-                       .cia_decrypt            =       aes_decrypt
+                       .cia_setkey             =       crypto_aes_set_key,
+                       .cia_encrypt            =       aes_encrypt,
+                       .cia_decrypt            =       aes_decrypt
                }
        }
 };
index 8383282de1ddc39422cd227b7a800a095ad14f67..e65cb50cf4aff8b739130ba7012e383196ef286a 100644 (file)
@@ -472,7 +472,7 @@ int crypto_check_attr_type(struct rtattr **tb, u32 type)
 }
 EXPORT_SYMBOL_GPL(crypto_check_attr_type);
 
-struct crypto_alg *crypto_attr_alg(struct rtattr *rta, u32 type, u32 mask)
+const char *crypto_attr_alg_name(struct rtattr *rta)
 {
        struct crypto_attr_alg *alga;
 
@@ -486,7 +486,21 @@ struct crypto_alg *crypto_attr_alg(struct rtattr *rta, u32 type, u32 mask)
        alga = RTA_DATA(rta);
        alga->name[CRYPTO_MAX_ALG_NAME - 1] = 0;
 
-       return crypto_alg_mod_lookup(alga->name, type, mask);
+       return alga->name;
+}
+EXPORT_SYMBOL_GPL(crypto_attr_alg_name);
+
+struct crypto_alg *crypto_attr_alg(struct rtattr *rta, u32 type, u32 mask)
+{
+       const char *name;
+       int err;
+
+       name = crypto_attr_alg_name(rta);
+       err = PTR_ERR(name);
+       if (IS_ERR(name))
+               return ERR_PTR(err);
+
+       return crypto_alg_mod_lookup(name, type, mask);
 }
 EXPORT_SYMBOL_GPL(crypto_attr_alg);
 
@@ -605,6 +619,53 @@ int crypto_tfm_in_queue(struct crypto_queue *queue, struct crypto_tfm *tfm)
 }
 EXPORT_SYMBOL_GPL(crypto_tfm_in_queue);
 
+static inline void crypto_inc_byte(u8 *a, unsigned int size)
+{
+       u8 *b = (a + size);
+       u8 c;
+
+       for (; size; size--) {
+               c = *--b + 1;
+               *b = c;
+               if (c)
+                       break;
+       }
+}
+
+void crypto_inc(u8 *a, unsigned int size)
+{
+       __be32 *b = (__be32 *)(a + size);
+       u32 c;
+
+       for (; size >= 4; size -= 4) {
+               c = be32_to_cpu(*--b) + 1;
+               *b = cpu_to_be32(c);
+               if (c)
+                       return;
+       }
+
+       crypto_inc_byte(a, size);
+}
+EXPORT_SYMBOL_GPL(crypto_inc);
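
crypto_inc() treats the buffer as one big-endian counter, incrementing 32-bit words from the end and propagating the carry; crypto_inc_byte() mops up a head that is not a multiple of four bytes. Note the u32 fast path implicitly assumes a 4-byte-aligned buffer, which holds for the block-sized counters CTR mode feeds it. A small sketch:

    u8 ctr[8] = { 0x00, 0x00, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff };

    crypto_inc(ctr, sizeof(ctr));
    /* ctr == 00 00 00 02 00 00 00 00: the low word wrapped to zero
     * and the carry moved into the next word. */
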
+
+static inline void crypto_xor_byte(u8 *a, const u8 *b, unsigned int size)
+{
+       for (; size; size--)
+               *a++ ^= *b++;
+}
+
+void crypto_xor(u8 *dst, const u8 *src, unsigned int size)
+{
+       u32 *a = (u32 *)dst;
+       const u32 *b = (const u32 *)src;
+
+       for (; size >= 4; size -= 4)
+               *a++ ^= *b++;
+
+       crypto_xor_byte((u8 *)a, (const u8 *)b, size);
+}
+EXPORT_SYMBOL_GPL(crypto_xor);
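
crypto_xor() is the companion helper: it XORs src into dst a word at a time with a byte-wise tail, again assuming word-aligned inputs. Typical CBC-style use (names illustrative):

    /* chaining step: fold the previous ciphertext block into the IV */
    crypto_xor(iv, prev_block, bsize);
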
+
 static int __init crypto_algapi_init(void)
 {
        crypto_init_proc();
index 1f5c724773568acf31f7dde568972f4925f39407..a2496d1bc6d42234dba6477580a9ddafed6759a9 100644 (file)
@@ -137,7 +137,7 @@ static struct crypto_alg *crypto_larval_alloc(const char *name, u32 type,
        return alg;
 }
 
-static void crypto_larval_kill(struct crypto_alg *alg)
+void crypto_larval_kill(struct crypto_alg *alg)
 {
        struct crypto_larval *larval = (void *)alg;
 
@@ -147,6 +147,7 @@ static void crypto_larval_kill(struct crypto_alg *alg)
        complete_all(&larval->completion);
        crypto_alg_put(alg);
 }
+EXPORT_SYMBOL_GPL(crypto_larval_kill);
 
 static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg)
 {
@@ -176,11 +177,9 @@ static struct crypto_alg *crypto_alg_lookup(const char *name, u32 type,
        return alg;
 }
 
-struct crypto_alg *crypto_alg_mod_lookup(const char *name, u32 type, u32 mask)
+struct crypto_alg *crypto_larval_lookup(const char *name, u32 type, u32 mask)
 {
        struct crypto_alg *alg;
-       struct crypto_alg *larval;
-       int ok;
 
        if (!name)
                return ERR_PTR(-ENOENT);
@@ -193,7 +192,17 @@ struct crypto_alg *crypto_alg_mod_lookup(const char *name, u32 type, u32 mask)
        if (alg)
                return crypto_is_larval(alg) ? crypto_larval_wait(alg) : alg;
 
-       larval = crypto_larval_alloc(name, type, mask);
+       return crypto_larval_alloc(name, type, mask);
+}
+EXPORT_SYMBOL_GPL(crypto_larval_lookup);
+
+struct crypto_alg *crypto_alg_mod_lookup(const char *name, u32 type, u32 mask)
+{
+       struct crypto_alg *alg;
+       struct crypto_alg *larval;
+       int ok;
+
+       larval = crypto_larval_lookup(name, type, mask);
        if (IS_ERR(larval) || !crypto_is_larval(larval))
                return larval;
 
index 126a529b496d5df32959499ff22739767eca52a3..ed8ac5a6fa5ff0bc22fa67fc193a1b0629d4a063 100644 (file)
  *
  */
 
-#include <crypto/algapi.h>
+#include <crypto/aead.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/authenc.h>
+#include <crypto/scatterwalk.h>
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/rtnetlink.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 
-#include "scatterwalk.h"
-
 struct authenc_instance_ctx {
        struct crypto_spawn auth;
-       struct crypto_spawn enc;
-
-       unsigned int authsize;
-       unsigned int enckeylen;
+       struct crypto_skcipher_spawn enc;
 };
 
 struct crypto_authenc_ctx {
@@ -37,19 +36,31 @@ struct crypto_authenc_ctx {
 static int crypto_authenc_setkey(struct crypto_aead *authenc, const u8 *key,
                                 unsigned int keylen)
 {
-       struct authenc_instance_ctx *ictx =
-               crypto_instance_ctx(crypto_aead_alg_instance(authenc));
-       unsigned int enckeylen = ictx->enckeylen;
        unsigned int authkeylen;
+       unsigned int enckeylen;
        struct crypto_authenc_ctx *ctx = crypto_aead_ctx(authenc);
        struct crypto_hash *auth = ctx->auth;
        struct crypto_ablkcipher *enc = ctx->enc;
+       struct rtattr *rta = (void *)key;
+       struct crypto_authenc_key_param *param;
        int err = -EINVAL;
 
-       if (keylen < enckeylen) {
-               crypto_aead_set_flags(authenc, CRYPTO_TFM_RES_BAD_KEY_LEN);
-               goto out;
-       }
+       if (!RTA_OK(rta, keylen))
+               goto badkey;
+       if (rta->rta_type != CRYPTO_AUTHENC_KEYA_PARAM)
+               goto badkey;
+       if (RTA_PAYLOAD(rta) < sizeof(*param))
+               goto badkey;
+
+       param = RTA_DATA(rta);
+       enckeylen = be32_to_cpu(param->enckeylen);
+
+       key += RTA_ALIGN(rta->rta_len);
+       keylen -= RTA_ALIGN(rta->rta_len);
+
+       if (keylen < enckeylen)
+               goto badkey;
+
        authkeylen = keylen - enckeylen;
 
        crypto_hash_clear_flags(auth, CRYPTO_TFM_REQ_MASK);
@@ -71,21 +82,38 @@ static int crypto_authenc_setkey(struct crypto_aead *authenc, const u8 *key,
 
 out:
        return err;
+
+badkey:
+       crypto_aead_set_flags(authenc, CRYPTO_TFM_RES_BAD_KEY_LEN);
+       goto out;
 }
 
-static int crypto_authenc_hash(struct aead_request *req)
+static void authenc_chain(struct scatterlist *head, struct scatterlist *sg,
+                         int chain)
+{
+       if (chain) {
+               head->length += sg->length;
+               sg = scatterwalk_sg_next(sg);
+       }
+
+       if (sg)
+               scatterwalk_sg_chain(head, 2, sg);
+       else
+               sg_mark_end(head);
+}
+
+static u8 *crypto_authenc_hash(struct aead_request *req, unsigned int flags,
+                              struct scatterlist *cipher,
+                              unsigned int cryptlen)
 {
        struct crypto_aead *authenc = crypto_aead_reqtfm(req);
-       struct authenc_instance_ctx *ictx =
-               crypto_instance_ctx(crypto_aead_alg_instance(authenc));
        struct crypto_authenc_ctx *ctx = crypto_aead_ctx(authenc);
        struct crypto_hash *auth = ctx->auth;
        struct hash_desc desc = {
                .tfm = auth,
+               .flags = aead_request_flags(req) & flags,
        };
        u8 *hash = aead_request_ctx(req);
-       struct scatterlist *dst = req->dst;
-       unsigned int cryptlen = req->cryptlen;
        int err;
 
        hash = (u8 *)ALIGN((unsigned long)hash + crypto_hash_alignmask(auth), 
@@ -100,7 +128,7 @@ static int crypto_authenc_hash(struct aead_request *req)
        if (err)
                goto auth_unlock;
 
-       err = crypto_hash_update(&desc, dst, cryptlen);
+       err = crypto_hash_update(&desc, cipher, cryptlen);
        if (err)
                goto auth_unlock;
 
@@ -109,17 +137,53 @@ auth_unlock:
        spin_unlock_bh(&ctx->auth_lock);
 
        if (err)
-               return err;
+               return ERR_PTR(err);
+
+       return hash;
+}
 
-       scatterwalk_map_and_copy(hash, dst, cryptlen, ictx->authsize, 1);
+static int crypto_authenc_genicv(struct aead_request *req, u8 *iv,
+                                unsigned int flags)
+{
+       struct crypto_aead *authenc = crypto_aead_reqtfm(req);
+       struct scatterlist *dst = req->dst;
+       struct scatterlist cipher[2];
+       struct page *dstp;
+       unsigned int ivsize = crypto_aead_ivsize(authenc);
+       unsigned int cryptlen;
+       u8 *vdst;
+       u8 *hash;
+
+       dstp = sg_page(dst);
+       vdst = PageHighMem(dstp) ? NULL : page_address(dstp) + dst->offset;
+
+       sg_init_table(cipher, 2);
+       sg_set_buf(cipher, iv, ivsize);
+       authenc_chain(cipher, dst, vdst == iv + ivsize);
+
+       cryptlen = req->cryptlen + ivsize;
+       hash = crypto_authenc_hash(req, flags, cipher, cryptlen);
+       if (IS_ERR(hash))
+               return PTR_ERR(hash);
+
+       scatterwalk_map_and_copy(hash, cipher, cryptlen,
+                                crypto_aead_authsize(authenc), 1);
        return 0;
 }
 
 static void crypto_authenc_encrypt_done(struct crypto_async_request *req,
                                        int err)
 {
-       if (!err)
-               err = crypto_authenc_hash(req->data);
+       if (!err) {
+               struct aead_request *areq = req->data;
+               struct crypto_aead *authenc = crypto_aead_reqtfm(areq);
+               struct crypto_authenc_ctx *ctx = crypto_aead_ctx(authenc);
+               struct ablkcipher_request *abreq = aead_request_ctx(areq);
+               u8 *iv = (u8 *)(abreq + 1) +
+                        crypto_ablkcipher_reqsize(ctx->enc);
+
+               err = crypto_authenc_genicv(areq, iv, 0);
+       }
 
        aead_request_complete(req->data, err);
 }
@@ -129,72 +193,99 @@ static int crypto_authenc_encrypt(struct aead_request *req)
        struct crypto_aead *authenc = crypto_aead_reqtfm(req);
        struct crypto_authenc_ctx *ctx = crypto_aead_ctx(authenc);
        struct ablkcipher_request *abreq = aead_request_ctx(req);
+       struct crypto_ablkcipher *enc = ctx->enc;
+       struct scatterlist *dst = req->dst;
+       unsigned int cryptlen = req->cryptlen;
+       u8 *iv = (u8 *)(abreq + 1) + crypto_ablkcipher_reqsize(enc);
        int err;
 
-       ablkcipher_request_set_tfm(abreq, ctx->enc);
+       ablkcipher_request_set_tfm(abreq, enc);
        ablkcipher_request_set_callback(abreq, aead_request_flags(req),
                                        crypto_authenc_encrypt_done, req);
-       ablkcipher_request_set_crypt(abreq, req->src, req->dst, req->cryptlen,
-                                    req->iv);
+       ablkcipher_request_set_crypt(abreq, req->src, dst, cryptlen, req->iv);
+
+       memcpy(iv, req->iv, crypto_aead_ivsize(authenc));
 
        err = crypto_ablkcipher_encrypt(abreq);
        if (err)
                return err;
 
-       return crypto_authenc_hash(req);
+       return crypto_authenc_genicv(req, iv, CRYPTO_TFM_REQ_MAY_SLEEP);
 }
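
Note the memcpy of req->iv into the request context just before encryption: the ICV must now cover IV || ciphertext, and the private copy both survives until the asynchronous completion handler runs and sits at a known spot, right behind the ablkcipher sub-request, where genicv can scatterlist-chain it in front of the ciphertext.
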
 
-static int crypto_authenc_verify(struct aead_request *req)
+static void crypto_authenc_givencrypt_done(struct crypto_async_request *req,
+                                          int err)
 {
-       struct crypto_aead *authenc = crypto_aead_reqtfm(req);
-       struct authenc_instance_ctx *ictx =
-               crypto_instance_ctx(crypto_aead_alg_instance(authenc));
+       if (!err) {
+               struct aead_givcrypt_request *greq = req->data;
+
+               err = crypto_authenc_genicv(&greq->areq, greq->giv, 0);
+       }
+
+       aead_request_complete(req->data, err);
+}
+
+static int crypto_authenc_givencrypt(struct aead_givcrypt_request *req)
+{
+       struct crypto_aead *authenc = aead_givcrypt_reqtfm(req);
        struct crypto_authenc_ctx *ctx = crypto_aead_ctx(authenc);
-       struct crypto_hash *auth = ctx->auth;
-       struct hash_desc desc = {
-               .tfm = auth,
-               .flags = aead_request_flags(req),
-       };
-       u8 *ohash = aead_request_ctx(req);
-       u8 *ihash;
-       struct scatterlist *src = req->src;
-       unsigned int cryptlen = req->cryptlen;
-       unsigned int authsize;
+       struct aead_request *areq = &req->areq;
+       struct skcipher_givcrypt_request *greq = aead_request_ctx(areq);
+       u8 *iv = req->giv;
        int err;
 
-       ohash = (u8 *)ALIGN((unsigned long)ohash + crypto_hash_alignmask(auth), 
-                           crypto_hash_alignmask(auth) + 1);
-       ihash = ohash + crypto_hash_digestsize(auth);
-
-       spin_lock_bh(&ctx->auth_lock);
-       err = crypto_hash_init(&desc);
-       if (err)
-               goto auth_unlock;
+       skcipher_givcrypt_set_tfm(greq, ctx->enc);
+       skcipher_givcrypt_set_callback(greq, aead_request_flags(areq),
+                                      crypto_authenc_givencrypt_done, areq);
+       skcipher_givcrypt_set_crypt(greq, areq->src, areq->dst, areq->cryptlen,
+                                   areq->iv);
+       skcipher_givcrypt_set_giv(greq, iv, req->seq);
 
-       err = crypto_hash_update(&desc, req->assoc, req->assoclen);
+       err = crypto_skcipher_givencrypt(greq);
        if (err)
-               goto auth_unlock;
+               return err;
 
-       err = crypto_hash_update(&desc, src, cryptlen);
-       if (err)
-               goto auth_unlock;
+       return crypto_authenc_genicv(areq, iv, CRYPTO_TFM_REQ_MAY_SLEEP);
+}
 
-       err = crypto_hash_final(&desc, ohash);
-auth_unlock:
-       spin_unlock_bh(&ctx->auth_lock);
+static int crypto_authenc_verify(struct aead_request *req,
+                                struct scatterlist *cipher,
+                                unsigned int cryptlen)
+{
+       struct crypto_aead *authenc = crypto_aead_reqtfm(req);
+       u8 *ohash;
+       u8 *ihash;
+       unsigned int authsize;
 
-       if (err)
-               return err;
+       ohash = crypto_authenc_hash(req, CRYPTO_TFM_REQ_MAY_SLEEP, cipher,
+                                   cryptlen);
+       if (IS_ERR(ohash))
+               return PTR_ERR(ohash);
 
-       authsize = ictx->authsize;
-       scatterwalk_map_and_copy(ihash, src, cryptlen, authsize, 0);
-       return memcmp(ihash, ohash, authsize) ? -EINVAL : 0;
+       authsize = crypto_aead_authsize(authenc);
+       ihash = ohash + authsize;
+       scatterwalk_map_and_copy(ihash, cipher, cryptlen, authsize, 0);
+       return memcmp(ihash, ohash, authsize) ? -EBADMSG : 0;
 }
 
-static void crypto_authenc_decrypt_done(struct crypto_async_request *req,
-                                       int err)
+static int crypto_authenc_iverify(struct aead_request *req, u8 *iv,
+                                 unsigned int cryptlen)
 {
-       aead_request_complete(req->data, err);
+       struct crypto_aead *authenc = crypto_aead_reqtfm(req);
+       struct scatterlist *src = req->src;
+       struct scatterlist cipher[2];
+       struct page *srcp;
+       unsigned int ivsize = crypto_aead_ivsize(authenc);
+       u8 *vsrc;
+
+       srcp = sg_page(src);
+       vsrc = PageHighMem(srcp) ? NULL : page_address(srcp) + src->offset;
+
+       sg_init_table(cipher, 2);
+       sg_set_buf(cipher, iv, ivsize);
+       authenc_chain(cipher, src, vsrc == iv + ivsize);
+
+       return crypto_authenc_verify(req, cipher, cryptlen + ivsize);
 }
 
 static int crypto_authenc_decrypt(struct aead_request *req)
@@ -202,17 +293,23 @@ static int crypto_authenc_decrypt(struct aead_request *req)
        struct crypto_aead *authenc = crypto_aead_reqtfm(req);
        struct crypto_authenc_ctx *ctx = crypto_aead_ctx(authenc);
        struct ablkcipher_request *abreq = aead_request_ctx(req);
+       unsigned int cryptlen = req->cryptlen;
+       unsigned int authsize = crypto_aead_authsize(authenc);
+       u8 *iv = req->iv;
        int err;
 
-       err = crypto_authenc_verify(req);
+       if (cryptlen < authsize)
+               return -EINVAL;
+       cryptlen -= authsize;
+
+       err = crypto_authenc_iverify(req, iv, cryptlen);
        if (err)
                return err;
 
        ablkcipher_request_set_tfm(abreq, ctx->enc);
        ablkcipher_request_set_callback(abreq, aead_request_flags(req),
-                                       crypto_authenc_decrypt_done, req);
-       ablkcipher_request_set_crypt(abreq, req->src, req->dst, req->cryptlen,
-                                    req->iv);
+                                       req->base.complete, req->base.data);
+       ablkcipher_request_set_crypt(abreq, req->src, req->dst, cryptlen, iv);
 
        return crypto_ablkcipher_decrypt(abreq);
 }
@@ -224,19 +321,13 @@ static int crypto_authenc_init_tfm(struct crypto_tfm *tfm)
        struct crypto_authenc_ctx *ctx = crypto_tfm_ctx(tfm);
        struct crypto_hash *auth;
        struct crypto_ablkcipher *enc;
-       unsigned int digestsize;
        int err;
 
        auth = crypto_spawn_hash(&ictx->auth);
        if (IS_ERR(auth))
                return PTR_ERR(auth);
 
-       err = -EINVAL;
-       digestsize = crypto_hash_digestsize(auth);
-       if (ictx->authsize > digestsize)
-               goto err_free_hash;
-
-       enc = crypto_spawn_ablkcipher(&ictx->enc);
+       enc = crypto_spawn_skcipher(&ictx->enc);
        err = PTR_ERR(enc);
        if (IS_ERR(enc))
                goto err_free_hash;
@@ -246,9 +337,10 @@ static int crypto_authenc_init_tfm(struct crypto_tfm *tfm)
        tfm->crt_aead.reqsize = max_t(unsigned int,
                                      (crypto_hash_alignmask(auth) &
                                       ~(crypto_tfm_ctx_alignment() - 1)) +
-                                     digestsize * 2,
-                                     sizeof(struct ablkcipher_request) +
-                                     crypto_ablkcipher_reqsize(enc));
+                                     crypto_hash_digestsize(auth) * 2,
+                                     sizeof(struct skcipher_givcrypt_request) +
+                                     crypto_ablkcipher_reqsize(enc) +
+                                     crypto_ablkcipher_ivsize(enc));
 
        spin_lock_init(&ctx->auth_lock);
 
@@ -269,75 +361,74 @@ static void crypto_authenc_exit_tfm(struct crypto_tfm *tfm)
 
 static struct crypto_instance *crypto_authenc_alloc(struct rtattr **tb)
 {
+       struct crypto_attr_type *algt;
        struct crypto_instance *inst;
        struct crypto_alg *auth;
        struct crypto_alg *enc;
        struct authenc_instance_ctx *ctx;
-       unsigned int authsize;
-       unsigned int enckeylen;
+       const char *enc_name;
        int err;
 
-       err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_AEAD);
-       if (err)
+       algt = crypto_get_attr_type(tb);
+       err = PTR_ERR(algt);
+       if (IS_ERR(algt))
                return ERR_PTR(err);
 
+       if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
+               return ERR_PTR(-EINVAL);
+
        auth = crypto_attr_alg(tb[1], CRYPTO_ALG_TYPE_HASH,
                               CRYPTO_ALG_TYPE_HASH_MASK);
        if (IS_ERR(auth))
                return ERR_PTR(PTR_ERR(auth));
 
-       err = crypto_attr_u32(tb[2], &authsize);
-       inst = ERR_PTR(err);
-       if (err)
-               goto out_put_auth;
-
-       enc = crypto_attr_alg(tb[3], CRYPTO_ALG_TYPE_BLKCIPHER,
-                             CRYPTO_ALG_TYPE_MASK);
-       inst = ERR_PTR(PTR_ERR(enc));
-       if (IS_ERR(enc))
+       enc_name = crypto_attr_alg_name(tb[2]);
+       err = PTR_ERR(enc_name);
+       if (IS_ERR(enc_name))
                goto out_put_auth;
 
-       err = crypto_attr_u32(tb[4], &enckeylen);
-       if (err)
-               goto out_put_enc;
-
        inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
        err = -ENOMEM;
        if (!inst)
-               goto out_put_enc;
-
-       err = -ENAMETOOLONG;
-       if (snprintf(inst->alg.cra_name, CRYPTO_MAX_ALG_NAME,
-                    "authenc(%s,%u,%s,%u)", auth->cra_name, authsize,
-                    enc->cra_name, enckeylen) >= CRYPTO_MAX_ALG_NAME)
-               goto err_free_inst;
-
-       if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME,
-                    "authenc(%s,%u,%s,%u)", auth->cra_driver_name,
-                    authsize, enc->cra_driver_name, enckeylen) >=
-           CRYPTO_MAX_ALG_NAME)
-               goto err_free_inst;
+               goto out_put_auth;
 
        ctx = crypto_instance_ctx(inst);
-       ctx->authsize = authsize;
-       ctx->enckeylen = enckeylen;
 
        err = crypto_init_spawn(&ctx->auth, auth, inst, CRYPTO_ALG_TYPE_MASK);
        if (err)
                goto err_free_inst;
 
-       err = crypto_init_spawn(&ctx->enc, enc, inst, CRYPTO_ALG_TYPE_MASK);
+       crypto_set_skcipher_spawn(&ctx->enc, inst);
+       err = crypto_grab_skcipher(&ctx->enc, enc_name, 0,
+                                  crypto_requires_sync(algt->type,
+                                                       algt->mask));
        if (err)
                goto err_drop_auth;
 
-       inst->alg.cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC;
+       enc = crypto_skcipher_spawn_alg(&ctx->enc);
+
+       err = -ENAMETOOLONG;
+       if (snprintf(inst->alg.cra_name, CRYPTO_MAX_ALG_NAME,
+                    "authenc(%s,%s)", auth->cra_name, enc->cra_name) >=
+           CRYPTO_MAX_ALG_NAME)
+               goto err_drop_enc;
+
+       if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME,
+                    "authenc(%s,%s)", auth->cra_driver_name,
+                    enc->cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
+               goto err_drop_enc;
+
+       inst->alg.cra_flags = CRYPTO_ALG_TYPE_AEAD;
+       inst->alg.cra_flags |= enc->cra_flags & CRYPTO_ALG_ASYNC;
        inst->alg.cra_priority = enc->cra_priority * 10 + auth->cra_priority;
        inst->alg.cra_blocksize = enc->cra_blocksize;
-       inst->alg.cra_alignmask = max(auth->cra_alignmask, enc->cra_alignmask);
+       inst->alg.cra_alignmask = auth->cra_alignmask | enc->cra_alignmask;
        inst->alg.cra_type = &crypto_aead_type;
 
-       inst->alg.cra_aead.ivsize = enc->cra_blkcipher.ivsize;
-       inst->alg.cra_aead.authsize = authsize;
+       inst->alg.cra_aead.ivsize = enc->cra_ablkcipher.ivsize;
+       inst->alg.cra_aead.maxauthsize = auth->cra_type == &crypto_hash_type ?
+                                        auth->cra_hash.digestsize :
+                                        auth->cra_digest.dia_digestsize;
 
        inst->alg.cra_ctxsize = sizeof(struct crypto_authenc_ctx);
 
@@ -347,18 +438,19 @@ static struct crypto_instance *crypto_authenc_alloc(struct rtattr **tb)
        inst->alg.cra_aead.setkey = crypto_authenc_setkey;
        inst->alg.cra_aead.encrypt = crypto_authenc_encrypt;
        inst->alg.cra_aead.decrypt = crypto_authenc_decrypt;
+       inst->alg.cra_aead.givencrypt = crypto_authenc_givencrypt;
 
 out:
-       crypto_mod_put(enc);
-out_put_auth:
        crypto_mod_put(auth);
        return inst;
 
+err_drop_enc:
+       crypto_drop_skcipher(&ctx->enc);
 err_drop_auth:
        crypto_drop_spawn(&ctx->auth);
 err_free_inst:
        kfree(inst);
-out_put_enc:
+out_put_auth:
        inst = ERR_PTR(err);
        goto out;
 }
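
Net effect for template users: the spelling shrinks from authenc(auth,authsize,enc,enckeylen) to authenc(auth,enc). The digest's upper bound is advertised as maxauthsize, the actual tag length becomes a run-time choice, and enckeylen travels inside the setkey blob sketched earlier, so neither number has to be baked into the instance name any more.
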
@@ -367,7 +459,7 @@ static void crypto_authenc_free(struct crypto_instance *inst)
 {
        struct authenc_instance_ctx *ctx = crypto_instance_ctx(inst);
 
-       crypto_drop_spawn(&ctx->enc);
+       crypto_drop_skcipher(&ctx->enc);
        crypto_drop_spawn(&ctx->auth);
        kfree(inst);
 }
index f6c67f9d4e5c2f9085558991a6fc87893e269670..4a7e65c4df4dc3ac6af007efa1d136e1232be053 100644 (file)
@@ -14,7 +14,8 @@
  *
  */
 
-#include <linux/crypto.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
 #include <linux/errno.h>
 #include <linux/hardirq.h>
 #include <linux/kernel.h>
@@ -25,7 +26,6 @@
 #include <linux/string.h>
 
 #include "internal.h"
-#include "scatterwalk.h"
 
 enum {
        BLKCIPHER_WALK_PHYS = 1 << 0,
@@ -433,9 +433,8 @@ static unsigned int crypto_blkcipher_ctxsize(struct crypto_alg *alg, u32 type,
        struct blkcipher_alg *cipher = &alg->cra_blkcipher;
        unsigned int len = alg->cra_ctxsize;
 
-       type ^= CRYPTO_ALG_ASYNC;
-       mask &= CRYPTO_ALG_ASYNC;
-       if ((type & mask) && cipher->ivsize) {
+       if ((mask & CRYPTO_ALG_TYPE_MASK) == CRYPTO_ALG_TYPE_MASK &&
+           cipher->ivsize) {
                len = ALIGN(len, (unsigned long)alg->cra_alignmask + 1);
                len += cipher->ivsize;
        }
@@ -451,6 +450,11 @@ static int crypto_init_blkcipher_ops_async(struct crypto_tfm *tfm)
        crt->setkey = async_setkey;
        crt->encrypt = async_encrypt;
        crt->decrypt = async_decrypt;
+       if (!alg->ivsize) {
+               crt->givencrypt = skcipher_null_givencrypt;
+               crt->givdecrypt = skcipher_null_givdecrypt;
+       }
+       crt->base = __crypto_ablkcipher_cast(tfm);
        crt->ivsize = alg->ivsize;
 
        return 0;
@@ -482,9 +486,7 @@ static int crypto_init_blkcipher_ops(struct crypto_tfm *tfm, u32 type, u32 mask)
        if (alg->ivsize > PAGE_SIZE / 8)
                return -EINVAL;
 
-       type ^= CRYPTO_ALG_ASYNC;
-       mask &= CRYPTO_ALG_ASYNC;
-       if (type & mask)
+       if ((mask & CRYPTO_ALG_TYPE_MASK) == CRYPTO_ALG_TYPE_MASK)
                return crypto_init_blkcipher_ops_sync(tfm);
        else
                return crypto_init_blkcipher_ops_async(tfm);
@@ -499,6 +501,8 @@ static void crypto_blkcipher_show(struct seq_file *m, struct crypto_alg *alg)
        seq_printf(m, "min keysize  : %u\n", alg->cra_blkcipher.min_keysize);
        seq_printf(m, "max keysize  : %u\n", alg->cra_blkcipher.max_keysize);
        seq_printf(m, "ivsize       : %u\n", alg->cra_blkcipher.ivsize);
+       seq_printf(m, "geniv        : %s\n", alg->cra_blkcipher.geniv ?:
+                                            "<default>");
 }
 
 const struct crypto_type crypto_blkcipher_type = {
@@ -510,5 +514,187 @@ const struct crypto_type crypto_blkcipher_type = {
 };
 EXPORT_SYMBOL_GPL(crypto_blkcipher_type);
 
+static int crypto_grab_nivcipher(struct crypto_skcipher_spawn *spawn,
+                               const char *name, u32 type, u32 mask)
+{
+       struct crypto_alg *alg;
+       int err;
+
+       type = crypto_skcipher_type(type);
+       mask = crypto_skcipher_mask(mask) | CRYPTO_ALG_GENIV;
+
+       alg = crypto_alg_mod_lookup(name, type, mask);
+       if (IS_ERR(alg))
+               return PTR_ERR(alg);
+
+       err = crypto_init_spawn(&spawn->base, alg, spawn->base.inst, mask);
+       crypto_mod_put(alg);
+       return err;
+}
+
+struct crypto_instance *skcipher_geniv_alloc(struct crypto_template *tmpl,
+                                            struct rtattr **tb, u32 type,
+                                            u32 mask)
+{
+       struct {
+               int (*setkey)(struct crypto_ablkcipher *tfm, const u8 *key,
+                             unsigned int keylen);
+               int (*encrypt)(struct ablkcipher_request *req);
+               int (*decrypt)(struct ablkcipher_request *req);
+
+               unsigned int min_keysize;
+               unsigned int max_keysize;
+               unsigned int ivsize;
+
+               const char *geniv;
+       } balg;
+       const char *name;
+       struct crypto_skcipher_spawn *spawn;
+       struct crypto_attr_type *algt;
+       struct crypto_instance *inst;
+       struct crypto_alg *alg;
+       int err;
+
+       algt = crypto_get_attr_type(tb);
+       err = PTR_ERR(algt);
+       if (IS_ERR(algt))
+               return ERR_PTR(err);
+
+       if ((algt->type ^ (CRYPTO_ALG_TYPE_GIVCIPHER | CRYPTO_ALG_GENIV)) &
+           algt->mask)
+               return ERR_PTR(-EINVAL);
+
+       name = crypto_attr_alg_name(tb[1]);
+       err = PTR_ERR(name);
+       if (IS_ERR(name))
+               return ERR_PTR(err);
+
+       inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
+       if (!inst)
+               return ERR_PTR(-ENOMEM);
+
+       spawn = crypto_instance_ctx(inst);
+
+       /* Ignore async algorithms if necessary. */
+       mask |= crypto_requires_sync(algt->type, algt->mask);
+
+       crypto_set_skcipher_spawn(spawn, inst);
+       err = crypto_grab_nivcipher(spawn, name, type, mask);
+       if (err)
+               goto err_free_inst;
+
+       alg = crypto_skcipher_spawn_alg(spawn);
+
+       if ((alg->cra_flags & CRYPTO_ALG_TYPE_MASK) ==
+           CRYPTO_ALG_TYPE_BLKCIPHER) {
+               balg.ivsize = alg->cra_blkcipher.ivsize;
+               balg.min_keysize = alg->cra_blkcipher.min_keysize;
+               balg.max_keysize = alg->cra_blkcipher.max_keysize;
+
+               balg.setkey = async_setkey;
+               balg.encrypt = async_encrypt;
+               balg.decrypt = async_decrypt;
+
+               balg.geniv = alg->cra_blkcipher.geniv;
+       } else {
+               balg.ivsize = alg->cra_ablkcipher.ivsize;
+               balg.min_keysize = alg->cra_ablkcipher.min_keysize;
+               balg.max_keysize = alg->cra_ablkcipher.max_keysize;
+
+               balg.setkey = alg->cra_ablkcipher.setkey;
+               balg.encrypt = alg->cra_ablkcipher.encrypt;
+               balg.decrypt = alg->cra_ablkcipher.decrypt;
+
+               balg.geniv = alg->cra_ablkcipher.geniv;
+       }
+
+       err = -EINVAL;
+       if (!balg.ivsize)
+               goto err_drop_alg;
+
+       /*
+        * This is only true if we're constructing an algorithm with its
+        * default IV generator.  For the default generator we elide the
+        * template name and double-check the IV generator.
+        */
+       if (algt->mask & CRYPTO_ALG_GENIV) {
+               if (!balg.geniv)
+                       balg.geniv = crypto_default_geniv(alg);
+               err = -EAGAIN;
+               if (strcmp(tmpl->name, balg.geniv))
+                       goto err_drop_alg;
+
+               memcpy(inst->alg.cra_name, alg->cra_name, CRYPTO_MAX_ALG_NAME);
+               memcpy(inst->alg.cra_driver_name, alg->cra_driver_name,
+                      CRYPTO_MAX_ALG_NAME);
+       } else {
+               err = -ENAMETOOLONG;
+               if (snprintf(inst->alg.cra_name, CRYPTO_MAX_ALG_NAME,
+                            "%s(%s)", tmpl->name, alg->cra_name) >=
+                   CRYPTO_MAX_ALG_NAME)
+                       goto err_drop_alg;
+               if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME,
+                            "%s(%s)", tmpl->name, alg->cra_driver_name) >=
+                   CRYPTO_MAX_ALG_NAME)
+                       goto err_drop_alg;
+       }
+
+       inst->alg.cra_flags = CRYPTO_ALG_TYPE_GIVCIPHER | CRYPTO_ALG_GENIV;
+       inst->alg.cra_flags |= alg->cra_flags & CRYPTO_ALG_ASYNC;
+       inst->alg.cra_priority = alg->cra_priority;
+       inst->alg.cra_blocksize = alg->cra_blocksize;
+       inst->alg.cra_alignmask = alg->cra_alignmask;
+       inst->alg.cra_type = &crypto_givcipher_type;
+
+       inst->alg.cra_ablkcipher.ivsize = balg.ivsize;
+       inst->alg.cra_ablkcipher.min_keysize = balg.min_keysize;
+       inst->alg.cra_ablkcipher.max_keysize = balg.max_keysize;
+       inst->alg.cra_ablkcipher.geniv = balg.geniv;
+
+       inst->alg.cra_ablkcipher.setkey = balg.setkey;
+       inst->alg.cra_ablkcipher.encrypt = balg.encrypt;
+       inst->alg.cra_ablkcipher.decrypt = balg.decrypt;
+
+out:
+       return inst;
+
+err_drop_alg:
+       crypto_drop_skcipher(spawn);
+err_free_inst:
+       kfree(inst);
+       inst = ERR_PTR(err);
+       goto out;
+}
+EXPORT_SYMBOL_GPL(skcipher_geniv_alloc);
+
+void skcipher_geniv_free(struct crypto_instance *inst)
+{
+       crypto_drop_skcipher(crypto_instance_ctx(inst));
+       kfree(inst);
+}
+EXPORT_SYMBOL_GPL(skcipher_geniv_free);
+
+int skcipher_geniv_init(struct crypto_tfm *tfm)
+{
+       struct crypto_instance *inst = (void *)tfm->__crt_alg;
+       struct crypto_ablkcipher *cipher;
+
+       cipher = crypto_spawn_skcipher(crypto_instance_ctx(inst));
+       if (IS_ERR(cipher))
+               return PTR_ERR(cipher);
+
+       tfm->crt_ablkcipher.base = cipher;
+       tfm->crt_ablkcipher.reqsize += crypto_ablkcipher_reqsize(cipher);
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(skcipher_geniv_init);
+
+void skcipher_geniv_exit(struct crypto_tfm *tfm)
+{
+       crypto_free_ablkcipher(tfm->crt_ablkcipher.base);
+}
+EXPORT_SYMBOL_GPL(skcipher_geniv_exit);
+
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Generic block chaining cipher type");
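
The four skcipher_geniv_* helpers exported above are the common
scaffolding for the IV-generator templates added elsewhere in this
merge (chainiv, eseqiv, seqiv).  A minimal sketch of how such a
template wires them up follows; the "myiv" name, the stubbed
givencrypt handler and the missing per-request context are
hypothetical simplifications, not code from this patch:

	#include <crypto/internal/skcipher.h>
	#include <linux/err.h>
	#include <linux/errno.h>
	#include <linux/init.h>
	#include <linux/module.h>

	static struct crypto_template myiv_tmpl;

	static int myiv_givencrypt(struct skcipher_givcrypt_request *req)
	{
		/* A real template generates req->giv here and then hands
		 * the request on to the wrapped cipher; elided. */
		return -ENOSYS;
	}

	static struct crypto_instance *myiv_alloc(struct rtattr **tb)
	{
		struct crypto_instance *inst;

		/* Builds the "myiv(alg)" instance around the inner skcipher. */
		inst = skcipher_geniv_alloc(&myiv_tmpl, tb, 0, 0);
		if (IS_ERR(inst))
			return inst;

		inst->alg.cra_init = skcipher_geniv_init;
		inst->alg.cra_exit = skcipher_geniv_exit;
		inst->alg.cra_ablkcipher.givencrypt = myiv_givencrypt;

		return inst;
	}

	static struct crypto_template myiv_tmpl = {
		.name = "myiv",
		.alloc = myiv_alloc,
		.free = skcipher_geniv_free,
		.module = THIS_MODULE,
	};

	static int __init myiv_init(void)
	{
		return crypto_register_template(&myiv_tmpl);
	}

	static void __exit myiv_exit(void)
	{
		crypto_unregister_template(&myiv_tmpl);
	}

	module_init(myiv_init);
	module_exit(myiv_exit);
	MODULE_LICENSE("GPL");

chainiv.c in this same series follows essentially this shape, with a
real givencrypt implementation and per-request context on top.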
index 6877ecfd90bb8b2d34f2dce3bab6e6689830830e..493fee7e0a8b46bbd9933d92681e0a7c16d0ab81 100644 (file)
--- a/crypto/camellia.c
+++ b/crypto/camellia.c
 #include <linux/kernel.h>
 #include <linux/module.h>
 
-
-#define CAMELLIA_MIN_KEY_SIZE        16
-#define CAMELLIA_MAX_KEY_SIZE        32
-#define CAMELLIA_BLOCK_SIZE 16
-#define CAMELLIA_TABLE_BYTE_LEN 272
-#define CAMELLIA_TABLE_WORD_LEN (CAMELLIA_TABLE_BYTE_LEN / 4)
-
-typedef u32 KEY_TABLE_TYPE[CAMELLIA_TABLE_WORD_LEN];
-
-
-/* key constants */
-
-#define CAMELLIA_SIGMA1L (0xA09E667FL)
-#define CAMELLIA_SIGMA1R (0x3BCC908BL)
-#define CAMELLIA_SIGMA2L (0xB67AE858L)
-#define CAMELLIA_SIGMA2R (0x4CAA73B2L)
-#define CAMELLIA_SIGMA3L (0xC6EF372FL)
-#define CAMELLIA_SIGMA3R (0xE94F82BEL)
-#define CAMELLIA_SIGMA4L (0x54FF53A5L)
-#define CAMELLIA_SIGMA4R (0xF1D36F1CL)
-#define CAMELLIA_SIGMA5L (0x10E527FAL)
-#define CAMELLIA_SIGMA5R (0xDE682D1DL)
-#define CAMELLIA_SIGMA6L (0xB05688C2L)
-#define CAMELLIA_SIGMA6R (0xB3E6C1FDL)
-
-struct camellia_ctx {
-       int key_length;
-       KEY_TABLE_TYPE key_table;
-};
-
-
-/*
- *  macros
- */
-
-
-# define GETU32(pt) (((u32)(pt)[0] << 24)      \
-                    ^ ((u32)(pt)[1] << 16)     \
-                    ^ ((u32)(pt)[2] <<  8)     \
-                    ^ ((u32)(pt)[3]))
-
-#define COPY4WORD(dst, src)                    \
-    do {                                       \
-       (dst)[0]=(src)[0];                      \
-       (dst)[1]=(src)[1];                      \
-       (dst)[2]=(src)[2];                      \
-       (dst)[3]=(src)[3];                      \
-    }while(0)
-
-#define SWAP4WORD(word)                                \
-    do {                                       \
-       CAMELLIA_SWAP4((word)[0]);              \
-       CAMELLIA_SWAP4((word)[1]);              \
-       CAMELLIA_SWAP4((word)[2]);              \
-       CAMELLIA_SWAP4((word)[3]);              \
-    }while(0)
-
-#define XOR4WORD(a, b)/* a = a ^ b */          \
-    do {                                       \
-       (a)[0]^=(b)[0];                         \
-       (a)[1]^=(b)[1];                         \
-       (a)[2]^=(b)[2];                         \
-       (a)[3]^=(b)[3];                         \
-    }while(0)
-
-#define XOR4WORD2(a, b, c)/* a = b ^ c */      \
-    do {                                       \
-       (a)[0]=(b)[0]^(c)[0];                   \
-       (a)[1]=(b)[1]^(c)[1];                   \
-       (a)[2]=(b)[2]^(c)[2];                   \
-       (a)[3]=(b)[3]^(c)[3];                   \
-    }while(0)
-
-#define CAMELLIA_SUBKEY_L(INDEX) (subkey[(INDEX)*2])
-#define CAMELLIA_SUBKEY_R(INDEX) (subkey[(INDEX)*2 + 1])
-
-/* rotation right shift 1byte */
-#define CAMELLIA_RR8(x) (((x) >> 8) + ((x) << 24))
-/* rotation left shift 1bit */
-#define CAMELLIA_RL1(x) (((x) << 1) + ((x) >> 31))
-/* rotation left shift 1byte */
-#define CAMELLIA_RL8(x) (((x) << 8) + ((x) >> 24))
-
-#define CAMELLIA_ROLDQ(ll, lr, rl, rr, w0, w1, bits)   \
-    do {                                               \
-       w0 = ll;                                        \
-       ll = (ll << bits) + (lr >> (32 - bits));        \
-       lr = (lr << bits) + (rl >> (32 - bits));        \
-       rl = (rl << bits) + (rr >> (32 - bits));        \
-       rr = (rr << bits) + (w0 >> (32 - bits));        \
-    } while(0)
-
-#define CAMELLIA_ROLDQo32(ll, lr, rl, rr, w0, w1, bits)        \
-    do {                                               \
-       w0 = ll;                                        \
-       w1 = lr;                                        \
-       ll = (lr << (bits - 32)) + (rl >> (64 - bits)); \
-       lr = (rl << (bits - 32)) + (rr >> (64 - bits)); \
-       rl = (rr << (bits - 32)) + (w0 >> (64 - bits)); \
-       rr = (w0 << (bits - 32)) + (w1 >> (64 - bits)); \
-    } while(0)
-
-#define CAMELLIA_SP1110(INDEX) (camellia_sp1110[(INDEX)])
-#define CAMELLIA_SP0222(INDEX) (camellia_sp0222[(INDEX)])
-#define CAMELLIA_SP3033(INDEX) (camellia_sp3033[(INDEX)])
-#define CAMELLIA_SP4404(INDEX) (camellia_sp4404[(INDEX)])
-
-#define CAMELLIA_F(xl, xr, kl, kr, yl, yr, il, ir, t0, t1)     \
-    do {                                                       \
-       il = xl ^ kl;                                           \
-       ir = xr ^ kr;                                           \
-       t0 = il >> 16;                                          \
-       t1 = ir >> 16;                                          \
-       yl = CAMELLIA_SP1110(ir & 0xff)                         \
-           ^ CAMELLIA_SP0222((t1 >> 8) & 0xff)                 \
-           ^ CAMELLIA_SP3033(t1 & 0xff)                        \
-           ^ CAMELLIA_SP4404((ir >> 8) & 0xff);                \
-       yr = CAMELLIA_SP1110((t0 >> 8) & 0xff)                  \
-           ^ CAMELLIA_SP0222(t0 & 0xff)                        \
-           ^ CAMELLIA_SP3033((il >> 8) & 0xff)                 \
-           ^ CAMELLIA_SP4404(il & 0xff);                       \
-       yl ^= yr;                                               \
-       yr = CAMELLIA_RR8(yr);                                  \
-       yr ^= yl;                                               \
-    } while(0)
-
-
-/*
- * for speed up
- *
- */
-#define CAMELLIA_FLS(ll, lr, rl, rr, kll, klr, krl, krr, t0, t1, t2, t3) \
-    do {                                                               \
-       t0 = kll;                                                       \
-       t2 = krr;                                                       \
-       t0 &= ll;                                                       \
-       t2 |= rr;                                                       \
-       rl ^= t2;                                                       \
-       lr ^= CAMELLIA_RL1(t0);                                         \
-       t3 = krl;                                                       \
-       t1 = klr;                                                       \
-       t3 &= rl;                                                       \
-       t1 |= lr;                                                       \
-       ll ^= t1;                                                       \
-       rr ^= CAMELLIA_RL1(t3);                                         \
-    } while(0)
-
-#define CAMELLIA_ROUNDSM(xl, xr, kl, kr, yl, yr, il, ir, t0, t1)       \
-    do {                                                               \
-       ir =  CAMELLIA_SP1110(xr & 0xff);                               \
-       il =  CAMELLIA_SP1110((xl>>24) & 0xff);                         \
-       ir ^= CAMELLIA_SP0222((xr>>24) & 0xff);                         \
-       il ^= CAMELLIA_SP0222((xl>>16) & 0xff);                         \
-       ir ^= CAMELLIA_SP3033((xr>>16) & 0xff);                         \
-       il ^= CAMELLIA_SP3033((xl>>8) & 0xff);                          \
-       ir ^= CAMELLIA_SP4404((xr>>8) & 0xff);                          \
-       il ^= CAMELLIA_SP4404(xl & 0xff);                               \
-       il ^= kl;                                                       \
-       ir ^= il ^ kr;                                                  \
-       yl ^= ir;                                                       \
-       yr ^= CAMELLIA_RR8(il) ^ ir;                                    \
-    } while(0)
-
-/**
- * Stuff related to the Camellia key schedule
- */
-#define SUBL(x) subL[(x)]
-#define SUBR(x) subR[(x)]
-
-
 static const u32 camellia_sp1110[256] = {
        0x70707000,0x82828200,0x2c2c2c00,0xececec00,
        0xb3b3b300,0x27272700,0xc0c0c000,0xe5e5e500,
@@ -475,67 +305,348 @@ static const u32 camellia_sp4404[256] = {
 };
 
 
+#define CAMELLIA_MIN_KEY_SIZE        16
+#define CAMELLIA_MAX_KEY_SIZE        32
+#define CAMELLIA_BLOCK_SIZE          16
+#define CAMELLIA_TABLE_BYTE_LEN     272
+
+/*
+ * NB: L and R below stand for 'left' and 'right' as in written numbers.
+ * That is, in an (xxxL,xxxR) pair, xxxL holds the most significant
+ * digits, _not_ the least significant ones!
+ */
+
+
+/* key constants */
+
+#define CAMELLIA_SIGMA1L (0xA09E667FL)
+#define CAMELLIA_SIGMA1R (0x3BCC908BL)
+#define CAMELLIA_SIGMA2L (0xB67AE858L)
+#define CAMELLIA_SIGMA2R (0x4CAA73B2L)
+#define CAMELLIA_SIGMA3L (0xC6EF372FL)
+#define CAMELLIA_SIGMA3R (0xE94F82BEL)
+#define CAMELLIA_SIGMA4L (0x54FF53A5L)
+#define CAMELLIA_SIGMA4R (0xF1D36F1CL)
+#define CAMELLIA_SIGMA5L (0x10E527FAL)
+#define CAMELLIA_SIGMA5R (0xDE682D1DL)
+#define CAMELLIA_SIGMA6L (0xB05688C2L)
+#define CAMELLIA_SIGMA6R (0xB3E6C1FDL)
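+
+/* e.g. sigma1 == 0xA09E667F3BCC908B splits into SIGMA1L and SIGMA1R per the NB above */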
+
+/*
+ *  macros
+ */
+#define GETU32(v, pt) \
+    do { \
+       /* recent gcc is clever enough to turn this into a single move */ \
+       memcpy(&(v), (pt), 4); \
+       (v) = be32_to_cpu(v); \
+    } while(0)
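+
+/*
+ * Illustrative example (not part of the patch): for pt[] = { 0x01,
+ * 0x02, 0x03, 0x04 }, GETU32(v, pt) yields v == 0x01020304 on both
+ * little- and big-endian hosts.
+ */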
+
+/* rotation right shift 1byte */
+#define ROR8(x) (((x) >> 8) + ((x) << 24))
+/* rotation left shift 1bit */
+#define ROL1(x) (((x) << 1) + ((x) >> 31))
+/* rotation left shift 1byte */
+#define ROL8(x) (((x) << 8) + ((x) >> 24))
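+
+/*
+ * Worked example: ROL1(0x80000000) == 0x00000001,
+ * ROL8(0x12345678) == 0x34567812, and ROR8 undoes ROL8.
+ */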
+
+#define ROLDQ(ll, lr, rl, rr, w0, w1, bits)            \
+    do {                                               \
+       w0 = ll;                                        \
+       ll = (ll << bits) + (lr >> (32 - bits));        \
+       lr = (lr << bits) + (rl >> (32 - bits));        \
+       rl = (rl << bits) + (rr >> (32 - bits));        \
+       rr = (rr << bits) + (w0 >> (32 - bits));        \
+    } while(0)
+
+#define ROLDQo32(ll, lr, rl, rr, w0, w1, bits)         \
+    do {                                               \
+       w0 = ll;                                        \
+       w1 = lr;                                        \
+       ll = (lr << (bits - 32)) + (rl >> (64 - bits)); \
+       lr = (rl << (bits - 32)) + (rr >> (64 - bits)); \
+       rl = (rr << (bits - 32)) + (w0 >> (64 - bits)); \
+       rr = (w0 << (bits - 32)) + (w1 >> (64 - bits)); \
+    } while(0)
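+
+/*
+ * Both macros rotate the 128-bit quantity ll||lr||rl||rr left; ROLDQ
+ * handles amounts below 32 bits, ROLDQo32 amounts between 33 and 63
+ * (e.g. a 45-bit rotation is a one-word shift plus a 13-bit ROLDQ).
+ * w0/w1 are scratch words.
+ */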
+
+#define CAMELLIA_F(xl, xr, kl, kr, yl, yr, il, ir, t0, t1)     \
+    do {                                                       \
+       il = xl ^ kl;                                           \
+       ir = xr ^ kr;                                           \
+       t0 = il >> 16;                                          \
+       t1 = ir >> 16;                                          \
+       yl = camellia_sp1110[(u8)(ir     )]                     \
+          ^ camellia_sp0222[    (t1 >> 8)]                     \
+          ^ camellia_sp3033[(u8)(t1     )]                     \
+          ^ camellia_sp4404[(u8)(ir >> 8)];                    \
+       yr = camellia_sp1110[    (t0 >> 8)]                     \
+          ^ camellia_sp0222[(u8)(t0     )]                     \
+          ^ camellia_sp3033[(u8)(il >> 8)]                     \
+          ^ camellia_sp4404[(u8)(il     )];                    \
+       yl ^= yr;                                               \
+       yr = ROR8(yr);                                          \
+       yr ^= yl;                                               \
+    } while(0)
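+
+/*
+ * The (u8) casts replace the old "& 0xff" masking, e.g.
+ * camellia_sp1110[(u8)(ir)] == camellia_sp1110[ir & 0xff]; t0 >> 8
+ * and t1 >> 8 need no mask since t0/t1 are at most 16-bit values.
+ */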
+
+#define SUBKEY_L(INDEX) (subkey[(INDEX)*2])
+#define SUBKEY_R(INDEX) (subkey[(INDEX)*2 + 1])
+
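+/*
+ * camellia_setup_tail() factors out the tail that the 128-, 192- and
+ * 256-bit key schedules used to duplicate: absorbing kw2/kw4 into the
+ * neighbouring subkeys and applying the inverse of the last half of
+ * the P-function.  max is 24 subkey pairs for 128-bit keys and 32 for
+ * 192/256-bit keys.
+ */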
+static void camellia_setup_tail(u32 *subkey, u32 *subL, u32 *subR, int max)
+{
+       u32 dw, tl, tr;
+       u32 kw4l, kw4r;
+       int i;
+
+       /* absorb kw2 to other subkeys */
+       /* round 2 */
+       subL[3] ^= subL[1]; subR[3] ^= subR[1];
+       /* round 4 */
+       subL[5] ^= subL[1]; subR[5] ^= subR[1];
+       /* round 6 */
+       subL[7] ^= subL[1]; subR[7] ^= subR[1];
+       subL[1] ^= subR[1] & ~subR[9];
+       dw = subL[1] & subL[9],
+               subR[1] ^= ROL1(dw); /* modified for FLinv(kl2) */
+       /* round 8 */
+       subL[11] ^= subL[1]; subR[11] ^= subR[1];
+       /* round 10 */
+       subL[13] ^= subL[1]; subR[13] ^= subR[1];
+       /* round 12 */
+       subL[15] ^= subL[1]; subR[15] ^= subR[1];
+       subL[1] ^= subR[1] & ~subR[17];
+       dw = subL[1] & subL[17],
+               subR[1] ^= ROL1(dw); /* modified for FLinv(kl4) */
+       /* round 14 */
+       subL[19] ^= subL[1]; subR[19] ^= subR[1];
+       /* round 16 */
+       subL[21] ^= subL[1]; subR[21] ^= subR[1];
+       /* round 18 */
+       subL[23] ^= subL[1]; subR[23] ^= subR[1];
+       if (max == 24) {
+               /* kw3 */
+               subL[24] ^= subL[1]; subR[24] ^= subR[1];
+
+       /* absorb kw4 to other subkeys */
+               kw4l = subL[25]; kw4r = subR[25];
+       } else {
+               subL[1] ^= subR[1] & ~subR[25];
+               dw = subL[1] & subL[25],
+                       subR[1] ^= ROL1(dw); /* modified for FLinv(kl6) */
+               /* round 20 */
+               subL[27] ^= subL[1]; subR[27] ^= subR[1];
+               /* round 22 */
+               subL[29] ^= subL[1]; subR[29] ^= subR[1];
+               /* round 24 */
+               subL[31] ^= subL[1]; subR[31] ^= subR[1];
+               /* kw3 */
+               subL[32] ^= subL[1]; subR[32] ^= subR[1];
+
+       /* absorb kw4 to other subkeys */
+               kw4l = subL[33]; kw4r = subR[33];
+               /* round 23 */
+               subL[30] ^= kw4l; subR[30] ^= kw4r;
+               /* round 21 */
+               subL[28] ^= kw4l; subR[28] ^= kw4r;
+               /* round 19 */
+               subL[26] ^= kw4l; subR[26] ^= kw4r;
+               kw4l ^= kw4r & ~subR[24];
+               dw = kw4l & subL[24],
+                       kw4r ^= ROL1(dw); /* modified for FL(kl5) */
+       }
+       /* round 17 */
+       subL[22] ^= kw4l; subR[22] ^= kw4r;
+       /* round 15 */
+       subL[20] ^= kw4l; subR[20] ^= kw4r;
+       /* round 13 */
+       subL[18] ^= kw4l; subR[18] ^= kw4r;
+       kw4l ^= kw4r & ~subR[16];
+       dw = kw4l & subL[16],
+               kw4r ^= ROL1(dw); /* modified for FL(kl3) */
+       /* round 11 */
+       subL[14] ^= kw4l; subR[14] ^= kw4r;
+       /* round 9 */
+       subL[12] ^= kw4l; subR[12] ^= kw4r;
+       /* round 7 */
+       subL[10] ^= kw4l; subR[10] ^= kw4r;
+       kw4l ^= kw4r & ~subR[8];
+       dw = kw4l & subL[8],
+               kw4r ^= ROL1(dw); /* modified for FL(kl1) */
+       /* round 5 */
+       subL[6] ^= kw4l; subR[6] ^= kw4r;
+       /* round 3 */
+       subL[4] ^= kw4l; subR[4] ^= kw4r;
+       /* round 1 */
+       subL[2] ^= kw4l; subR[2] ^= kw4r;
+       /* kw1 */
+       subL[0] ^= kw4l; subR[0] ^= kw4r;
+
+       /* key XOR is end of F-function */
+       SUBKEY_L(0) = subL[0] ^ subL[2];/* kw1 */
+       SUBKEY_R(0) = subR[0] ^ subR[2];
+       SUBKEY_L(2) = subL[3];       /* round 1 */
+       SUBKEY_R(2) = subR[3];
+       SUBKEY_L(3) = subL[2] ^ subL[4]; /* round 2 */
+       SUBKEY_R(3) = subR[2] ^ subR[4];
+       SUBKEY_L(4) = subL[3] ^ subL[5]; /* round 3 */
+       SUBKEY_R(4) = subR[3] ^ subR[5];
+       SUBKEY_L(5) = subL[4] ^ subL[6]; /* round 4 */
+       SUBKEY_R(5) = subR[4] ^ subR[6];
+       SUBKEY_L(6) = subL[5] ^ subL[7]; /* round 5 */
+       SUBKEY_R(6) = subR[5] ^ subR[7];
+       tl = subL[10] ^ (subR[10] & ~subR[8]);
+       dw = tl & subL[8],  /* FL(kl1) */
+               tr = subR[10] ^ ROL1(dw);
+       SUBKEY_L(7) = subL[6] ^ tl; /* round 6 */
+       SUBKEY_R(7) = subR[6] ^ tr;
+       SUBKEY_L(8) = subL[8];       /* FL(kl1) */
+       SUBKEY_R(8) = subR[8];
+       SUBKEY_L(9) = subL[9];       /* FLinv(kl2) */
+       SUBKEY_R(9) = subR[9];
+       tl = subL[7] ^ (subR[7] & ~subR[9]);
+       dw = tl & subL[9],  /* FLinv(kl2) */
+               tr = subR[7] ^ ROL1(dw);
+       SUBKEY_L(10) = tl ^ subL[11]; /* round 7 */
+       SUBKEY_R(10) = tr ^ subR[11];
+       SUBKEY_L(11) = subL[10] ^ subL[12]; /* round 8 */
+       SUBKEY_R(11) = subR[10] ^ subR[12];
+       SUBKEY_L(12) = subL[11] ^ subL[13]; /* round 9 */
+       SUBKEY_R(12) = subR[11] ^ subR[13];
+       SUBKEY_L(13) = subL[12] ^ subL[14]; /* round 10 */
+       SUBKEY_R(13) = subR[12] ^ subR[14];
+       SUBKEY_L(14) = subL[13] ^ subL[15]; /* round 11 */
+       SUBKEY_R(14) = subR[13] ^ subR[15];
+       tl = subL[18] ^ (subR[18] & ~subR[16]);
+       dw = tl & subL[16], /* FL(kl3) */
+               tr = subR[18] ^ ROL1(dw);
+       SUBKEY_L(15) = subL[14] ^ tl; /* round 12 */
+       SUBKEY_R(15) = subR[14] ^ tr;
+       SUBKEY_L(16) = subL[16];     /* FL(kl3) */
+       SUBKEY_R(16) = subR[16];
+       SUBKEY_L(17) = subL[17];     /* FLinv(kl4) */
+       SUBKEY_R(17) = subR[17];
+       tl = subL[15] ^ (subR[15] & ~subR[17]);
+       dw = tl & subL[17], /* FLinv(kl4) */
+               tr = subR[15] ^ ROL1(dw);
+       SUBKEY_L(18) = tl ^ subL[19]; /* round 13 */
+       SUBKEY_R(18) = tr ^ subR[19];
+       SUBKEY_L(19) = subL[18] ^ subL[20]; /* round 14 */
+       SUBKEY_R(19) = subR[18] ^ subR[20];
+       SUBKEY_L(20) = subL[19] ^ subL[21]; /* round 15 */
+       SUBKEY_R(20) = subR[19] ^ subR[21];
+       SUBKEY_L(21) = subL[20] ^ subL[22]; /* round 16 */
+       SUBKEY_R(21) = subR[20] ^ subR[22];
+       SUBKEY_L(22) = subL[21] ^ subL[23]; /* round 17 */
+       SUBKEY_R(22) = subR[21] ^ subR[23];
+       if (max == 24) {
+               SUBKEY_L(23) = subL[22];     /* round 18 */
+               SUBKEY_R(23) = subR[22];
+               SUBKEY_L(24) = subL[24] ^ subL[23]; /* kw3 */
+               SUBKEY_R(24) = subR[24] ^ subR[23];
+       } else {
+               tl = subL[26] ^ (subR[26] & ~subR[24]);
+               dw = tl & subL[24], /* FL(kl5) */
+                       tr = subR[26] ^ ROL1(dw);
+               SUBKEY_L(23) = subL[22] ^ tl; /* round 18 */
+               SUBKEY_R(23) = subR[22] ^ tr;
+               SUBKEY_L(24) = subL[24];     /* FL(kl5) */
+               SUBKEY_R(24) = subR[24];
+               SUBKEY_L(25) = subL[25];     /* FLinv(kl6) */
+               SUBKEY_R(25) = subR[25];
+               tl = subL[23] ^ (subR[23] & ~subR[25]);
+               dw = tl & subL[25], /* FLinv(kl6) */
+                       tr = subR[23] ^ ROL1(dw);
+               SUBKEY_L(26) = tl ^ subL[27]; /* round 19 */
+               SUBKEY_R(26) = tr ^ subR[27];
+               SUBKEY_L(27) = subL[26] ^ subL[28]; /* round 20 */
+               SUBKEY_R(27) = subR[26] ^ subR[28];
+               SUBKEY_L(28) = subL[27] ^ subL[29]; /* round 21 */
+               SUBKEY_R(28) = subR[27] ^ subR[29];
+               SUBKEY_L(29) = subL[28] ^ subL[30]; /* round 22 */
+               SUBKEY_R(29) = subR[28] ^ subR[30];
+               SUBKEY_L(30) = subL[29] ^ subL[31]; /* round 23 */
+               SUBKEY_R(30) = subR[29] ^ subR[31];
+               SUBKEY_L(31) = subL[30];     /* round 24 */
+               SUBKEY_R(31) = subR[30];
+               SUBKEY_L(32) = subL[32] ^ subL[31]; /* kw3 */
+               SUBKEY_R(32) = subR[32] ^ subR[31];
+       }
+
+       /* apply the inverse of the last half of P-function */
+       i = 2;
+       do {
+               dw = SUBKEY_L(i + 0) ^ SUBKEY_R(i + 0); dw = ROL8(dw);/* round 1 */
+               SUBKEY_R(i + 0) = SUBKEY_L(i + 0) ^ dw; SUBKEY_L(i + 0) = dw;
+               dw = SUBKEY_L(i + 1) ^ SUBKEY_R(i + 1); dw = ROL8(dw);/* round 2 */
+               SUBKEY_R(i + 1) = SUBKEY_L(i + 1) ^ dw; SUBKEY_L(i + 1) = dw;
+               dw = SUBKEY_L(i + 2) ^ SUBKEY_R(i + 2); dw = ROL8(dw);/* round 3 */
+               SUBKEY_R(i + 2) = SUBKEY_L(i + 2) ^ dw; SUBKEY_L(i + 2) = dw;
+               dw = SUBKEY_L(i + 3) ^ SUBKEY_R(i + 3); dw = ROL8(dw);/* round 4 */
+               SUBKEY_R(i + 3) = SUBKEY_L(i + 3) ^ dw; SUBKEY_L(i + 3) = dw;
+               dw = SUBKEY_L(i + 4) ^ SUBKEY_R(i + 4); dw = ROL8(dw);/* round 5 */
+               SUBKEY_R(i + 4) = SUBKEY_L(i + 4) ^ dw; SUBKEY_L(i + 4) = dw;
+               dw = SUBKEY_L(i + 5) ^ SUBKEY_R(i + 5); dw = ROL8(dw);/* round 6 */
+               SUBKEY_R(i + 5) = SUBKEY_L(i + 5) ^ dw; SUBKEY_L(i + 5) = dw;
+               i += 8;
+       } while (i < max);
+}
 
 static void camellia_setup128(const unsigned char *key, u32 *subkey)
 {
        u32 kll, klr, krl, krr;
        u32 il, ir, t0, t1, w0, w1;
-       u32 kw4l, kw4r, dw, tl, tr;
        u32 subL[26];
        u32 subR[26];
 
        /**
-        *  k == kll || klr || krl || krr (|| is concatination)
-        */
-       kll = GETU32(key     );
-       klr = GETU32(key +  4);
-       krl = GETU32(key +  8);
-       krr = GETU32(key + 12);
-       /**
-        * generate KL dependent subkeys
+        *  k == kll || klr || krl || krr (|| is concatenation)
         */
+       GETU32(kll, key     );
+       GETU32(klr, key +  4);
+       GETU32(krl, key +  8);
+       GETU32(krr, key + 12);
+
+       /* generate KL dependent subkeys */
        /* kw1 */
-       SUBL(0) = kll; SUBR(0) = klr;
+       subL[0] = kll; subR[0] = klr;
        /* kw2 */
-       SUBL(1) = krl; SUBR(1) = krr;
+       subL[1] = krl; subR[1] = krr;
        /* rotation left shift 15bit */
-       CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15);
+       ROLDQ(kll, klr, krl, krr, w0, w1, 15);
        /* k3 */
-       SUBL(4) = kll; SUBR(4) = klr;
+       subL[4] = kll; subR[4] = klr;
        /* k4 */
-       SUBL(5) = krl; SUBR(5) = krr;
+       subL[5] = krl; subR[5] = krr;
        /* rotation left shift 15+30bit */
-       CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 30);
+       ROLDQ(kll, klr, krl, krr, w0, w1, 30);
        /* k7 */
-       SUBL(10) = kll; SUBR(10) = klr;
+       subL[10] = kll; subR[10] = klr;
        /* k8 */
-       SUBL(11) = krl; SUBR(11) = krr;
+       subL[11] = krl; subR[11] = krr;
        /* rotation left shift 15+30+15bit */
-       CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15);
+       ROLDQ(kll, klr, krl, krr, w0, w1, 15);
        /* k10 */
-       SUBL(13) = krl; SUBR(13) = krr;
+       subL[13] = krl; subR[13] = krr;
        /* rotation left shift 15+30+15+17 bit */
-       CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 17);
+       ROLDQ(kll, klr, krl, krr, w0, w1, 17);
        /* kl3 */
-       SUBL(16) = kll; SUBR(16) = klr;
+       subL[16] = kll; subR[16] = klr;
        /* kl4 */
-       SUBL(17) = krl; SUBR(17) = krr;
+       subL[17] = krl; subR[17] = krr;
        /* rotation left shift 15+30+15+17+17 bit */
-       CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 17);
+       ROLDQ(kll, klr, krl, krr, w0, w1, 17);
        /* k13 */
-       SUBL(18) = kll; SUBR(18) = klr;
+       subL[18] = kll; subR[18] = klr;
        /* k14 */
-       SUBL(19) = krl; SUBR(19) = krr;
+       subL[19] = krl; subR[19] = krr;
        /* rotation left shift 15+30+15+17+17+17 bit */
-       CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 17);
+       ROLDQ(kll, klr, krl, krr, w0, w1, 17);
        /* k17 */
-       SUBL(22) = kll; SUBR(22) = klr;
+       subL[22] = kll; subR[22] = klr;
        /* k18 */
-       SUBL(23) = krl; SUBR(23) = krr;
+       subL[23] = krl; subR[23] = krr;
 
        /* generate KA */
-       kll = SUBL(0); klr = SUBR(0);
-       krl = SUBL(1); krr = SUBR(1);
+       kll = subL[0]; klr = subR[0];
+       krl = subL[1]; krr = subR[1];
        CAMELLIA_F(kll, klr,
                   CAMELLIA_SIGMA1L, CAMELLIA_SIGMA1R,
                   w0, w1, il, ir, t0, t1);
@@ -555,306 +666,108 @@ static void camellia_setup128(const unsigned char *key, u32 *subkey)
 
        /* generate KA dependent subkeys */
        /* k1, k2 */
-       SUBL(2) = kll; SUBR(2) = klr;
-       SUBL(3) = krl; SUBR(3) = krr;
-       CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15);
+       subL[2] = kll; subR[2] = klr;
+