Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 10 Oct 2016 21:04:16 +0000 (14:04 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 10 Oct 2016 21:04:16 +0000 (14:04 -0700)
Pull crypto updates from Herbert Xu:
 "Here is the crypto update for 4.9:

  API:
   - The crypto engine code now supports hashes.

  Algorithms:
   - Allow keys >= 2048 bits in FIPS mode for RSA.

  Drivers:
   - Memory overwrite fix for vmx ghash.
   - Add support for building ARM sha1-neon in Thumb2 mode.
   - Reenable ARM ghash-ce code by adding import/export.
   - Reenable img-hash by adding import/export.
   - Add support for multiple cores in omap-aes.
   - Add little-endian support for sha1-powerpc.
   - Add Cavium HWRNG driver for ThunderX SoC"

* 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (137 commits)
  crypto: caam - treat SGT address pointer as u64
  crypto: ccp - Make syslog errors human-readable
  crypto: ccp - clean up data structure
  crypto: vmx - Ensure ghash-generic is enabled
  crypto: testmgr - add guard to dst buffer for ahash_export
  crypto: caam - Unmap region obtained by of_iomap
  crypto: sha1-powerpc - little-endian support
  crypto: gcm - Fix IV buffer size in crypto_gcm_setkey
  crypto: vmx - Fix memory corruption caused by p8_ghash
  crypto: ghash-generic - move common definitions to a new header file
  crypto: caam - fix sg dump
  hwrng: omap - Only fail if pm_runtime_get_sync returns < 0
  crypto: omap-sham - shrink the internal buffer size
  crypto: omap-sham - add support for export/import
  crypto: omap-sham - convert driver logic to use sgs for data xmit
  crypto: omap-sham - change the DMA threshold value to a define
  crypto: omap-sham - add support functions for sg based data handling
  crypto: omap-sham - rename sgl to sgl_tmp for deprecation
  crypto: omap-sham - align algorithms on word offset
  crypto: omap-sham - add context export/import stubs
  ...

77 files changed:
Documentation/DocBook/crypto-API.tmpl
arch/arm/crypto/ghash-ce-glue.c
arch/arm/crypto/sha1-armv7-neon.S
arch/powerpc/crypto/sha1-powerpc-asm.S
crypto/algif_hash.c
crypto/crct10dif_generic.c
crypto/crypto_engine.c
crypto/drbg.c
crypto/gcm.c
crypto/ghash-generic.c
crypto/mcryptd.c
crypto/rsa_helper.c
crypto/testmgr.c
crypto/testmgr.h
crypto/xor.c
crypto/xts.c
drivers/char/hw_random/Kconfig
drivers/char/hw_random/Makefile
drivers/char/hw_random/amd-rng.c
drivers/char/hw_random/bcm2835-rng.c
drivers/char/hw_random/cavium-rng-vf.c [new file with mode: 0644]
drivers/char/hw_random/cavium-rng.c [new file with mode: 0644]
drivers/char/hw_random/core.c
drivers/char/hw_random/geode-rng.c
drivers/char/hw_random/meson-rng.c
drivers/char/hw_random/omap-rng.c
drivers/char/hw_random/omap3-rom-rng.c
drivers/char/hw_random/pasemi-rng.c
drivers/char/hw_random/pic32-rng.c
drivers/char/hw_random/st-rng.c
drivers/char/hw_random/tx4939-rng.c
drivers/crypto/Kconfig
drivers/crypto/caam/caamalg.c
drivers/crypto/caam/caamhash.c
drivers/crypto/caam/ctrl.c
drivers/crypto/caam/desc.h
drivers/crypto/caam/desc_constr.h
drivers/crypto/caam/intern.h
drivers/crypto/caam/jr.c
drivers/crypto/caam/regs.h
drivers/crypto/caam/sg_sw_sec4.h
drivers/crypto/ccp/Makefile
drivers/crypto/ccp/ccp-crypto-sha.c
drivers/crypto/ccp/ccp-dev-v3.c
drivers/crypto/ccp/ccp-dev-v5.c [new file with mode: 0644]
drivers/crypto/ccp/ccp-dev.c
drivers/crypto/ccp/ccp-dev.h
drivers/crypto/ccp/ccp-dmaengine.c
drivers/crypto/ccp/ccp-ops.c
drivers/crypto/ccp/ccp-pci.c
drivers/crypto/hifn_795x.c
drivers/crypto/img-hash.c
drivers/crypto/ixp4xx_crypto.c
drivers/crypto/marvell/cesa.c
drivers/crypto/marvell/hash.c
drivers/crypto/marvell/tdma.c
drivers/crypto/mv_cesa.c
drivers/crypto/mxc-scc.c
drivers/crypto/omap-aes.c
drivers/crypto/omap-des.c
drivers/crypto/omap-sham.c
drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.h
drivers/crypto/qat/qat_common/adf_admin.c
drivers/crypto/qat/qat_common/qat_uclo.c
drivers/crypto/rockchip/rk3288_crypto.c
drivers/crypto/sunxi-ss/sun4i-ss-cipher.c
drivers/crypto/sunxi-ss/sun4i-ss-core.c
drivers/crypto/sunxi-ss/sun4i-ss-hash.c
drivers/crypto/sunxi-ss/sun4i-ss.h
drivers/crypto/vmx/Kconfig
drivers/crypto/vmx/ghash.c
drivers/pci/quirks.c
include/crypto/algapi.h
include/crypto/engine.h [new file with mode: 0644]
include/crypto/ghash.h [new file with mode: 0644]
include/linux/ccp.h
include/linux/hw_random.h

index fb2a1526f6ecdf15295a9a15ba39cc48472b8566..088b79c341ff1013a0362d3927c3033966ede5ff 100644 (file)
@@ -797,7 +797,8 @@ kernel crypto API            |       Caller
      include/linux/crypto.h and their definition can be seen below.
      The former function registers a single transformation, while
      the latter works on an array of transformation descriptions.
-     The latter is useful when registering transformations in bulk.
+     The latter is useful when registering transformations in bulk,
+     for example when a driver implements multiple transformations.
     </para>
 
     <programlisting>
@@ -822,18 +823,31 @@ kernel crypto API            |       Caller
     </para>
 
     <para>
-     The bulk registration / unregistration functions require
-     that struct crypto_alg is an array of count size. These
-     functions simply loop over that array and register /
-     unregister each individual algorithm. If an error occurs,
-     the loop is terminated at the offending algorithm definition.
-     That means, the algorithms prior to the offending algorithm
-     are successfully registered. Note, the caller has no way of
-     knowing which cipher implementations have successfully
-     registered. If this is important to know, the caller should
-     loop through the different implementations using the single
-     instance *_alg functions for each individual implementation.
+     The bulk registration/unregistration functions
+     register/unregister each transformation in the given array of
+     length count.  They handle errors as follows:
     </para>
+    <itemizedlist>
+     <listitem>
+      <para>
+       crypto_register_algs() succeeds if and only if it
+       successfully registers all the given transformations. If an
+       error occurs partway through, then it rolls back successful
+       registrations before returning the error code. Note that if
+       a driver needs to handle registration errors for individual
+       transformations, then it will need to use the non-bulk
+       function crypto_register_alg() instead.
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+       crypto_unregister_algs() tries to unregister all the given
+       transformations, continuing on error. It logs errors and
+       always returns zero.
+      </para>
+     </listitem>
+    </itemizedlist>
+
    </sect1>
 
    <sect1><title>Single-Block Symmetric Ciphers [CIPHER]</title>
index 1568cb5cd870507dfb5c9799b7fdf090083b9b03..7546b3c024665ed0e1bf0ce3b94f99cff306189f 100644 (file)
@@ -138,7 +138,7 @@ static struct shash_alg ghash_alg = {
        .setkey                 = ghash_setkey,
        .descsize               = sizeof(struct ghash_desc_ctx),
        .base                   = {
-               .cra_name       = "ghash",
+               .cra_name       = "__ghash",
                .cra_driver_name = "__driver-ghash-ce",
                .cra_priority   = 0,
                .cra_flags      = CRYPTO_ALG_TYPE_SHASH | CRYPTO_ALG_INTERNAL,
@@ -220,6 +220,27 @@ static int ghash_async_digest(struct ahash_request *req)
        }
 }
 
+static int ghash_async_import(struct ahash_request *req, const void *in)
+{
+       struct ahash_request *cryptd_req = ahash_request_ctx(req);
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+       struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
+       struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
+
+       desc->tfm = cryptd_ahash_child(ctx->cryptd_tfm);
+       desc->flags = req->base.flags;
+
+       return crypto_shash_import(desc, in);
+}
+
+static int ghash_async_export(struct ahash_request *req, void *out)
+{
+       struct ahash_request *cryptd_req = ahash_request_ctx(req);
+       struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
+
+       return crypto_shash_export(desc, out);
+}
+
 static int ghash_async_setkey(struct crypto_ahash *tfm, const u8 *key,
                              unsigned int keylen)
 {
@@ -268,7 +289,10 @@ static struct ahash_alg ghash_async_alg = {
        .final                  = ghash_async_final,
        .setkey                 = ghash_async_setkey,
        .digest                 = ghash_async_digest,
+       .import                 = ghash_async_import,
+       .export                 = ghash_async_export,
        .halg.digestsize        = GHASH_DIGEST_SIZE,
+       .halg.statesize         = sizeof(struct ghash_desc_ctx),
        .halg.base              = {
                .cra_name       = "ghash",
                .cra_driver_name = "ghash-ce",
index dcd01f3f0bb00e987fdc18ab78449679dfd9f724..2468fade49cf3f3d2b14dfa875b06113c8640c99 100644 (file)
@@ -12,7 +12,6 @@
 #include <asm/assembler.h>
 
 .syntax unified
-.code   32
 .fpu neon
 
 .text
index 125e16520061289aff815417fd7aa18ec41e3df7..82ddc9bdfeb11a18a83469c5c35f1ea02b36cb5c 100644 (file)
@@ -7,6 +7,15 @@
 #include <asm/ppc_asm.h>
 #include <asm/asm-offsets.h>
 
+#ifdef __BIG_ENDIAN__
+#define LWZ(rt, d, ra) \
+       lwz     rt,d(ra)
+#else
+#define LWZ(rt, d, ra) \
+       li      rt,d;   \
+       lwbrx   rt,rt,ra
+#endif
+
 /*
  * We roll the registers for T, A, B, C, D, E around on each
  * iteration; T on iteration t is A on iteration t+1, and so on.
@@ -23,7 +32,7 @@
 #define W(t)   (((t)%16)+16)
 
 #define LOADW(t)                               \
-       lwz     W(t),(t)*4(r4)
+       LWZ(W(t),(t)*4,r4)
 
 #define STEPD0_LOAD(t)                         \
        andc    r0,RD(t),RB(t);         \
@@ -33,7 +42,7 @@
        add     r0,RE(t),r15;                   \
        add     RT(t),RT(t),r6;         \
        add     r14,r0,W(t);                    \
-       lwz     W((t)+4),((t)+4)*4(r4); \
+       LWZ(W((t)+4),((t)+4)*4,r4);     \
        rotlwi  RB(t),RB(t),30;                 \
        add     RT(t),RT(t),r14
 
index 68a5ceaa04c81072f453a7b2ca2605bed39bd5a1..2d8466f9e49b8632527ed1e2f35617ff02f5fac1 100644 (file)
@@ -39,6 +39,37 @@ struct algif_hash_tfm {
        bool has_key;
 };
 
+static int hash_alloc_result(struct sock *sk, struct hash_ctx *ctx)
+{
+       unsigned ds;
+
+       if (ctx->result)
+               return 0;
+
+       ds = crypto_ahash_digestsize(crypto_ahash_reqtfm(&ctx->req));
+
+       ctx->result = sock_kmalloc(sk, ds, GFP_KERNEL);
+       if (!ctx->result)
+               return -ENOMEM;
+
+       memset(ctx->result, 0, ds);
+
+       return 0;
+}
+
+static void hash_free_result(struct sock *sk, struct hash_ctx *ctx)
+{
+       unsigned ds;
+
+       if (!ctx->result)
+               return;
+
+       ds = crypto_ahash_digestsize(crypto_ahash_reqtfm(&ctx->req));
+
+       sock_kzfree_s(sk, ctx->result, ds);
+       ctx->result = NULL;
+}
+
 static int hash_sendmsg(struct socket *sock, struct msghdr *msg,
                        size_t ignored)
 {
@@ -54,6 +85,9 @@ static int hash_sendmsg(struct socket *sock, struct msghdr *msg,
 
        lock_sock(sk);
        if (!ctx->more) {
+               if ((msg->msg_flags & MSG_MORE))
+                       hash_free_result(sk, ctx);
+
                err = af_alg_wait_for_completion(crypto_ahash_init(&ctx->req),
                                                &ctx->completion);
                if (err)
@@ -90,6 +124,10 @@ static int hash_sendmsg(struct socket *sock, struct msghdr *msg,
 
        ctx->more = msg->msg_flags & MSG_MORE;
        if (!ctx->more) {
+               err = hash_alloc_result(sk, ctx);
+               if (err)
+                       goto unlock;
+
                ahash_request_set_crypt(&ctx->req, NULL, ctx->result, 0);
                err = af_alg_wait_for_completion(crypto_ahash_final(&ctx->req),
                                                 &ctx->completion);
@@ -116,6 +154,13 @@ static ssize_t hash_sendpage(struct socket *sock, struct page *page,
        sg_init_table(ctx->sgl.sg, 1);
        sg_set_page(ctx->sgl.sg, page, size, offset);
 
+       if (!(flags & MSG_MORE)) {
+               err = hash_alloc_result(sk, ctx);
+               if (err)
+                       goto unlock;
+       } else if (!ctx->more)
+               hash_free_result(sk, ctx);
+
        ahash_request_set_crypt(&ctx->req, ctx->sgl.sg, ctx->result, size);
 
        if (!(flags & MSG_MORE)) {
@@ -153,6 +198,7 @@ static int hash_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
        struct alg_sock *ask = alg_sk(sk);
        struct hash_ctx *ctx = ask->private;
        unsigned ds = crypto_ahash_digestsize(crypto_ahash_reqtfm(&ctx->req));
+       bool result;
        int err;
 
        if (len > ds)
@@ -161,17 +207,29 @@ static int hash_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
                msg->msg_flags |= MSG_TRUNC;
 
        lock_sock(sk);
+       result = ctx->result;
+       err = hash_alloc_result(sk, ctx);
+       if (err)
+               goto unlock;
+
+       ahash_request_set_crypt(&ctx->req, NULL, ctx->result, 0);
+
        if (ctx->more) {
                ctx->more = 0;
-               ahash_request_set_crypt(&ctx->req, NULL, ctx->result, 0);
                err = af_alg_wait_for_completion(crypto_ahash_final(&ctx->req),
                                                 &ctx->completion);
                if (err)
                        goto unlock;
+       } else if (!result) {
+               err = af_alg_wait_for_completion(
+                               crypto_ahash_digest(&ctx->req),
+                               &ctx->completion);
        }
 
        err = memcpy_to_msg(msg, ctx->result, len);
 
+       hash_free_result(sk, ctx);
+
 unlock:
        release_sock(sk);
 
@@ -394,8 +452,7 @@ static void hash_sock_destruct(struct sock *sk)
        struct alg_sock *ask = alg_sk(sk);
        struct hash_ctx *ctx = ask->private;
 
-       sock_kzfree_s(sk, ctx->result,
-                     crypto_ahash_digestsize(crypto_ahash_reqtfm(&ctx->req)));
+       hash_free_result(sk, ctx);
        sock_kfree_s(sk, ctx, ctx->len);
        af_alg_release_parent(sk);
 }
@@ -407,20 +464,12 @@ static int hash_accept_parent_nokey(void *private, struct sock *sk)
        struct algif_hash_tfm *tfm = private;
        struct crypto_ahash *hash = tfm->hash;
        unsigned len = sizeof(*ctx) + crypto_ahash_reqsize(hash);
-       unsigned ds = crypto_ahash_digestsize(hash);
 
        ctx = sock_kmalloc(sk, len, GFP_KERNEL);
        if (!ctx)
                return -ENOMEM;
 
-       ctx->result = sock_kmalloc(sk, ds, GFP_KERNEL);
-       if (!ctx->result) {
-               sock_kfree_s(sk, ctx, len);
-               return -ENOMEM;
-       }
-
-       memset(ctx->result, 0, ds);
-
+       ctx->result = NULL;
        ctx->len = len;
        ctx->more = 0;
        af_alg_init_completion(&ctx->completion);
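
The result buffer being juggled above is what user space ultimately reads back through the AF_ALG "hash" socket. A minimal user-space sketch of that interface — standard AF_ALG usage rather than code from this merge, with error checks omitted:

    #include <stdio.h>
    #include <unistd.h>
    #include <sys/socket.h>
    #include <linux/if_alg.h>

    int main(void)
    {
            struct sockaddr_alg sa = {
                    .salg_family = AF_ALG,
                    .salg_type   = "hash",
                    .salg_name   = "sha256",
            };
            unsigned char digest[32];
            int tfmfd, opfd, i;

            tfmfd = socket(AF_ALG, SOCK_SEQPACKET, 0);
            bind(tfmfd, (struct sockaddr *)&sa, sizeof(sa));
            opfd = accept(tfmfd, NULL, 0);

            /* no MSG_MORE: the kernel finalizes and allocates the result */
            send(opfd, "hello", 5, 0);
            read(opfd, digest, sizeof(digest));

            for (i = 0; i < 32; i++)
                    printf("%02x", digest[i]);
            printf("\n");

            close(opfd);
            close(tfmfd);
            return 0;
    }
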
index c1229614c7e324e5ee9341d6f2be530afd437487..8e94e29dc6fc89e71f9d2e724814c74e3277683b 100644 (file)
@@ -107,10 +107,7 @@ static struct shash_alg alg = {
 
 static int __init crct10dif_mod_init(void)
 {
-       int ret;
-
-       ret = crypto_register_shash(&alg);
-       return ret;
+       return crypto_register_shash(&alg);
 }
 
 static void __exit crct10dif_mod_fini(void)
index a55c82dd48efe514b6073173e9ef5538964933f5..bfb92ace2c91a9a3786ab3c2cedb00224d2b62f0 100644 (file)
 
 #include <linux/err.h>
 #include <linux/delay.h>
+#include <crypto/engine.h>
+#include <crypto/internal/hash.h>
 #include "internal.h"
 
 #define CRYPTO_ENGINE_MAX_QLEN 10
 
-void crypto_finalize_request(struct crypto_engine *engine,
-                            struct ablkcipher_request *req, int err);
-
 /**
  * crypto_pump_requests - dequeue one request from engine queue to process
  * @engine: the hardware engine
@@ -34,10 +33,11 @@ static void crypto_pump_requests(struct crypto_engine *engine,
                                 bool in_kthread)
 {
        struct crypto_async_request *async_req, *backlog;
-       struct ablkcipher_request *req;
+       struct ahash_request *hreq;
+       struct ablkcipher_request *breq;
        unsigned long flags;
        bool was_busy = false;
-       int ret;
+       int ret, rtype;
 
        spin_lock_irqsave(&engine->queue_lock, flags);
 
@@ -82,9 +82,7 @@ static void crypto_pump_requests(struct crypto_engine *engine,
        if (!async_req)
                goto out;
 
-       req = ablkcipher_request_cast(async_req);
-
-       engine->cur_req = req;
+       engine->cur_req = async_req;
        if (backlog)
                backlog->complete(backlog, -EINPROGRESS);
 
@@ -95,6 +93,7 @@ static void crypto_pump_requests(struct crypto_engine *engine,
 
        spin_unlock_irqrestore(&engine->queue_lock, flags);
 
+       rtype = crypto_tfm_alg_type(engine->cur_req->tfm);
        /* Until here we get the request need to be encrypted successfully */
        if (!was_busy && engine->prepare_crypt_hardware) {
                ret = engine->prepare_crypt_hardware(engine);
@@ -104,24 +103,55 @@ static void crypto_pump_requests(struct crypto_engine *engine,
                }
        }
 
-       if (engine->prepare_request) {
-               ret = engine->prepare_request(engine, engine->cur_req);
+       switch (rtype) {
+       case CRYPTO_ALG_TYPE_AHASH:
+               hreq = ahash_request_cast(engine->cur_req);
+               if (engine->prepare_hash_request) {
+                       ret = engine->prepare_hash_request(engine, hreq);
+                       if (ret) {
+                               pr_err("failed to prepare request: %d\n", ret);
+                               goto req_err;
+                       }
+                       engine->cur_req_prepared = true;
+               }
+               ret = engine->hash_one_request(engine, hreq);
                if (ret) {
-                       pr_err("failed to prepare request: %d\n", ret);
+                       pr_err("failed to hash one request from queue\n");
                        goto req_err;
                }
-               engine->cur_req_prepared = true;
-       }
-
-       ret = engine->crypt_one_request(engine, engine->cur_req);
-       if (ret) {
-               pr_err("failed to crypt one request from queue\n");
-               goto req_err;
+               return;
+       case CRYPTO_ALG_TYPE_ABLKCIPHER:
+               breq = ablkcipher_request_cast(engine->cur_req);
+               if (engine->prepare_cipher_request) {
+                       ret = engine->prepare_cipher_request(engine, breq);
+                       if (ret) {
+                               pr_err("failed to prepare request: %d\n", ret);
+                               goto req_err;
+                       }
+                       engine->cur_req_prepared = true;
+               }
+               ret = engine->cipher_one_request(engine, breq);
+               if (ret) {
+                       pr_err("failed to cipher one request from queue\n");
+                       goto req_err;
+               }
+               return;
+       default:
+               pr_err("failed to prepare request of unknown type\n");
+               return;
        }
-       return;
 
 req_err:
-       crypto_finalize_request(engine, engine->cur_req, ret);
+       switch (rtype) {
+       case CRYPTO_ALG_TYPE_AHASH:
+               hreq = ahash_request_cast(engine->cur_req);
+               crypto_finalize_hash_request(engine, hreq, ret);
+               break;
+       case CRYPTO_ALG_TYPE_ABLKCIPHER:
+               breq = ablkcipher_request_cast(engine->cur_req);
+               crypto_finalize_cipher_request(engine, breq, ret);
+               break;
+       }
        return;
 
 out:
@@ -137,12 +167,14 @@ static void crypto_pump_work(struct kthread_work *work)
 }
 
 /**
- * crypto_transfer_request - transfer the new request into the engine queue
+ * crypto_transfer_cipher_request - transfer the new request into the
+ * enginequeue
  * @engine: the hardware engine
  * @req: the request need to be listed into the engine queue
  */
-int crypto_transfer_request(struct crypto_engine *engine,
-                           struct ablkcipher_request *req, bool need_pump)
+int crypto_transfer_cipher_request(struct crypto_engine *engine,
+                                  struct ablkcipher_request *req,
+                                  bool need_pump)
 {
        unsigned long flags;
        int ret;
@@ -162,46 +194,125 @@ int crypto_transfer_request(struct crypto_engine *engine,
        spin_unlock_irqrestore(&engine->queue_lock, flags);
        return ret;
 }
-EXPORT_SYMBOL_GPL(crypto_transfer_request);
+EXPORT_SYMBOL_GPL(crypto_transfer_cipher_request);
+
+/**
+ * crypto_transfer_cipher_request_to_engine - transfer one request to list
+ * into the engine queue
+ * @engine: the hardware engine
+ * @req: the request need to be listed into the engine queue
+ */
+int crypto_transfer_cipher_request_to_engine(struct crypto_engine *engine,
+                                            struct ablkcipher_request *req)
+{
+       return crypto_transfer_cipher_request(engine, req, true);
+}
+EXPORT_SYMBOL_GPL(crypto_transfer_cipher_request_to_engine);
+
+/**
+ * crypto_transfer_hash_request - transfer the new request into the
+ * enginequeue
+ * @engine: the hardware engine
+ * @req: the request need to be listed into the engine queue
+ */
+int crypto_transfer_hash_request(struct crypto_engine *engine,
+                                struct ahash_request *req, bool need_pump)
+{
+       unsigned long flags;
+       int ret;
+
+       spin_lock_irqsave(&engine->queue_lock, flags);
+
+       if (!engine->running) {
+               spin_unlock_irqrestore(&engine->queue_lock, flags);
+               return -ESHUTDOWN;
+       }
+
+       ret = ahash_enqueue_request(&engine->queue, req);
+
+       if (!engine->busy && need_pump)
+               queue_kthread_work(&engine->kworker, &engine->pump_requests);
+
+       spin_unlock_irqrestore(&engine->queue_lock, flags);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(crypto_transfer_hash_request);
 
 /**
- * crypto_transfer_request_to_engine - transfer one request to list into the
- * engine queue
+ * crypto_transfer_hash_request_to_engine - transfer one request to list
+ * into the engine queue
  * @engine: the hardware engine
  * @req: the request need to be listed into the engine queue
  */
-int crypto_transfer_request_to_engine(struct crypto_engine *engine,
-                                     struct ablkcipher_request *req)
+int crypto_transfer_hash_request_to_engine(struct crypto_engine *engine,
+                                          struct ahash_request *req)
 {
-       return crypto_transfer_request(engine, req, true);
+       return crypto_transfer_hash_request(engine, req, true);
 }
-EXPORT_SYMBOL_GPL(crypto_transfer_request_to_engine);
+EXPORT_SYMBOL_GPL(crypto_transfer_hash_request_to_engine);
 
 /**
- * crypto_finalize_request - finalize one request if the request is done
+ * crypto_finalize_cipher_request - finalize one request if the request is done
  * @engine: the hardware engine
  * @req: the request need to be finalized
  * @err: error number
  */
-void crypto_finalize_request(struct crypto_engine *engine,
-                            struct ablkcipher_request *req, int err)
+void crypto_finalize_cipher_request(struct crypto_engine *engine,
+                                   struct ablkcipher_request *req, int err)
 {
        unsigned long flags;
        bool finalize_cur_req = false;
        int ret;
 
        spin_lock_irqsave(&engine->queue_lock, flags);
-       if (engine->cur_req == req)
+       if (engine->cur_req == &req->base)
                finalize_cur_req = true;
        spin_unlock_irqrestore(&engine->queue_lock, flags);
 
        if (finalize_cur_req) {
-               if (engine->cur_req_prepared && engine->unprepare_request) {
-                       ret = engine->unprepare_request(engine, req);
+               if (engine->cur_req_prepared &&
+                   engine->unprepare_cipher_request) {
+                       ret = engine->unprepare_cipher_request(engine, req);
                        if (ret)
                                pr_err("failed to unprepare request\n");
                }
+               spin_lock_irqsave(&engine->queue_lock, flags);
+               engine->cur_req = NULL;
+               engine->cur_req_prepared = false;
+               spin_unlock_irqrestore(&engine->queue_lock, flags);
+       }
+
+       req->base.complete(&req->base, err);
 
+       queue_kthread_work(&engine->kworker, &engine->pump_requests);
+}
+EXPORT_SYMBOL_GPL(crypto_finalize_cipher_request);
+
+/**
+ * crypto_finalize_hash_request - finalize one request if the request is done
+ * @engine: the hardware engine
+ * @req: the request need to be finalized
+ * @err: error number
+ */
+void crypto_finalize_hash_request(struct crypto_engine *engine,
+                                 struct ahash_request *req, int err)
+{
+       unsigned long flags;
+       bool finalize_cur_req = false;
+       int ret;
+
+       spin_lock_irqsave(&engine->queue_lock, flags);
+       if (engine->cur_req == &req->base)
+               finalize_cur_req = true;
+       spin_unlock_irqrestore(&engine->queue_lock, flags);
+
+       if (finalize_cur_req) {
+               if (engine->cur_req_prepared &&
+                   engine->unprepare_hash_request) {
+                       ret = engine->unprepare_hash_request(engine, req);
+                       if (ret)
+                               pr_err("failed to unprepare request\n");
+               }
                spin_lock_irqsave(&engine->queue_lock, flags);
                engine->cur_req = NULL;
                engine->cur_req_prepared = false;
@@ -212,7 +323,7 @@ void crypto_finalize_request(struct crypto_engine *engine,
 
        queue_kthread_work(&engine->kworker, &engine->pump_requests);
 }
-EXPORT_SYMBOL_GPL(crypto_finalize_request);
+EXPORT_SYMBOL_GPL(crypto_finalize_hash_request);
 
 /**
  * crypto_engine_start - start the hardware engine
@@ -249,7 +360,7 @@ EXPORT_SYMBOL_GPL(crypto_engine_start);
 int crypto_engine_stop(struct crypto_engine *engine)
 {
        unsigned long flags;
-       unsigned limit = 500;
+       unsigned int limit = 500;
        int ret = 0;
 
        spin_lock_irqsave(&engine->queue_lock, flags);
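
With the queue handling now split per request type, a hardware driver only has to provide the matching callbacks. The following is a hedged sketch of the hash-side wiring: identifiers prefixed my_ are hypothetical, and a real driver would normally complete the request from its interrupt handler rather than synchronously as shown here.

    #include <crypto/engine.h>
    #include <crypto/internal/hash.h>

    static struct crypto_engine *my_engine;        /* allocated in probe */

    /* .digest/.final handler: hand the request to the engine queue */
    static int my_ahash_enqueue(struct ahash_request *req)
    {
            return crypto_transfer_hash_request_to_engine(my_engine, req);
    }

    /* invoked by the engine kthread, one request at a time */
    static int my_hash_one_request(struct crypto_engine *engine,
                                   struct ahash_request *req)
    {
            int err = 0;            /* a real driver programs the hardware here */

            /* tell the engine we are done so it can pump the next request */
            crypto_finalize_hash_request(engine, req, err);
            return 0;
    }

    /* probe-time setup, roughly:
     *      my_engine = crypto_engine_alloc_init(dev, true);
     *      my_engine->hash_one_request = my_hash_one_request;
     *      crypto_engine_start(my_engine);
     */
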
index f752da3a7c757672170efea3f1f0a20296e36cd7..fb33f7d3b052f5bb016342c1da2ef61b9f74ad8e 100644 (file)
@@ -1178,12 +1178,16 @@ static inline int drbg_alloc_state(struct drbg_state *drbg)
                goto err;
 
        drbg->Vbuf = kmalloc(drbg_statelen(drbg) + ret, GFP_KERNEL);
-       if (!drbg->Vbuf)
+       if (!drbg->Vbuf) {
+               ret = -ENOMEM;
                goto fini;
+       }
        drbg->V = PTR_ALIGN(drbg->Vbuf, ret + 1);
        drbg->Cbuf = kmalloc(drbg_statelen(drbg) + ret, GFP_KERNEL);
-       if (!drbg->Cbuf)
+       if (!drbg->Cbuf) {
+               ret = -ENOMEM;
                goto fini;
+       }
        drbg->C = PTR_ALIGN(drbg->Cbuf, ret + 1);
        /* scratchpad is only generated for CTR and Hash */
        if (drbg->core->flags & DRBG_HMAC)
@@ -1199,8 +1203,10 @@ static inline int drbg_alloc_state(struct drbg_state *drbg)
 
        if (0 < sb_size) {
                drbg->scratchpadbuf = kzalloc(sb_size + ret, GFP_KERNEL);
-               if (!drbg->scratchpadbuf)
+               if (!drbg->scratchpadbuf) {
+                       ret = -ENOMEM;
                        goto fini;
+               }
                drbg->scratchpad = PTR_ALIGN(drbg->scratchpadbuf, ret + 1);
        }
 
@@ -1917,6 +1923,8 @@ static inline int __init drbg_healthcheck_sanity(void)
                return -ENOMEM;
 
        mutex_init(&drbg->drbg_mutex);
+       drbg->core = &drbg_cores[coreref];
+       drbg->reseed_threshold = drbg_max_requests(drbg);
 
        /*
         * if the following tests fail, it is likely that there is a buffer
@@ -1926,12 +1934,6 @@ static inline int __init drbg_healthcheck_sanity(void)
         * grave bug.
         */
 
-       /* get a valid instance of DRBG for following tests */
-       ret = drbg_instantiate(drbg, NULL, coreref, pr);
-       if (ret) {
-               rc = ret;
-               goto outbuf;
-       }
        max_addtllen = drbg_max_addtl(drbg);
        max_request_bytes = drbg_max_request_bytes(drbg);
        drbg_string_fill(&addtl, buf, max_addtllen + 1);
@@ -1941,10 +1943,9 @@ static inline int __init drbg_healthcheck_sanity(void)
        /* overflow max_bits */
        len = drbg_generate(drbg, buf, (max_request_bytes + 1), NULL);
        BUG_ON(0 < len);
-       drbg_uninstantiate(drbg);
 
        /* overflow max addtllen with personalization string */
-       ret = drbg_instantiate(drbg, &addtl, coreref, pr);
+       ret = drbg_seed(drbg, &addtl, false);
        BUG_ON(0 == ret);
        /* all tests passed */
        rc = 0;
@@ -1952,9 +1953,7 @@ static inline int __init drbg_healthcheck_sanity(void)
        pr_devel("DRBG: Sanity tests for failure code paths successfully "
                 "completed\n");
 
-       drbg_uninstantiate(drbg);
-outbuf:
-       kzfree(drbg);
+       kfree(drbg);
        return rc;
 }
 
@@ -2006,7 +2005,7 @@ static int __init drbg_init(void)
 {
        unsigned int i = 0; /* pointer to drbg_algs */
        unsigned int j = 0; /* pointer to drbg_cores */
-       int ret = -EFAULT;
+       int ret;
 
        ret = drbg_healthcheck_sanity();
        if (ret)
@@ -2016,7 +2015,7 @@ static int __init drbg_init(void)
                pr_info("DRBG: Cannot register all DRBG types"
                        "(slots needed: %zu, slots available: %zu)\n",
                        ARRAY_SIZE(drbg_cores) * 2, ARRAY_SIZE(drbg_algs));
-               return ret;
+               return -EFAULT;
        }
 
        /*
index 70a892e87ccb527d774c8aba72463813faca8b22..f624ac98c94e4d9b1910178feb279a610c603e28 100644 (file)
@@ -117,7 +117,7 @@ static int crypto_gcm_setkey(struct crypto_aead *aead, const u8 *key,
        struct crypto_skcipher *ctr = ctx->ctr;
        struct {
                be128 hash;
-               u8 iv[8];
+               u8 iv[16];
 
                struct crypto_gcm_setkey_result result;
 
index bac70995e0640a49fbc56797c4f7b605791ff98b..12ad3e3a84e3d7d570e75fad4c521718a3bf9790 100644 (file)
 
 #include <crypto/algapi.h>
 #include <crypto/gf128mul.h>
+#include <crypto/ghash.h>
 #include <crypto/internal/hash.h>
 #include <linux/crypto.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 
-#define GHASH_BLOCK_SIZE       16
-#define GHASH_DIGEST_SIZE      16
-
-struct ghash_ctx {
-       struct gf128mul_4k *gf128;
-};
-
-struct ghash_desc_ctx {
-       u8 buffer[GHASH_BLOCK_SIZE];
-       u32 bytes;
-};
-
 static int ghash_init(struct shash_desc *desc)
 {
        struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
index 86fb59b109a996193eefde142abb89bc29e87258..94ee44acd4656811c081549b849c295ef160bd12 100644 (file)
@@ -612,12 +612,7 @@ EXPORT_SYMBOL_GPL(mcryptd_alloc_ahash);
 
 int ahash_mcryptd_digest(struct ahash_request *desc)
 {
-       int err;
-
-       err = crypto_ahash_init(desc) ?:
-             ahash_mcryptd_finup(desc);
-
-       return err;
+       return crypto_ahash_init(desc) ?: ahash_mcryptd_finup(desc);
 }
 
 int ahash_mcryptd_update(struct ahash_request *desc)
index 4df6451e7543670f7836c2507e77d7329d65b10f..0b66dc8246068aa084dd0b44210b04dee5f2bccb 100644 (file)
@@ -35,8 +35,8 @@ int rsa_get_n(void *context, size_t hdrlen, unsigned char tag,
                        n_sz--;
                }
 
-               /* In FIPS mode only allow key size 2K & 3K */
-               if (n_sz != 256 && n_sz != 384) {
+               /* In FIPS mode only allow key size 2K and higher */
+               if (n_sz < 256) {
                        pr_err("RSA: key size not allowed in FIPS mode\n");
                        return -EINVAL;
                }
index 5c9d5a5e7b65182ed1f78b5c0332c5c9b2a328c1..62dffa0028acdb54f286d2ca1aa89ca9b422945e 100644 (file)
@@ -209,16 +209,19 @@ static int ahash_partial_update(struct ahash_request **preq,
        char *state;
        struct ahash_request *req;
        int statesize, ret = -EINVAL;
+       const char guard[] = { 0x00, 0xba, 0xad, 0x00 };
 
        req = *preq;
        statesize = crypto_ahash_statesize(
                        crypto_ahash_reqtfm(req));
-       state = kmalloc(statesize, GFP_KERNEL);
+       state = kmalloc(statesize + sizeof(guard), GFP_KERNEL);
        if (!state) {
                pr_err("alt: hash: Failed to alloc state for %s\n", algo);
                goto out_nostate;
        }
+       memcpy(state + statesize, guard, sizeof(guard));
        ret = crypto_ahash_export(req, state);
+       WARN_ON(memcmp(state + statesize, guard, sizeof(guard)));
        if (ret) {
                pr_err("alt: hash: Failed to export() for %s\n", algo);
                goto out;
@@ -665,7 +668,7 @@ static int __test_aead(struct crypto_aead *tfm, int enc,
                memcpy(key, template[i].key, template[i].klen);
 
                ret = crypto_aead_setkey(tfm, key, template[i].klen);
-               if (!ret == template[i].fail) {
+               if (template[i].fail == !ret) {
                        pr_err("alg: aead%s: setkey failed on test %d for %s: flags=%x\n",
                               d, j, algo, crypto_aead_get_flags(tfm));
                        goto out;
@@ -770,7 +773,7 @@ static int __test_aead(struct crypto_aead *tfm, int enc,
                memcpy(key, template[i].key, template[i].klen);
 
                ret = crypto_aead_setkey(tfm, key, template[i].klen);
-               if (!ret == template[i].fail) {
+               if (template[i].fail == !ret) {
                        pr_err("alg: aead%s: setkey failed on chunk test %d for %s: flags=%x\n",
                               d, j, algo, crypto_aead_get_flags(tfm));
                        goto out;
@@ -1008,6 +1011,9 @@ static int test_cipher(struct crypto_cipher *tfm, int enc,
                if (template[i].np)
                        continue;
 
+               if (fips_enabled && template[i].fips_skip)
+                       continue;
+
                j++;
 
                ret = -EINVAL;
@@ -1023,7 +1029,7 @@ static int test_cipher(struct crypto_cipher *tfm, int enc,
 
                ret = crypto_cipher_setkey(tfm, template[i].key,
                                           template[i].klen);
-               if (!ret == template[i].fail) {
+               if (template[i].fail == !ret) {
                        printk(KERN_ERR "alg: cipher: setkey failed "
                               "on test %d for %s: flags=%x\n", j,
                               algo, crypto_cipher_get_flags(tfm));
@@ -1112,6 +1118,9 @@ static int __test_skcipher(struct crypto_skcipher *tfm, int enc,
                if (template[i].np && !template[i].also_non_np)
                        continue;
 
+               if (fips_enabled && template[i].fips_skip)
+                       continue;
+
                if (template[i].iv)
                        memcpy(iv, template[i].iv, ivsize);
                else
@@ -1133,7 +1142,7 @@ static int __test_skcipher(struct crypto_skcipher *tfm, int enc,
 
                ret = crypto_skcipher_setkey(tfm, template[i].key,
                                             template[i].klen);
-               if (!ret == template[i].fail) {
+               if (template[i].fail == !ret) {
                        pr_err("alg: skcipher%s: setkey failed on test %d for %s: flags=%x\n",
                               d, j, algo, crypto_skcipher_get_flags(tfm));
                        goto out;
@@ -1198,6 +1207,9 @@ static int __test_skcipher(struct crypto_skcipher *tfm, int enc,
                if (!template[i].np)
                        continue;
 
+               if (fips_enabled && template[i].fips_skip)
+                       continue;
+
                if (template[i].iv)
                        memcpy(iv, template[i].iv, ivsize);
                else
@@ -1211,7 +1223,7 @@ static int __test_skcipher(struct crypto_skcipher *tfm, int enc,
 
                ret = crypto_skcipher_setkey(tfm, template[i].key,
                                             template[i].klen);
-               if (!ret == template[i].fail) {
+               if (template[i].fail == !ret) {
                        pr_err("alg: skcipher%s: setkey failed on chunk test %d for %s: flags=%x\n",
                               d, j, algo, crypto_skcipher_get_flags(tfm));
                        goto out;
index acb6bbff781a2d3be030d69109912efe5e6b865f..e64a4ef9d8ca28fa1415e3e97a5d404478b89619 100644 (file)
@@ -59,6 +59,7 @@ struct hash_testvec {
  * @tap:       How to distribute data in @np SGs
  * @also_non_np:       if set to 1, the test will be also done without
  *                     splitting data in @np SGs
+ * @fips_skip: Skip the test vector in FIPS mode
  */
 
 struct cipher_testvec {
@@ -75,6 +76,7 @@ struct cipher_testvec {
        unsigned char klen;
        unsigned short ilen;
        unsigned short rlen;
+       bool fips_skip;
 };
 
 struct aead_testvec {
@@ -18224,6 +18226,7 @@ static struct cipher_testvec aes_xts_enc_tv_template[] = {
                          "\x00\x00\x00\x00\x00\x00\x00\x00"
                          "\x00\x00\x00\x00\x00\x00\x00\x00",
                .klen   = 32,
+               .fips_skip = 1,
                .iv     = "\x00\x00\x00\x00\x00\x00\x00\x00"
                          "\x00\x00\x00\x00\x00\x00\x00\x00",
                .input  = "\x00\x00\x00\x00\x00\x00\x00\x00"
@@ -18566,6 +18569,7 @@ static struct cipher_testvec aes_xts_dec_tv_template[] = {
                          "\x00\x00\x00\x00\x00\x00\x00\x00"
                          "\x00\x00\x00\x00\x00\x00\x00\x00",
                .klen   = 32,
+               .fips_skip = 1,
                .iv     = "\x00\x00\x00\x00\x00\x00\x00\x00"
                          "\x00\x00\x00\x00\x00\x00\x00\x00",
                .input = "\x91\x7c\xf6\x9e\xbd\x68\xb2\xec"
index 35d6b3adf2308fbb4ab2bd7d3358b8328bff8936..263af9fb45ea281c94c41d5698d4d264d489bba9 100644 (file)
 #include <linux/preempt.h>
 #include <asm/xor.h>
 
+#ifndef XOR_SELECT_TEMPLATE
+#define XOR_SELECT_TEMPLATE(x) (x)
+#endif
+
 /* The xor routines to use.  */
 static struct xor_block_template *active_template;
 
@@ -109,6 +113,15 @@ calibrate_xor_blocks(void)
        void *b1, *b2;
        struct xor_block_template *f, *fastest;
 
+       fastest = XOR_SELECT_TEMPLATE(NULL);
+
+       if (fastest) {
+               printk(KERN_INFO "xor: automatically using best "
+                                "checksumming function   %-10s\n",
+                      fastest->name);
+               goto out;
+       }
+
        /*
         * Note: Since the memory is not actually used for _anything_ but to
         * test the XOR speed, we don't really want kmemcheck to warn about
@@ -126,36 +139,22 @@ calibrate_xor_blocks(void)
         * all the possible functions, just test the best one
         */
 
-       fastest = NULL;
-
-#ifdef XOR_SELECT_TEMPLATE
-               fastest = XOR_SELECT_TEMPLATE(fastest);
-#endif
-
 #define xor_speed(templ)       do_xor_speed((templ), b1, b2)
 
-       if (fastest) {
-               printk(KERN_INFO "xor: automatically using best "
-                                "checksumming function:\n");
-               xor_speed(fastest);
-               goto out;
-       } else {
-               printk(KERN_INFO "xor: measuring software checksum speed\n");
-               XOR_TRY_TEMPLATES;
-               fastest = template_list;
-               for (f = fastest; f; f = f->next)
-                       if (f->speed > fastest->speed)
-                               fastest = f;
-       }
+       printk(KERN_INFO "xor: measuring software checksum speed\n");
+       XOR_TRY_TEMPLATES;
+       fastest = template_list;
+       for (f = fastest; f; f = f->next)
+               if (f->speed > fastest->speed)
+                       fastest = f;
 
        printk(KERN_INFO "xor: using function: %s (%d.%03d MB/sec)\n",
               fastest->name, fastest->speed / 1000, fastest->speed % 1000);
 
 #undef xor_speed
 
- out:
        free_pages((unsigned long)b1, 2);
-
+out:
        active_template = fastest;
        return 0;
 }
index 26ba5833b9944039ad1b172534e5afb09a7902eb..305343f22a02a839d2710b785a4679b842ffc7f7 100644 (file)
@@ -5,7 +5,7 @@
  *
  * Copyright (c) 2007 Rik Snel <rsnel@cube.dyndns.org>
  *
- * Based om ecb.c
+ * Based on ecb.c
  * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
  *
  * This program is free software; you can redistribute it and/or modify it
index 8c0770bf8881351c1d956d310c5d670684090770..200dab5136a7f99576fffcb440edc7eedfa71e78 100644 (file)
@@ -410,6 +410,19 @@ config HW_RANDOM_MESON
 
          If unsure, say Y.
 
+config HW_RANDOM_CAVIUM
+       tristate "Cavium ThunderX Random Number Generator support"
+       depends on HW_RANDOM && PCI && (ARM64 || (COMPILE_TEST && 64BIT))
+       default HW_RANDOM
+       ---help---
+         This driver provides kernel-side support for the Random Number
+         Generator hardware found on Cavium SoCs.
+
+         To compile this driver as a module, choose M here: the
+         module will be called cavium_rng.
+
+         If unsure, say Y.
+
 endif # HW_RANDOM
 
 config UML_RANDOM
index 04bb0b03356ff75a9b8792d6e6a96cbf788e5b68..5f52b1e4e7bed93a42ef0357b14fc7d130c1a7c7 100644 (file)
@@ -35,3 +35,4 @@ obj-$(CONFIG_HW_RANDOM_XGENE) += xgene-rng.o
 obj-$(CONFIG_HW_RANDOM_STM32) += stm32-rng.o
 obj-$(CONFIG_HW_RANDOM_PIC32) += pic32-rng.o
 obj-$(CONFIG_HW_RANDOM_MESON) += meson-rng.o
+obj-$(CONFIG_HW_RANDOM_CAVIUM) += cavium-rng.o cavium-rng-vf.o
index 48f6a83cdd61b5379f2a6ff14a6faf3dc52b04ad..4a99ac756f0815a890665f1433b1038a98069be9 100644 (file)
  * warranty of any kind, whether express or implied.
  */
 
-#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/hw_random.h>
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/pci.h>
-#include <linux/hw_random.h>
-#include <linux/delay.h>
-#include <asm/io.h>
 
+#define DRV_NAME "AMD768-HWRNG"
 
-#define PFX    KBUILD_MODNAME ": "
-
+#define RNGDATA                0x00
+#define RNGDONE                0x04
+#define PMBASE_OFFSET  0xF0
+#define PMBASE_SIZE    8
 
 /*
  * Data for PCI driver interface
@@ -50,72 +52,84 @@ static const struct pci_device_id pci_tbl[] = {
 };
 MODULE_DEVICE_TABLE(pci, pci_tbl);
 
-static struct pci_dev *amd_pdev;
-
+struct amd768_priv {
+       void __iomem *iobase;
+       struct pci_dev *pcidev;
+};
 
-static int amd_rng_data_present(struct hwrng *rng, int wait)
+static int amd_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait)
 {
-       u32 pmbase = (u32)rng->priv;
-       int data, i;
-
-       for (i = 0; i < 20; i++) {
-               data = !!(inl(pmbase + 0xF4) & 1);
-               if (data || !wait)
-                       break;
-               udelay(10);
+       u32 *data = buf;
+       struct amd768_priv *priv = (struct amd768_priv *)rng->priv;
+       size_t read = 0;
+       /* We will wait at maximum one time per read */
+       int timeout = max / 4 + 1;
+
+       /*
+        * RNG data is available when RNGDONE is set to 1
+        * New random numbers are generated approximately 128 microseconds
+        * after RNGDATA is read
+        */
+       while (read < max) {
+               if (ioread32(priv->iobase + RNGDONE) == 0) {
+                       if (wait) {
+                               /* Delay given by datasheet */
+                               usleep_range(128, 196);
+                               if (timeout-- == 0)
+                                       return read;
+                       } else {
+                               return 0;
+                       }
+               } else {
+                       *data = ioread32(priv->iobase + RNGDATA);
+                       data++;
+                       read += 4;
+               }
        }
-       return data;
-}
 
-static int amd_rng_data_read(struct hwrng *rng, u32 *data)
-{
-       u32 pmbase = (u32)rng->priv;
-
-       *data = inl(pmbase + 0xF0);
-
-       return 4;
+       return read;
 }
 
 static int amd_rng_init(struct hwrng *rng)
 {
+       struct amd768_priv *priv = (struct amd768_priv *)rng->priv;
        u8 rnen;
 
-       pci_read_config_byte(amd_pdev, 0x40, &rnen);
-       rnen |= (1 << 7);       /* RNG on */
-       pci_write_config_byte(amd_pdev, 0x40, rnen);
+       pci_read_config_byte(priv->pcidev, 0x40, &rnen);
+       rnen |= BIT(7); /* RNG on */
+       pci_write_config_byte(priv->pcidev, 0x40, rnen);
 
-       pci_read_config_byte(amd_pdev, 0x41, &rnen);
-       rnen |= (1 << 7);       /* PMIO enable */
-       pci_write_config_byte(amd_pdev, 0x41, rnen);
+       pci_read_config_byte(priv->pcidev, 0x41, &rnen);
+       rnen |= BIT(7); /* PMIO enable */
+       pci_write_config_byte(priv->pcidev, 0x41, rnen);
 
        return 0;
 }
 
 static void amd_rng_cleanup(struct hwrng *rng)
 {
+       struct amd768_priv *priv = (struct amd768_priv *)rng->priv;
        u8 rnen;
 
-       pci_read_config_byte(amd_pdev, 0x40, &rnen);
-       rnen &= ~(1 << 7);      /* RNG off */
-       pci_write_config_byte(amd_pdev, 0x40, rnen);
+       pci_read_config_byte(priv->pcidev, 0x40, &rnen);
+       rnen &= ~BIT(7);        /* RNG off */
+       pci_write_config_byte(priv->pcidev, 0x40, rnen);
 }
 
-
 static struct hwrng amd_rng = {
        .name           = "amd",
        .init           = amd_rng_init,
        .cleanup        = amd_rng_cleanup,
-       .data_present   = amd_rng_data_present,
-       .data_read      = amd_rng_data_read,
+       .read           = amd_rng_read,
 };
 
-
 static int __init mod_init(void)
 {
        int err = -ENODEV;
        struct pci_dev *pdev = NULL;
        const struct pci_device_id *ent;
        u32 pmbase;
+       struct amd768_priv *priv;
 
        for_each_pci_dev(pdev) {
                ent = pci_match_id(pci_tbl, pdev);
@@ -123,42 +137,44 @@ static int __init mod_init(void)
                        goto found;
        }
        /* Device not found. */
-       goto out;
+       return -ENODEV;
 
 found:
        err = pci_read_config_dword(pdev, 0x58, &pmbase);
        if (err)
-               goto out;
-       err = -EIO;
+               return err;
+
        pmbase &= 0x0000FF00;
        if (pmbase == 0)
-               goto out;
-       if (!request_region(pmbase + 0xF0, 8, "AMD HWRNG")) {
-               dev_err(&pdev->dev, "AMD HWRNG region 0x%x already in use!\n",
+               return -EIO;
+
+       priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+       if (!priv)
+               return -ENOMEM;
+
+       if (!devm_request_region(&pdev->dev, pmbase + PMBASE_OFFSET,
+                               PMBASE_SIZE, DRV_NAME)) {
+               dev_err(&pdev->dev, DRV_NAME " region 0x%x already in use!\n",
                        pmbase + 0xF0);
-               err = -EBUSY;
-               goto out;
+               return -EBUSY;
        }
-       amd_rng.priv = (unsigned long)pmbase;
-       amd_pdev = pdev;
-
-       pr_info("AMD768 RNG detected\n");
-       err = hwrng_register(&amd_rng);
-       if (err) {
-               pr_err(PFX "RNG registering failed (%d)\n",
-                      err);
-               release_region(pmbase + 0xF0, 8);
-               goto out;
+
+       priv->iobase = devm_ioport_map(&pdev->dev, pmbase + PMBASE_OFFSET,
+                       PMBASE_SIZE);
+       if (!priv->iobase) {
+               pr_err(DRV_NAME "Cannot map ioport\n");
+               return -ENOMEM;
        }
-out:
-       return err;
+
+       amd_rng.priv = (unsigned long)priv;
+       priv->pcidev = pdev;
+
+       pr_info(DRV_NAME " detected\n");
+       return devm_hwrng_register(&pdev->dev, &amd_rng);
 }
 
 static void __exit mod_exit(void)
 {
-       u32 pmbase = (unsigned long)amd_rng.priv;
-       release_region(pmbase + 0xF0, 8);
-       hwrng_unregister(&amd_rng);
 }
 
 module_init(mod_init);
index af2149273fe049b075d0953034cda0aab1525f23..574211a495491d9d6021dcaefe4274a63ed02055 100644 (file)
@@ -92,9 +92,10 @@ static int bcm2835_rng_probe(struct platform_device *pdev)
        bcm2835_rng_ops.priv = (unsigned long)rng_base;
 
        rng_id = of_match_node(bcm2835_rng_of_match, np);
-       if (!rng_id)
+       if (!rng_id) {
+               iounmap(rng_base);
                return -EINVAL;
-
+       }
        /* Check for rng init function, execute it */
        rng_setup = rng_id->data;
        if (rng_setup)
diff --git a/drivers/char/hw_random/cavium-rng-vf.c b/drivers/char/hw_random/cavium-rng-vf.c
new file mode 100644 (file)
index 0000000..066ae0e
--- /dev/null
@@ -0,0 +1,99 @@
+/*
+ * Hardware Random Number Generator support for Cavium, Inc.
+ * Thunder processor family.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2016 Cavium, Inc.
+ */
+
+#include <linux/hw_random.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+
+struct cavium_rng {
+       struct hwrng ops;
+       void __iomem *result;
+};
+
+/* Read data from the RNG unit */
+static int cavium_rng_read(struct hwrng *rng, void *dat, size_t max, bool wait)
+{
+       struct cavium_rng *p = container_of(rng, struct cavium_rng, ops);
+       unsigned int size = max;
+
+       while (size >= 8) {
+               *((u64 *)dat) = readq(p->result);
+               size -= 8;
+               dat += 8;
+       }
+       while (size > 0) {
+               *((u8 *)dat) = readb(p->result);
+               size--;
+               dat++;
+       }
+       return max;
+}
+
+/* Map Cavium RNG to an HWRNG object */
+static int cavium_rng_probe_vf(struct  pci_dev         *pdev,
+                        const struct   pci_device_id   *id)
+{
+       struct  cavium_rng *rng;
+       int     ret;
+
+       rng = devm_kzalloc(&pdev->dev, sizeof(*rng), GFP_KERNEL);
+       if (!rng)
+               return -ENOMEM;
+
+       /* Map the RNG result */
+       rng->result = pcim_iomap(pdev, 0, 0);
+       if (!rng->result) {
+               dev_err(&pdev->dev, "Error iomap failed retrieving result.\n");
+               return -ENOMEM;
+       }
+
+       rng->ops.name    = "cavium rng";
+       rng->ops.read    = cavium_rng_read;
+       rng->ops.quality = 1000;
+
+       pci_set_drvdata(pdev, rng);
+
+       ret = hwrng_register(&rng->ops);
+       if (ret) {
+               dev_err(&pdev->dev, "Error registering device as HWRNG.\n");
+               return ret;
+       }
+
+       return 0;
+}
+
+/* Remove the VF */
+void  cavium_rng_remove_vf(struct pci_dev *pdev)
+{
+       struct cavium_rng *rng;
+
+       rng = pci_get_drvdata(pdev);
+       hwrng_unregister(&rng->ops);
+}
+
+static const struct pci_device_id cavium_rng_vf_id_table[] = {
+       { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, 0xa033), 0, 0, 0},
+       {0,},
+};
+MODULE_DEVICE_TABLE(pci, cavium_rng_vf_id_table);
+
+static struct pci_driver cavium_rng_vf_driver = {
+       .name           = "cavium_rng_vf",
+       .id_table       = cavium_rng_vf_id_table,
+       .probe          = cavium_rng_probe_vf,
+       .remove         = cavium_rng_remove_vf,
+};
+module_pci_driver(cavium_rng_vf_driver);
+
+MODULE_AUTHOR("Omer Khaliq <okhaliq@caviumnetworks.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/char/hw_random/cavium-rng.c b/drivers/char/hw_random/cavium-rng.c
new file mode 100644 (file)
index 0000000..a944e0a
--- /dev/null
@@ -0,0 +1,94 @@
+/*
+ * Hardware Random Number Generator support for Cavium Inc.
+ * Thunder processor family.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2016 Cavium, Inc.
+ */
+
+#include <linux/hw_random.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+
+#define THUNDERX_RNM_ENT_EN     0x1
+#define THUNDERX_RNM_RNG_EN     0x2
+
+struct cavium_rng_pf {
+       void __iomem *control_status;
+};
+
+/* Enable the RNG hardware and activate the VF */
+static int cavium_rng_probe(struct pci_dev *pdev,
+                       const struct pci_device_id *id)
+{
+       struct  cavium_rng_pf *rng;
+       int     iov_err;
+
+       rng = devm_kzalloc(&pdev->dev, sizeof(*rng), GFP_KERNEL);
+       if (!rng)
+               return -ENOMEM;
+
+       /*Map the RNG control */
+       rng->control_status = pcim_iomap(pdev, 0, 0);
+       if (!rng->control_status) {
+               dev_err(&pdev->dev,
+                       "Error iomap failed retrieving control_status.\n");
+               return -ENOMEM;
+       }
+
+       /* Enable the RNG hardware and entropy source */
+       writeq(THUNDERX_RNM_RNG_EN | THUNDERX_RNM_ENT_EN,
+               rng->control_status);
+
+       pci_set_drvdata(pdev, rng);
+
+       /* Enable the Cavium RNG as a VF */
+       iov_err = pci_enable_sriov(pdev, 1);
+       if (iov_err != 0) {
+               /* Disable the RNG hardware and entropy source */
+               writeq(0, rng->control_status);
+               dev_err(&pdev->dev,
+                       "Error initializing RNG virtual function,(%i).\n",
+                       iov_err);
+               return iov_err;
+       }
+
+       return 0;
+}
+
+/* Disable VF and RNG Hardware */
+void  cavium_rng_remove(struct pci_dev *pdev)
+{
+       struct cavium_rng_pf *rng;
+
+       rng = pci_get_drvdata(pdev);
+
+       /* Remove the VF */
+       pci_disable_sriov(pdev);
+
+       /* Disable the RNG hardware and entropy source */
+       writeq(0, rng->control_status);
+}
+
+static const struct pci_device_id cavium_rng_pf_id_table[] = {
+       { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, 0xa018), 0, 0, 0}, /* Thunder RNM */
+       {0,},
+};
+
+MODULE_DEVICE_TABLE(pci, cavium_rng_pf_id_table);
+
+static struct pci_driver cavium_rng_pf_driver = {
+       .name           = "cavium_rng_pf",
+       .id_table       = cavium_rng_pf_id_table,
+       .probe          = cavium_rng_probe,
+       .remove         = cavium_rng_remove,
+};
+
+module_pci_driver(cavium_rng_pf_driver);
+MODULE_AUTHOR("Omer Khaliq <okhaliq@caviumnetworks.com>");
+MODULE_LICENSE("GPL");
index 9203f2d130c06fd8d4091bf36aeccfa1e538e371..482794526e8cd52418dc1c1e6e5b200943d3c4c8 100644 (file)
@@ -449,22 +449,6 @@ int hwrng_register(struct hwrng *rng)
                goto out;
 
        mutex_lock(&rng_mutex);
-
-       /* kmalloc makes this safe for virt_to_page() in virtio_rng.c */
-       err = -ENOMEM;
-       if (!rng_buffer) {
-               rng_buffer = kmalloc(rng_buffer_size(), GFP_KERNEL);
-               if (!rng_buffer)
-                       goto out_unlock;
-       }
-       if (!rng_fillbuf) {
-               rng_fillbuf = kmalloc(rng_buffer_size(), GFP_KERNEL);
-               if (!rng_fillbuf) {
-                       kfree(rng_buffer);
-                       goto out_unlock;
-               }
-       }
-
        /* Must not register two RNGs with the same name. */
        err = -EEXIST;
        list_for_each_entry(tmp, &rng_list, list) {
@@ -573,7 +557,26 @@ EXPORT_SYMBOL_GPL(devm_hwrng_unregister);
 
 static int __init hwrng_modinit(void)
 {
-       return register_miscdev();
+       int ret = -ENOMEM;
+
+       /* kmalloc makes this safe for virt_to_page() in virtio_rng.c */
+       rng_buffer = kmalloc(rng_buffer_size(), GFP_KERNEL);
+       if (!rng_buffer)
+               return -ENOMEM;
+
+       rng_fillbuf = kmalloc(rng_buffer_size(), GFP_KERNEL);
+       if (!rng_fillbuf) {
+               kfree(rng_buffer);
+               return -ENOMEM;
+       }
+
+       ret = register_miscdev();
+       if (ret) {
+               kfree(rng_fillbuf);
+               kfree(rng_buffer);
+       }
+
+       return ret;
 }
 
 static void __exit hwrng_modexit(void)
index 0d0579fe465e3bdc71149b81e4a5e1f14171c9a5..e7a2459420291b6b931b285fe2b9be22cdd3e897 100644 (file)
  * warranty of any kind, whether express or implied.
  */
 
-#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/hw_random.h>
+#include <linux/io.h>
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/pci.h>
-#include <linux/hw_random.h>
-#include <linux/delay.h>
-#include <asm/io.h>
-
-
-#define PFX    KBUILD_MODNAME ": "
 
 #define GEODE_RNG_DATA_REG   0x50
 #define GEODE_RNG_STATUS_REG 0x54
@@ -85,7 +82,6 @@ static struct hwrng geode_rng = {
 
 static int __init mod_init(void)
 {
-       int err = -ENODEV;
        struct pci_dev *pdev = NULL;
        const struct pci_device_id *ent;
        void __iomem *mem;
@@ -93,43 +89,27 @@ static int __init mod_init(void)
 
        for_each_pci_dev(pdev) {
                ent = pci_match_id(pci_tbl, pdev);
-               if (ent)
-                       goto found;
-       }
-       /* Device not found. */
-       goto out;
-
-found:
-       rng_base = pci_resource_start(pdev, 0);
-       if (rng_base == 0)
-               goto out;
-       err = -ENOMEM;
-       mem = ioremap(rng_base, 0x58);
-       if (!mem)
-               goto out;
-       geode_rng.priv = (unsigned long)mem;
-
-       pr_info("AMD Geode RNG detected\n");
-       err = hwrng_register(&geode_rng);
-       if (err) {
-               pr_err(PFX "RNG registering failed (%d)\n",
-                      err);
-               goto err_unmap;
+               if (ent) {
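+                       /* first matching Geode device: map its RNG registers */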
+                       rng_base = pci_resource_start(pdev, 0);
+                       if (rng_base == 0)
+                               return -ENODEV;
+
+                       mem = devm_ioremap(&pdev->dev, rng_base, 0x58);
+                       if (!mem)
+                               return -ENOMEM;
+                       geode_rng.priv = (unsigned long)mem;
+
+                       pr_info("AMD Geode RNG detected\n");
+                       return devm_hwrng_register(&pdev->dev, &geode_rng);
+               }
        }
-out:
-       return err;
 
-err_unmap:
-       iounmap(mem);
-       goto out;
+       /* Device not found. */
+       return -ENODEV;
 }
 
 static void __exit mod_exit(void)
 {
-       void __iomem *mem = (void __iomem *)geode_rng.priv;
-
-       hwrng_unregister(&geode_rng);
-       iounmap(mem);
 }
 
 module_init(mod_init);
index 0cfd81bcaeacf492946b5e4f37a612414ed43407..58bef39f72860906068d18f65ab362ffbe1afa78 100644 (file)
@@ -76,9 +76,6 @@ static int meson_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait)
        struct meson_rng_data *data =
                        container_of(rng, struct meson_rng_data, rng);
 
-       if (max < sizeof(u32))
-               return 0;
-
        *(u32 *)buf = readl_relaxed(data->base + RNG_DATA);
 
        return sizeof(u32);
index 01d4be2c354b06a83b176c05536e7023e0551097..f5c26a5f687582c1cbcd3ea981aae7a3e6738ac7 100644 (file)
@@ -385,7 +385,7 @@ static int omap_rng_probe(struct platform_device *pdev)
 
        pm_runtime_enable(&pdev->dev);
        ret = pm_runtime_get_sync(&pdev->dev);
-       if (ret) {
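+       /*
+        * pm_runtime_get_sync() may return 1 when the device was already
+        * active, so only a negative value indicates an error.
+        */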
+       if (ret < 0) {
                dev_err(&pdev->dev, "Failed to runtime_get device: %d\n", ret);
                pm_runtime_put_noidle(&pdev->dev);
                goto err_ioremap;
@@ -443,7 +443,7 @@ static int __maybe_unused omap_rng_resume(struct device *dev)
        int ret;
 
        ret = pm_runtime_get_sync(dev);
-       if (ret) {
+       if (ret < 0) {
                dev_err(dev, "Failed to runtime_get device: %d\n", ret);
                pm_runtime_put_noidle(dev);
                return ret;
index 8da14f1a1f569291a8c61107e085ecef331ce232..37a58d78aab317a8a97b39ebec02df1fb58fcaae 100644 (file)
@@ -71,12 +71,7 @@ static int omap3_rom_rng_get_random(void *buf, unsigned int count)
        return 0;
 }
 
-static int omap3_rom_rng_data_present(struct hwrng *rng, int wait)
-{
-       return 1;
-}
-
-static int omap3_rom_rng_data_read(struct hwrng *rng, u32 *data)
+static int omap3_rom_rng_read(struct hwrng *rng, void *data, size_t max, bool w)
 {
        int r;
 
@@ -88,8 +83,7 @@ static int omap3_rom_rng_data_read(struct hwrng *rng, u32 *data)
 
 static struct hwrng omap3_rom_rng_ops = {
        .name           = "omap3-rom",
-       .data_present   = omap3_rom_rng_data_present,
-       .data_read      = omap3_rom_rng_data_read,
+       .read           = omap3_rom_rng_read,
 };
 
 static int omap3_rom_rng_probe(struct platform_device *pdev)
index c19e23d22b36f79332bacf0cd732cdb7e78e39b5..545df485bcc42ee27da679fc4457074520d1abb0 100644 (file)
@@ -95,42 +95,20 @@ static struct hwrng pasemi_rng = {
        .data_read      = pasemi_rng_data_read,
 };
 
-static int rng_probe(struct platform_device *ofdev)
+static int rng_probe(struct platform_device *pdev)
 {
        void __iomem *rng_regs;
-       struct device_node *rng_np = ofdev->dev.of_node;
-       struct resource res;
-       int err = 0;
+       struct resource *res;
 
-       err = of_address_to_resource(rng_np, 0, &res);
-       if (err)
-               return -ENODEV;
-
-       rng_regs = ioremap(res.start, 0x100);
-
-       if (!rng_regs)
-               return -ENOMEM;
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       rng_regs = devm_ioremap_resource(&pdev->dev, res);
+       if (IS_ERR(rng_regs))
+               return PTR_ERR(rng_regs);
 
        pasemi_rng.priv = (unsigned long)rng_regs;
 
        pr_info("Registering PA Semi RNG\n");
-
-       err = hwrng_register(&pasemi_rng);
-
-       if (err)
-               iounmap(rng_regs);
-
-       return err;
-}
-
-static int rng_remove(struct platform_device *dev)
-{
-       void __iomem *rng_regs = (void __iomem *)pasemi_rng.priv;
-
-       hwrng_unregister(&pasemi_rng);
-       iounmap(rng_regs);
-
-       return 0;
+       return devm_hwrng_register(&pdev->dev, &pasemi_rng);
 }
 
 static const struct of_device_id rng_match[] = {
@@ -146,7 +124,6 @@ static struct platform_driver rng_driver = {
                .of_match_table = rng_match,
        },
        .probe          = rng_probe,
-       .remove         = rng_remove,
 };
 
 module_platform_driver(rng_driver);
index 108897bea2d042eeb857bd0a61f0d8458e6c8186..11dc9b7c09cef7dbf168f9cc156b0d7accedfc66 100644 (file)
@@ -143,7 +143,6 @@ static struct platform_driver pic32_rng_driver = {
        .remove         = pic32_rng_remove,
        .driver         = {
                .name   = "pic32-rng",
-               .owner  = THIS_MODULE,
                .of_match_table = of_match_ptr(pic32_rng_of_match),
        },
 };
index 1d35363d23c51929eff4d1924bed08684a6587cc..938ec10e733d6f9e57d4751d6d7c4ad6192441e7 100644 (file)
@@ -54,9 +54,6 @@ static int st_rng_read(struct hwrng *rng, void *data, size_t max, bool wait)
        u32 status;
        int i;
 
-       if (max < sizeof(u16))
-               return -EINVAL;
-
        /* Wait until FIFO is full - max 4uS*/
        for (i = 0; i < ST_RNG_FILL_FIFO_TIMEOUT; i++) {
                status = readl_relaxed(ddata->base + ST_RNG_STATUS_REG);
@@ -111,6 +108,7 @@ static int st_rng_probe(struct platform_device *pdev)
        ret = hwrng_register(&ddata->ops);
        if (ret) {
                dev_err(&pdev->dev, "Failed to register HW RNG\n");
+               clk_disable_unprepare(clk);
                return ret;
        }
 
index a7b69491341665526fcd1ee3a3074c2383ff27ed..1093583b579c56852daeda0d8d5033f484a48443 100644 (file)
@@ -144,22 +144,13 @@ static int __init tx4939_rng_probe(struct platform_device *dev)
        }
 
        platform_set_drvdata(dev, rngdev);
-       return hwrng_register(&rngdev->rng);
-}
-
-static int __exit tx4939_rng_remove(struct platform_device *dev)
-{
-       struct tx4939_rng *rngdev = platform_get_drvdata(dev);
-
-       hwrng_unregister(&rngdev->rng);
-       return 0;
+       return devm_hwrng_register(&dev->dev, &rngdev->rng);
 }
 
 static struct platform_driver tx4939_rng_driver = {
        .driver         = {
                .name   = "tx4939-rng",
        },
-       .remove = tx4939_rng_remove,
 };
 
 module_platform_driver_probe(tx4939_rng_driver, tx4939_rng_probe);
index 9b035b7d7f4f9be60724b7b9678063f023923f64..4d2b81f2b2230f4ce09df3e82d8ee042cb156930 100644 (file)
@@ -318,6 +318,9 @@ config CRYPTO_DEV_OMAP_AES
        select CRYPTO_AES
        select CRYPTO_BLKCIPHER
        select CRYPTO_ENGINE
+       select CRYPTO_CBC
+       select CRYPTO_ECB
+       select CRYPTO_CTR
        help
          OMAP processors have AES module accelerator. Select this if you
          want to use the OMAP module for AES algorithms.
index b3044219772cd7ac57e0bf2559eb7dea8caeca08..156aad167cd6fcd44e66ea9456e253f8e3e36866 100644 (file)
 #else
 #define debug(format, arg...)
 #endif
+
+#ifdef DEBUG
+#include <linux/highmem.h>
+
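+/*
+ * Hex-dump the data referenced by a scatterlist, mapping each page with
+ * kmap_atomic() before printing it.  @tlen is the total number of bytes
+ * to dump; the remaining parameters are passed through to print_hex_dump().
+ * @may_sleep is currently unused.
+ */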
+static void dbg_dump_sg(const char *level, const char *prefix_str,
+                       int prefix_type, int rowsize, int groupsize,
+                       struct scatterlist *sg, size_t tlen, bool ascii,
+                       bool may_sleep)
+{
+       struct scatterlist *it;
+       void *it_page;
+       size_t len;
+       void *buf;
+
+       for (it = sg; it != NULL && tlen > 0; it = sg_next(it)) {
+               /*
+                * make sure the scatterlist's page
+                * has a valid virtual memory mapping
+                */
+               it_page = kmap_atomic(sg_page(it));
+               if (unlikely(!it_page)) {
+                       printk(KERN_ERR "dbg_dump_sg: kmap failed\n");
+                       return;
+               }
+
+               buf = it_page + it->offset;
+               len = min(tlen, it->length);
+               print_hex_dump(level, prefix_str, prefix_type, rowsize,
+                              groupsize, buf, len, ascii);
+               tlen -= len;
+
+               kunmap_atomic(it_page);
+       }
+}
+#endif
+
 static struct list_head alg_list;
 
 struct caam_alg_entry {
@@ -227,8 +263,9 @@ static void append_key_aead(u32 *desc, struct caam_ctx *ctx,
        if (is_rfc3686) {
                nonce = (u32 *)((void *)ctx->key + ctx->split_key_pad_len +
                               enckeylen);
-               append_load_imm_u32(desc, *nonce, LDST_CLASS_IND_CCB |
-                                   LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM);
+               append_load_as_imm(desc, nonce, CTR_RFC3686_NONCE_SIZE,
+                                  LDST_CLASS_IND_CCB |
+                                  LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM);
                append_move(desc,
                            MOVE_SRC_OUTFIFO |
                            MOVE_DEST_CLASS1CTX |
@@ -500,11 +537,10 @@ static int aead_set_sh_desc(struct crypto_aead *aead)
 
        /* Load Counter into CONTEXT1 reg */
        if (is_rfc3686)
-               append_load_imm_u32(desc, be32_to_cpu(1), LDST_IMM |
-                                   LDST_CLASS_1_CCB |
-                                   LDST_SRCDST_BYTE_CONTEXT |
-                                   ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
-                                    LDST_OFFSET_SHIFT));
+               append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB |
+                                    LDST_SRCDST_BYTE_CONTEXT |
+                                    ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
+                                     LDST_OFFSET_SHIFT));
 
        /* Class 1 operation */
        append_operation(desc, ctx->class1_alg_type |
@@ -578,11 +614,10 @@ skip_enc:
 
        /* Load Counter into CONTEXT1 reg */
        if (is_rfc3686)
-               append_load_imm_u32(desc, be32_to_cpu(1), LDST_IMM |
-                                   LDST_CLASS_1_CCB |
-                                   LDST_SRCDST_BYTE_CONTEXT |
-                                   ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
-                                    LDST_OFFSET_SHIFT));
+               append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB |
+                                    LDST_SRCDST_BYTE_CONTEXT |
+                                    ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
+                                     LDST_OFFSET_SHIFT));
 
        /* Choose operation */
        if (ctr_mode)
@@ -683,11 +718,10 @@ copy_iv:
 
        /* Load Counter into CONTEXT1 reg */
        if (is_rfc3686)
-               append_load_imm_u32(desc, be32_to_cpu(1), LDST_IMM |
-                                   LDST_CLASS_1_CCB |
-                                   LDST_SRCDST_BYTE_CONTEXT |
-                                   ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
-                                    LDST_OFFSET_SHIFT));
+               append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB |
+                                    LDST_SRCDST_BYTE_CONTEXT |
+                                    ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
+                                     LDST_OFFSET_SHIFT));
 
        /* Class 1 operation */
        append_operation(desc, ctx->class1_alg_type |
@@ -1478,7 +1512,7 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
        int ret = 0;
        u32 *key_jump_cmd;
        u32 *desc;
-       u32 *nonce;
+       u8 *nonce;
        u32 geniv;
        u32 ctx1_iv_off = 0;
        const bool ctr_mode = ((ctx->class1_alg_type & OP_ALG_AAI_MASK) ==
@@ -1531,9 +1565,10 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
 
        /* Load nonce into CONTEXT1 reg */
        if (is_rfc3686) {
-               nonce = (u32 *)(key + keylen);
-               append_load_imm_u32(desc, *nonce, LDST_CLASS_IND_CCB |
-                                   LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM);
+               nonce = (u8 *)key + keylen;
+               append_load_as_imm(desc, nonce, CTR_RFC3686_NONCE_SIZE,
+                                  LDST_CLASS_IND_CCB |
+                                  LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM);
                append_move(desc, MOVE_WAITCOMP |
                            MOVE_SRC_OUTFIFO |
                            MOVE_DEST_CLASS1CTX |
@@ -1549,11 +1584,10 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
 
        /* Load counter into CONTEXT1 reg */
        if (is_rfc3686)
-               append_load_imm_u32(desc, be32_to_cpu(1), LDST_IMM |
-                                   LDST_CLASS_1_CCB |
-                                   LDST_SRCDST_BYTE_CONTEXT |
-                                   ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
-                                    LDST_OFFSET_SHIFT));
+               append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB |
+                                    LDST_SRCDST_BYTE_CONTEXT |
+                                    ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
+                                     LDST_OFFSET_SHIFT));
 
        /* Load operation */
        append_operation(desc, ctx->class1_alg_type |
@@ -1590,9 +1624,10 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
 
        /* Load nonce into CONTEXT1 reg */
        if (is_rfc3686) {
-               nonce = (u32 *)(key + keylen);
-               append_load_imm_u32(desc, *nonce, LDST_CLASS_IND_CCB |
-                                   LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM);
+               nonce = (u8 *)key + keylen;
+               append_load_as_imm(desc, nonce, CTR_RFC3686_NONCE_SIZE,
+                                  LDST_CLASS_IND_CCB |
+                                  LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM);
                append_move(desc, MOVE_WAITCOMP |
                            MOVE_SRC_OUTFIFO |
                            MOVE_DEST_CLASS1CTX |
@@ -1608,11 +1643,10 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
 
        /* Load counter into CONTEXT1 reg */
        if (is_rfc3686)
-               append_load_imm_u32(desc, be32_to_cpu(1), LDST_IMM |
-                                   LDST_CLASS_1_CCB |
-                                   LDST_SRCDST_BYTE_CONTEXT |
-                                   ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
-                                    LDST_OFFSET_SHIFT));
+               append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB |
+                                    LDST_SRCDST_BYTE_CONTEXT |
+                                    ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
+                                     LDST_OFFSET_SHIFT));
 
        /* Choose operation */
        if (ctr_mode)
@@ -1653,9 +1687,10 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
 
        /* Load Nonce into CONTEXT1 reg */
        if (is_rfc3686) {
-               nonce = (u32 *)(key + keylen);
-               append_load_imm_u32(desc, *nonce, LDST_CLASS_IND_CCB |
-                                   LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM);
+               nonce = (u8 *)key + keylen;
+               append_load_as_imm(desc, nonce, CTR_RFC3686_NONCE_SIZE,
+                                  LDST_CLASS_IND_CCB |
+                                  LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM);
                append_move(desc, MOVE_WAITCOMP |
                            MOVE_SRC_OUTFIFO |
                            MOVE_DEST_CLASS1CTX |
@@ -1685,11 +1720,10 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
 
        /* Load Counter into CONTEXT1 reg */
        if (is_rfc3686)
-               append_load_imm_u32(desc, (u32)1, LDST_IMM |
-                                   LDST_CLASS_1_CCB |
-                                   LDST_SRCDST_BYTE_CONTEXT |
-                                   ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
-                                    LDST_OFFSET_SHIFT));
+               append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB |
+                                    LDST_SRCDST_BYTE_CONTEXT |
+                                    ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
+                                     LDST_OFFSET_SHIFT));
 
        if (ctx1_iv_off)
                append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | JUMP_COND_NCP |
@@ -1995,9 +2029,9 @@ static void ablkcipher_encrypt_done(struct device *jrdev, u32 *desc, u32 err,
        print_hex_dump(KERN_ERR, "dstiv  @"__stringify(__LINE__)": ",
                       DUMP_PREFIX_ADDRESS, 16, 4, req->info,
                       edesc->src_nents > 1 ? 100 : ivsize, 1);
-       print_hex_dump(KERN_ERR, "dst    @"__stringify(__LINE__)": ",
-                      DUMP_PREFIX_ADDRESS, 16, 4, sg_virt(req->src),
-                      edesc->dst_nents > 1 ? 100 : req->nbytes, 1);
+       dbg_dump_sg(KERN_ERR, "dst    @"__stringify(__LINE__)": ",
+                   DUMP_PREFIX_ADDRESS, 16, 4, req->dst,
+                   edesc->dst_nents > 1 ? 100 : req->nbytes, 1, true);
 #endif
 
        ablkcipher_unmap(jrdev, edesc, req);
@@ -2027,9 +2061,9 @@ static void ablkcipher_decrypt_done(struct device *jrdev, u32 *desc, u32 err,
        print_hex_dump(KERN_ERR, "dstiv  @"__stringify(__LINE__)": ",
                       DUMP_PREFIX_ADDRESS, 16, 4, req->info,
                       ivsize, 1);
-       print_hex_dump(KERN_ERR, "dst    @"__stringify(__LINE__)": ",
-                      DUMP_PREFIX_ADDRESS, 16, 4, sg_virt(req->src),
-                      edesc->dst_nents > 1 ? 100 : req->nbytes, 1);
+       dbg_dump_sg(KERN_ERR, "dst    @"__stringify(__LINE__)": ",
+                   DUMP_PREFIX_ADDRESS, 16, 4, req->dst,
+                   edesc->dst_nents > 1 ? 100 : req->nbytes, 1, true);
 #endif
 
        ablkcipher_unmap(jrdev, edesc, req);
@@ -2184,12 +2218,15 @@ static void init_ablkcipher_job(u32 *sh_desc, dma_addr_t ptr,
        int len, sec4_sg_index = 0;
 
 #ifdef DEBUG
+       bool may_sleep = ((req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
+                                             CRYPTO_TFM_REQ_MAY_SLEEP)) != 0);
        print_hex_dump(KERN_ERR, "presciv@"__stringify(__LINE__)": ",
                       DUMP_PREFIX_ADDRESS, 16, 4, req->info,
                       ivsize, 1);
-       print_hex_dump(KERN_ERR, "src    @"__stringify(__LINE__)": ",
-                      DUMP_PREFIX_ADDRESS, 16, 4, sg_virt(req->src),
-                      edesc->src_nents ? 100 : req->nbytes, 1);
+       printk(KERN_ERR "asked=%d, nbytes=%d\n", (int)edesc->src_nents ? 100 : req->nbytes, req->nbytes);
+       dbg_dump_sg(KERN_ERR, "src    @"__stringify(__LINE__)": ",
+                   DUMP_PREFIX_ADDRESS, 16, 4, req->src,
+                   edesc->src_nents ? 100 : req->nbytes, 1, may_sleep);
 #endif
 
        len = desc_len(sh_desc);
@@ -2241,12 +2278,14 @@ static void init_ablkcipher_giv_job(u32 *sh_desc, dma_addr_t ptr,
        int len, sec4_sg_index = 0;
 
 #ifdef DEBUG
+       bool may_sleep = ((req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
+                                             CRYPTO_TFM_REQ_MAY_SLEEP)) != 0);
        print_hex_dump(KERN_ERR, "presciv@" __stringify(__LINE__) ": ",
                       DUMP_PREFIX_ADDRESS, 16, 4, req->info,
                       ivsize, 1);
-       print_hex_dump(KERN_ERR, "src    @" __stringify(__LINE__) ": ",
-                      DUMP_PREFIX_ADDRESS, 16, 4, sg_virt(req->src),
-                      edesc->src_nents ? 100 : req->nbytes, 1);
+       dbg_dump_sg(KERN_ERR, "src    @" __stringify(__LINE__) ": ",
+                   DUMP_PREFIX_ADDRESS, 16, 4, req->src,
+                   edesc->src_nents ? 100 : req->nbytes, 1, may_sleep);
 #endif
 
        len = desc_len(sh_desc);
@@ -2516,18 +2555,20 @@ static int aead_decrypt(struct aead_request *req)
        u32 *desc;
        int ret = 0;
 
+#ifdef DEBUG
+       bool may_sleep = ((req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
+                                             CRYPTO_TFM_REQ_MAY_SLEEP)) != 0);
+       dbg_dump_sg(KERN_ERR, "dec src@"__stringify(__LINE__)": ",
+                   DUMP_PREFIX_ADDRESS, 16, 4, req->src,
+                   req->assoclen + req->cryptlen, 1, may_sleep);
+#endif
+
        /* allocate extended descriptor */
        edesc = aead_edesc_alloc(req, AUTHENC_DESC_JOB_IO_LEN,
                                 &all_contig, false);
        if (IS_ERR(edesc))
                return PTR_ERR(edesc);
 
-#ifdef DEBUG
-       print_hex_dump(KERN_ERR, "dec src@"__stringify(__LINE__)": ",
-                      DUMP_PREFIX_ADDRESS, 16, 4, sg_virt(req->src),
-                      req->assoclen + req->cryptlen, 1);
-#endif
-
        /* Create and submit job descriptor*/
        init_authenc_job(req, edesc, all_contig, false);
 #ifdef DEBUG
index 36365b3efdfdce5ffb6bc19a8b72fe31a3a463b3..660dc206969faa509ae8b858d2c2c8296e097d32 100644 (file)
@@ -99,17 +99,17 @@ static struct list_head hash_list;
 
 /* ahash per-session context */
 struct caam_hash_ctx {
-       struct device *jrdev;
-       u32 sh_desc_update[DESC_HASH_MAX_USED_LEN];
-       u32 sh_desc_update_first[DESC_HASH_MAX_USED_LEN];
-       u32 sh_desc_fin[DESC_HASH_MAX_USED_LEN];
-       u32 sh_desc_digest[DESC_HASH_MAX_USED_LEN];
-       u32 sh_desc_finup[DESC_HASH_MAX_USED_LEN];
-       dma_addr_t sh_desc_update_dma;
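+       /*
+        * Each shared descriptor below is DMA mapped on its own, so keep
+        * every buffer cacheline aligned and group the CPU-owned fields
+        * after them.
+        */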
+       u32 sh_desc_update[DESC_HASH_MAX_USED_LEN] ____cacheline_aligned;
+       u32 sh_desc_update_first[DESC_HASH_MAX_USED_LEN] ____cacheline_aligned;
+       u32 sh_desc_fin[DESC_HASH_MAX_USED_LEN] ____cacheline_aligned;
+       u32 sh_desc_digest[DESC_HASH_MAX_USED_LEN] ____cacheline_aligned;
+       u32 sh_desc_finup[DESC_HASH_MAX_USED_LEN] ____cacheline_aligned;
+       dma_addr_t sh_desc_update_dma ____cacheline_aligned;
        dma_addr_t sh_desc_update_first_dma;
        dma_addr_t sh_desc_fin_dma;
        dma_addr_t sh_desc_digest_dma;
        dma_addr_t sh_desc_finup_dma;
+       struct device *jrdev;
        u32 alg_type;
        u32 alg_op;
        u8 key[CAAM_MAX_HASH_KEY_SIZE];
@@ -187,15 +187,6 @@ static inline dma_addr_t buf_map_to_sec4_sg(struct device *jrdev,
        return buf_dma;
 }
 
-/* Map req->src and put it in link table */
-static inline void src_map_to_sec4_sg(struct device *jrdev,
-                                     struct scatterlist *src, int src_nents,
-                                     struct sec4_sg_entry *sec4_sg)
-{
-       dma_map_sg(jrdev, src, src_nents, DMA_TO_DEVICE);
-       sg_to_sec4_sg_last(src, src_nents, sec4_sg, 0);
-}
-
 /*
  * Only put buffer in link table if it contains data, which is possible,
  * since a buffer has previously been used, and needs to be unmapped,
@@ -449,7 +440,7 @@ static int hash_digest_key(struct caam_hash_ctx *ctx, const u8 *key_in,
        u32 *desc;
        struct split_key_result result;
        dma_addr_t src_dma, dst_dma;
-       int ret = 0;
+       int ret;
 
        desc = kmalloc(CAAM_CMD_SZ * 8 + CAAM_PTR_SZ * 2, GFP_KERNEL | GFP_DMA);
        if (!desc) {
@@ -526,7 +517,7 @@ static int ahash_setkey(struct crypto_ahash *ahash,
        struct device *jrdev = ctx->jrdev;
        int blocksize = crypto_tfm_alg_blocksize(&ahash->base);
        int digestsize = crypto_ahash_digestsize(ahash);
-       int ret = 0;
+       int ret;
        u8 *hashed_key = NULL;
 
 #ifdef DEBUG
@@ -534,14 +525,15 @@ static int ahash_setkey(struct crypto_ahash *ahash,
 #endif
 
        if (keylen > blocksize) {
-               hashed_key = kmalloc(sizeof(u8) * digestsize, GFP_KERNEL |
-                                    GFP_DMA);
+               hashed_key = kmalloc_array(digestsize,
+                                          sizeof(*hashed_key),
+                                          GFP_KERNEL | GFP_DMA);
                if (!hashed_key)
                        return -ENOMEM;
                ret = hash_digest_key(ctx, key, &keylen, hashed_key,
                                      digestsize);
                if (ret)
-                       goto badkey;
+                       goto bad_free_key;
                key = hashed_key;
        }
 
@@ -559,14 +551,14 @@ static int ahash_setkey(struct crypto_ahash *ahash,
 
        ret = gen_split_hash_key(ctx, key, keylen);
        if (ret)
-               goto badkey;
+               goto bad_free_key;
 
        ctx->key_dma = dma_map_single(jrdev, ctx->key, ctx->split_key_pad_len,
                                      DMA_TO_DEVICE);
        if (dma_mapping_error(jrdev, ctx->key_dma)) {
                dev_err(jrdev, "unable to map key i/o memory\n");
                ret = -ENOMEM;
-               goto map_err;
+               goto error_free_key;
        }
 #ifdef DEBUG
        print_hex_dump(KERN_ERR, "ctx.key@"__stringify(__LINE__)": ",
@@ -579,11 +571,10 @@ static int ahash_setkey(struct crypto_ahash *ahash,
                dma_unmap_single(jrdev, ctx->key_dma, ctx->split_key_pad_len,
                                 DMA_TO_DEVICE);
        }
-
-map_err:
+ error_free_key:
        kfree(hashed_key);
        return ret;
-badkey:
+ bad_free_key:
        kfree(hashed_key);
        crypto_ahash_set_flags(ahash, CRYPTO_TFM_RES_BAD_KEY_LEN);
        return -EINVAL;
@@ -595,16 +586,16 @@ badkey:
  * @sec4_sg_dma: physical mapped address of h/w link table
  * @src_nents: number of segments in input scatterlist
  * @sec4_sg_bytes: length of dma mapped sec4_sg space
- * @sec4_sg: pointer to h/w link table
  * @hw_desc: the h/w job descriptor followed by any referenced link tables
+ * @sec4_sg: h/w link table
  */
 struct ahash_edesc {
        dma_addr_t dst_dma;
        dma_addr_t sec4_sg_dma;
        int src_nents;
        int sec4_sg_bytes;
-       struct sec4_sg_entry *sec4_sg;
-       u32 hw_desc[0];
+       u32 hw_desc[DESC_JOB_IO_LEN / sizeof(u32)] ____cacheline_aligned;
+       struct sec4_sg_entry sec4_sg[0];
 };
 
 static inline void ahash_unmap(struct device *dev,
@@ -774,6 +765,65 @@ static void ahash_done_ctx_dst(struct device *jrdev, u32 *desc, u32 err,
        req->base.complete(&req->base, err);
 }
 
+/*
+ * Allocate an extended descriptor, which holds the hardware job descriptor
+ * and space for a hardware scatter table containing sg_num entries, and
+ * initialise the job descriptor header to reference the shared descriptor.
+ */
+static struct ahash_edesc *ahash_edesc_alloc(struct caam_hash_ctx *ctx,
+                                            int sg_num, u32 *sh_desc,
+                                            dma_addr_t sh_desc_dma,
+                                            gfp_t flags)
+{
+       struct ahash_edesc *edesc;
+       unsigned int sg_size = sg_num * sizeof(struct sec4_sg_entry);
+
+       edesc = kzalloc(sizeof(*edesc) + sg_size, GFP_DMA | flags);
+       if (!edesc) {
+               dev_err(ctx->jrdev, "could not allocate extended descriptor\n");
+               return NULL;
+       }
+
+       init_job_desc_shared(edesc->hw_desc, sh_desc_dma, desc_len(sh_desc),
+                            HDR_SHARE_DEFER | HDR_REVERSE);
+
+       return edesc;
+}
+
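+/*
+ * Set up the job descriptor's SEQ IN PTR for the request source.  If the
+ * source needs more than one mapped segment, or if caller-filled entries
+ * precede it in the table, append the source to the sec4 S/G table and DMA
+ * map the table; otherwise point directly at the single mapped segment.
+ */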
+static int ahash_edesc_add_src(struct caam_hash_ctx *ctx,
+                              struct ahash_edesc *edesc,
+                              struct ahash_request *req, int nents,
+                              unsigned int first_sg,
+                              unsigned int first_bytes, size_t to_hash)
+{
+       dma_addr_t src_dma;
+       u32 options;
+
+       if (nents > 1 || first_sg) {
+               struct sec4_sg_entry *sg = edesc->sec4_sg;
+               unsigned int sgsize = sizeof(*sg) * (first_sg + nents);
+
+               sg_to_sec4_sg_last(req->src, nents, sg + first_sg, 0);
+
+               src_dma = dma_map_single(ctx->jrdev, sg, sgsize, DMA_TO_DEVICE);
+               if (dma_mapping_error(ctx->jrdev, src_dma)) {
+                       dev_err(ctx->jrdev, "unable to map S/G table\n");
+                       return -ENOMEM;
+               }
+
+               edesc->sec4_sg_bytes = sgsize;
+               edesc->sec4_sg_dma = src_dma;
+               options = LDST_SGF;
+       } else {
+               src_dma = sg_dma_address(req->src);
+               options = 0;
+       }
+
+       append_seq_in_ptr(edesc->hw_desc, src_dma, first_bytes + to_hash,
+                         options);
+
+       return 0;
+}
+
 /* submit update job descriptor */
 static int ahash_update_ctx(struct ahash_request *req)
 {
@@ -789,12 +839,10 @@ static int ahash_update_ctx(struct ahash_request *req)
        int *next_buflen = state->current_buf ? &state->buflen_0 :
                           &state->buflen_1, last_buflen;
        int in_len = *buflen + req->nbytes, to_hash;
-       u32 *sh_desc = ctx->sh_desc_update, *desc;
-       dma_addr_t ptr = ctx->sh_desc_update_dma;
-       int src_nents, sec4_sg_bytes, sec4_sg_src_index;
+       u32 *desc;
+       int src_nents, mapped_nents, sec4_sg_bytes, sec4_sg_src_index;
        struct ahash_edesc *edesc;
        int ret = 0;
-       int sh_len;
 
        last_buflen = *next_buflen;
        *next_buflen = in_len & (crypto_tfm_alg_blocksize(&ahash->base) - 1);
@@ -807,40 +855,51 @@ static int ahash_update_ctx(struct ahash_request *req)
                        dev_err(jrdev, "Invalid number of src SG.\n");
                        return src_nents;
                }
+
+               if (src_nents) {
+                       mapped_nents = dma_map_sg(jrdev, req->src, src_nents,
+                                                 DMA_TO_DEVICE);
+                       if (!mapped_nents) {
+                               dev_err(jrdev, "unable to DMA map source\n");
+                               return -ENOMEM;
+                       }
+               } else {
+                       mapped_nents = 0;
+               }
+
                sec4_sg_src_index = 1 + (*buflen ? 1 : 0);
-               sec4_sg_bytes = (sec4_sg_src_index + src_nents) *
+               sec4_sg_bytes = (sec4_sg_src_index + mapped_nents) *
                                 sizeof(struct sec4_sg_entry);
 
                /*
                 * allocate space for base edesc and hw desc commands,
                 * link tables
                 */
-               edesc = kzalloc(sizeof(*edesc) + DESC_JOB_IO_LEN +
-                               sec4_sg_bytes, GFP_DMA | flags);
+               edesc = ahash_edesc_alloc(ctx, sec4_sg_src_index + mapped_nents,
+                                         ctx->sh_desc_update,
+                                         ctx->sh_desc_update_dma, flags);
                if (!edesc) {
-                       dev_err(jrdev,
-                               "could not allocate extended descriptor\n");
+                       dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE);
                        return -ENOMEM;
                }
 
                edesc->src_nents = src_nents;
                edesc->sec4_sg_bytes = sec4_sg_bytes;
-               edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) +
-                                DESC_JOB_IO_LEN;
 
                ret = ctx_map_to_sec4_sg(desc, jrdev, state, ctx->ctx_len,
                                         edesc->sec4_sg, DMA_BIDIRECTIONAL);
                if (ret)
-                       return ret;
+                       goto unmap_ctx;
 
                state->buf_dma = try_buf_map_to_sec4_sg(jrdev,
                                                        edesc->sec4_sg + 1,
                                                        buf, state->buf_dma,
                                                        *buflen, last_buflen);
 
-               if (src_nents) {
-                       src_map_to_sec4_sg(jrdev, req->src, src_nents,
-                                          edesc->sec4_sg + sec4_sg_src_index);
+               if (mapped_nents) {
+                       sg_to_sec4_sg_last(req->src, mapped_nents,
+                                          edesc->sec4_sg + sec4_sg_src_index,
+                                          0);
                        if (*next_buflen)
                                scatterwalk_map_and_copy(next_buf, req->src,
                                                         to_hash - *buflen,
@@ -852,17 +911,15 @@ static int ahash_update_ctx(struct ahash_request *req)
 
                state->current_buf = !state->current_buf;
 
-               sh_len = desc_len(sh_desc);
                desc = edesc->hw_desc;
-               init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER |
-                                    HDR_REVERSE);
 
                edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
                                                     sec4_sg_bytes,
                                                     DMA_TO_DEVICE);
                if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) {
                        dev_err(jrdev, "unable to map S/G table\n");
-                       return -ENOMEM;
+                       ret = -ENOMEM;
+                       goto unmap_ctx;
                }
 
                append_seq_in_ptr(desc, edesc->sec4_sg_dma, ctx->ctx_len +
@@ -877,13 +934,10 @@ static int ahash_update_ctx(struct ahash_request *req)
 #endif
 
                ret = caam_jr_enqueue(jrdev, desc, ahash_done_bi, req);
-               if (!ret) {
-                       ret = -EINPROGRESS;
-               } else {
-                       ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len,
-                                          DMA_BIDIRECTIONAL);
-                       kfree(edesc);
-               }
+               if (ret)
+                       goto unmap_ctx;
+
+               ret = -EINPROGRESS;
        } else if (*next_buflen) {
                scatterwalk_map_and_copy(buf + *buflen, req->src, 0,
                                         req->nbytes, 0);
@@ -898,6 +952,10 @@ static int ahash_update_ctx(struct ahash_request *req)
                       *next_buflen, 1);
 #endif
 
+       return ret;
+ unmap_ctx:
+       ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len, DMA_BIDIRECTIONAL);
+       kfree(edesc);
        return ret;
 }
 
@@ -913,38 +971,31 @@ static int ahash_final_ctx(struct ahash_request *req)
        int buflen = state->current_buf ? state->buflen_1 : state->buflen_0;
        int last_buflen = state->current_buf ? state->buflen_0 :
                          state->buflen_1;
-       u32 *sh_desc = ctx->sh_desc_fin, *desc;
-       dma_addr_t ptr = ctx->sh_desc_fin_dma;
+       u32 *desc;
        int sec4_sg_bytes, sec4_sg_src_index;
        int digestsize = crypto_ahash_digestsize(ahash);
        struct ahash_edesc *edesc;
-       int ret = 0;
-       int sh_len;
+       int ret;
 
        sec4_sg_src_index = 1 + (buflen ? 1 : 0);
        sec4_sg_bytes = sec4_sg_src_index * sizeof(struct sec4_sg_entry);
 
        /* allocate space for base edesc and hw desc commands, link tables */
-       edesc = kzalloc(sizeof(*edesc) + DESC_JOB_IO_LEN + sec4_sg_bytes,
-                       GFP_DMA | flags);
-       if (!edesc) {
-               dev_err(jrdev, "could not allocate extended descriptor\n");
+       edesc = ahash_edesc_alloc(ctx, sec4_sg_src_index,
+                                 ctx->sh_desc_fin, ctx->sh_desc_fin_dma,
+                                 flags);
+       if (!edesc)
                return -ENOMEM;
-       }
 
-       sh_len = desc_len(sh_desc);
        desc = edesc->hw_desc;
-       init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER | HDR_REVERSE);
 
        edesc->sec4_sg_bytes = sec4_sg_bytes;
-       edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) +
-                        DESC_JOB_IO_LEN;
        edesc->src_nents = 0;
 
        ret = ctx_map_to_sec4_sg(desc, jrdev, state, ctx->ctx_len,
                                 edesc->sec4_sg, DMA_TO_DEVICE);
        if (ret)
-               return ret;
+               goto unmap_ctx;
 
        state->buf_dma = try_buf_map_to_sec4_sg(jrdev, edesc->sec4_sg + 1,
                                                buf, state->buf_dma, buflen,
@@ -956,7 +1007,8 @@ static int ahash_final_ctx(struct ahash_request *req)
                                            sec4_sg_bytes, DMA_TO_DEVICE);
        if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) {
                dev_err(jrdev, "unable to map S/G table\n");
-               return -ENOMEM;
+               ret = -ENOMEM;
+               goto unmap_ctx;
        }
 
        append_seq_in_ptr(desc, edesc->sec4_sg_dma, ctx->ctx_len + buflen,
@@ -966,7 +1018,8 @@ static int ahash_final_ctx(struct ahash_request *req)
                                                digestsize);
        if (dma_mapping_error(jrdev, edesc->dst_dma)) {
                dev_err(jrdev, "unable to map dst\n");
-               return -ENOMEM;
+               ret = -ENOMEM;
+               goto unmap_ctx;
        }
 
 #ifdef DEBUG
@@ -975,13 +1028,13 @@ static int ahash_final_ctx(struct ahash_request *req)
 #endif
 
        ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_src, req);
-       if (!ret) {
-               ret = -EINPROGRESS;
-       } else {
-               ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_FROM_DEVICE);
-               kfree(edesc);
-       }
+       if (ret)
+               goto unmap_ctx;
 
+       return -EINPROGRESS;
+ unmap_ctx:
+       ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_FROM_DEVICE);
+       kfree(edesc);
        return ret;
 }
 
@@ -997,68 +1050,66 @@ static int ahash_finup_ctx(struct ahash_request *req)
        int buflen = state->current_buf ? state->buflen_1 : state->buflen_0;
        int last_buflen = state->current_buf ? state->buflen_0 :
                          state->buflen_1;
-       u32 *sh_desc = ctx->sh_desc_finup, *desc;
-       dma_addr_t ptr = ctx->sh_desc_finup_dma;
-       int sec4_sg_bytes, sec4_sg_src_index;
-       int src_nents;
+       u32 *desc;
+       int sec4_sg_src_index;
+       int src_nents, mapped_nents;
        int digestsize = crypto_ahash_digestsize(ahash);
        struct ahash_edesc *edesc;
-       int ret = 0;
-       int sh_len;
+       int ret;
 
        src_nents = sg_nents_for_len(req->src, req->nbytes);
        if (src_nents < 0) {
                dev_err(jrdev, "Invalid number of src SG.\n");
                return src_nents;
        }
+
+       if (src_nents) {
+               mapped_nents = dma_map_sg(jrdev, req->src, src_nents,
+                                         DMA_TO_DEVICE);
+               if (!mapped_nents) {
+                       dev_err(jrdev, "unable to DMA map source\n");
+                       return -ENOMEM;
+               }
+       } else {
+               mapped_nents = 0;
+       }
+
        sec4_sg_src_index = 1 + (buflen ? 1 : 0);
-       sec4_sg_bytes = (sec4_sg_src_index + src_nents) *
-                        sizeof(struct sec4_sg_entry);
 
        /* allocate space for base edesc and hw desc commands, link tables */
-       edesc = kzalloc(sizeof(*edesc) + DESC_JOB_IO_LEN + sec4_sg_bytes,
-                       GFP_DMA | flags);
+       edesc = ahash_edesc_alloc(ctx, sec4_sg_src_index + mapped_nents,
+                                 ctx->sh_desc_finup, ctx->sh_desc_finup_dma,
+                                 flags);
        if (!edesc) {
-               dev_err(jrdev, "could not allocate extended descriptor\n");
+               dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE);
                return -ENOMEM;
        }
 
-       sh_len = desc_len(sh_desc);
        desc = edesc->hw_desc;
-       init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER | HDR_REVERSE);
 
        edesc->src_nents = src_nents;
-       edesc->sec4_sg_bytes = sec4_sg_bytes;
-       edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) +
-                        DESC_JOB_IO_LEN;
 
        ret = ctx_map_to_sec4_sg(desc, jrdev, state, ctx->ctx_len,
                                 edesc->sec4_sg, DMA_TO_DEVICE);
        if (ret)
-               return ret;
+               goto unmap_ctx;
 
        state->buf_dma = try_buf_map_to_sec4_sg(jrdev, edesc->sec4_sg + 1,
                                                buf, state->buf_dma, buflen,
                                                last_buflen);
 
-       src_map_to_sec4_sg(jrdev, req->src, src_nents, edesc->sec4_sg +
-                          sec4_sg_src_index);
-
-       edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
-                                           sec4_sg_bytes, DMA_TO_DEVICE);
-       if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) {
-               dev_err(jrdev, "unable to map S/G table\n");
-               return -ENOMEM;
-       }
-
-       append_seq_in_ptr(desc, edesc->sec4_sg_dma, ctx->ctx_len +
-                              buflen + req->nbytes, LDST_SGF);
+       ret = ahash_edesc_add_src(ctx, edesc, req, mapped_nents,
+                                 sec4_sg_src_index, ctx->ctx_len + buflen,
+                                 req->nbytes);
+       if (ret)
+               goto unmap_ctx;
 
        edesc->dst_dma = map_seq_out_ptr_result(desc, jrdev, req->result,
                                                digestsize);
        if (dma_mapping_error(jrdev, edesc->dst_dma)) {
                dev_err(jrdev, "unable to map dst\n");
-               return -ENOMEM;
+               ret = -ENOMEM;
+               goto unmap_ctx;
        }
 
 #ifdef DEBUG
@@ -1067,13 +1118,13 @@ static int ahash_finup_ctx(struct ahash_request *req)
 #endif
 
        ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_src, req);
-       if (!ret) {
-               ret = -EINPROGRESS;
-       } else {
-               ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_FROM_DEVICE);
-               kfree(edesc);
-       }
+       if (ret)
+               goto unmap_ctx;
 
+       return -EINPROGRESS;
+ unmap_ctx:
+       ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_FROM_DEVICE);
+       kfree(edesc);
        return ret;
 }
 
@@ -1084,60 +1135,56 @@ static int ahash_digest(struct ahash_request *req)
        struct device *jrdev = ctx->jrdev;
        gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
                       CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC;
-       u32 *sh_desc = ctx->sh_desc_digest, *desc;
-       dma_addr_t ptr = ctx->sh_desc_digest_dma;
+       u32 *desc;
        int digestsize = crypto_ahash_digestsize(ahash);
-       int src_nents, sec4_sg_bytes;
-       dma_addr_t src_dma;
+       int src_nents, mapped_nents;
        struct ahash_edesc *edesc;
-       int ret = 0;
-       u32 options;
-       int sh_len;
+       int ret;
 
-       src_nents = sg_count(req->src, req->nbytes);
+       src_nents = sg_nents_for_len(req->src, req->nbytes);
        if (src_nents < 0) {
                dev_err(jrdev, "Invalid number of src SG.\n");
                return src_nents;
        }
-       dma_map_sg(jrdev, req->src, src_nents ? : 1, DMA_TO_DEVICE);
-       sec4_sg_bytes = src_nents * sizeof(struct sec4_sg_entry);
+
+       if (src_nents) {
+               mapped_nents = dma_map_sg(jrdev, req->src, src_nents,
+                                         DMA_TO_DEVICE);
+               if (!mapped_nents) {
+                       dev_err(jrdev, "unable to map source for DMA\n");
+                       return -ENOMEM;
+               }
+       } else {
+               mapped_nents = 0;
+       }
 
        /* allocate space for base edesc and hw desc commands, link tables */
-       edesc = kzalloc(sizeof(*edesc) + sec4_sg_bytes + DESC_JOB_IO_LEN,
-                       GFP_DMA | flags);
+       edesc = ahash_edesc_alloc(ctx, mapped_nents > 1 ? mapped_nents : 0,
+                                 ctx->sh_desc_digest, ctx->sh_desc_digest_dma,
+                                 flags);
        if (!edesc) {
-               dev_err(jrdev, "could not allocate extended descriptor\n");
+               dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE);
                return -ENOMEM;
        }
-       edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) +
-                         DESC_JOB_IO_LEN;
-       edesc->sec4_sg_bytes = sec4_sg_bytes;
-       edesc->src_nents = src_nents;
 
-       sh_len = desc_len(sh_desc);
-       desc = edesc->hw_desc;
-       init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER | HDR_REVERSE);
+       edesc->src_nents = src_nents;
 
-       if (src_nents) {
-               sg_to_sec4_sg_last(req->src, src_nents, edesc->sec4_sg, 0);
-               edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
-                                           sec4_sg_bytes, DMA_TO_DEVICE);
-               if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) {
-                       dev_err(jrdev, "unable to map S/G table\n");
-                       return -ENOMEM;
-               }
-               src_dma = edesc->sec4_sg_dma;
-               options = LDST_SGF;
-       } else {
-               src_dma = sg_dma_address(req->src);
-               options = 0;
+       ret = ahash_edesc_add_src(ctx, edesc, req, mapped_nents, 0, 0,
+                                 req->nbytes);
+       if (ret) {
+               ahash_unmap(jrdev, edesc, req, digestsize);
+               kfree(edesc);
+               return ret;
        }
-       append_seq_in_ptr(desc, src_dma, req->nbytes, options);
+
+       desc = edesc->hw_desc;
 
        edesc->dst_dma = map_seq_out_ptr_result(desc, jrdev, req->result,
                                                digestsize);
        if (dma_mapping_error(jrdev, edesc->dst_dma)) {
                dev_err(jrdev, "unable to map dst\n");
+               ahash_unmap(jrdev, edesc, req, digestsize);
+               kfree(edesc);
                return -ENOMEM;
        }
 
@@ -1168,29 +1215,23 @@ static int ahash_final_no_ctx(struct ahash_request *req)
                       CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC;
        u8 *buf = state->current_buf ? state->buf_1 : state->buf_0;
        int buflen = state->current_buf ? state->buflen_1 : state->buflen_0;
-       u32 *sh_desc = ctx->sh_desc_digest, *desc;
-       dma_addr_t ptr = ctx->sh_desc_digest_dma;
+       u32 *desc;
        int digestsize = crypto_ahash_digestsize(ahash);
        struct ahash_edesc *edesc;
-       int ret = 0;
-       int sh_len;
+       int ret;
 
        /* allocate space for base edesc and hw desc commands, link tables */
-       edesc = kzalloc(sizeof(*edesc) + DESC_JOB_IO_LEN, GFP_DMA | flags);
-       if (!edesc) {
-               dev_err(jrdev, "could not allocate extended descriptor\n");
+       edesc = ahash_edesc_alloc(ctx, 0, ctx->sh_desc_digest,
+                                 ctx->sh_desc_digest_dma, flags);
+       if (!edesc)
                return -ENOMEM;
-       }
 
-       edesc->sec4_sg_bytes = 0;
-       sh_len = desc_len(sh_desc);
        desc = edesc->hw_desc;
-       init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER | HDR_REVERSE);
 
        state->buf_dma = dma_map_single(jrdev, buf, buflen, DMA_TO_DEVICE);
        if (dma_mapping_error(jrdev, state->buf_dma)) {
                dev_err(jrdev, "unable to map src\n");
-               return -ENOMEM;
+               goto unmap;
        }
 
        append_seq_in_ptr(desc, state->buf_dma, buflen, 0);
@@ -1199,7 +1240,7 @@ static int ahash_final_no_ctx(struct ahash_request *req)
                                                digestsize);
        if (dma_mapping_error(jrdev, edesc->dst_dma)) {
                dev_err(jrdev, "unable to map dst\n");
-               return -ENOMEM;
+               goto unmap;
        }
        edesc->src_nents = 0;
 
@@ -1217,6 +1258,11 @@ static int ahash_final_no_ctx(struct ahash_request *req)
        }
 
        return ret;
+ unmap:
+       ahash_unmap(jrdev, edesc, req, digestsize);
+       kfree(edesc);
+       return -ENOMEM;
 }
 
 /* submit ahash update if it the first job descriptor after update */
@@ -1234,48 +1280,58 @@ static int ahash_update_no_ctx(struct ahash_request *req)
        int *next_buflen = state->current_buf ? &state->buflen_0 :
                           &state->buflen_1;
        int in_len = *buflen + req->nbytes, to_hash;
-       int sec4_sg_bytes, src_nents;
+       int sec4_sg_bytes, src_nents, mapped_nents;
        struct ahash_edesc *edesc;
-       u32 *desc, *sh_desc = ctx->sh_desc_update_first;
-       dma_addr_t ptr = ctx->sh_desc_update_first_dma;
+       u32 *desc;
        int ret = 0;
-       int sh_len;
 
        *next_buflen = in_len & (crypto_tfm_alg_blocksize(&ahash->base) - 1);
        to_hash = in_len - *next_buflen;
 
        if (to_hash) {
                src_nents = sg_nents_for_len(req->src,
-                                            req->nbytes - (*next_buflen));
+                                            req->nbytes - *next_buflen);
                if (src_nents < 0) {
                        dev_err(jrdev, "Invalid number of src SG.\n");
                        return src_nents;
                }
-               sec4_sg_bytes = (1 + src_nents) *
+
+               if (src_nents) {
+                       mapped_nents = dma_map_sg(jrdev, req->src, src_nents,
+                                                 DMA_TO_DEVICE);
+                       if (!mapped_nents) {
+                               dev_err(jrdev, "unable to DMA map source\n");
+                               return -ENOMEM;
+                       }
+               } else {
+                       mapped_nents = 0;
+               }
+
+               sec4_sg_bytes = (1 + mapped_nents) *
                                sizeof(struct sec4_sg_entry);
 
                /*
                 * allocate space for base edesc and hw desc commands,
                 * link tables
                 */
-               edesc = kzalloc(sizeof(*edesc) + DESC_JOB_IO_LEN +
-                               sec4_sg_bytes, GFP_DMA | flags);
+               edesc = ahash_edesc_alloc(ctx, 1 + mapped_nents,
+                                         ctx->sh_desc_update_first,
+                                         ctx->sh_desc_update_first_dma,
+                                         flags);
                if (!edesc) {
-                       dev_err(jrdev,
-                               "could not allocate extended descriptor\n");
+                       dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE);
                        return -ENOMEM;
                }
 
                edesc->src_nents = src_nents;
                edesc->sec4_sg_bytes = sec4_sg_bytes;
-               edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) +
-                                DESC_JOB_IO_LEN;
                edesc->dst_dma = 0;
 
                state->buf_dma = buf_map_to_sec4_sg(jrdev, edesc->sec4_sg,
                                                    buf, *buflen);
-               src_map_to_sec4_sg(jrdev, req->src, src_nents,
-                                  edesc->sec4_sg + 1);
+               sg_to_sec4_sg_last(req->src, mapped_nents,
+                                  edesc->sec4_sg + 1, 0);
+
                if (*next_buflen) {
                        scatterwalk_map_and_copy(next_buf, req->src,
                                                 to_hash - *buflen,
@@ -1284,24 +1340,22 @@ static int ahash_update_no_ctx(struct ahash_request *req)
 
                state->current_buf = !state->current_buf;
 
-               sh_len = desc_len(sh_desc);
                desc = edesc->hw_desc;
-               init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER |
-                                    HDR_REVERSE);
 
                edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
                                                    sec4_sg_bytes,
                                                    DMA_TO_DEVICE);
                if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) {
                        dev_err(jrdev, "unable to map S/G table\n");
-                       return -ENOMEM;
+                       ret = -ENOMEM;
+                       goto unmap_ctx;
                }
 
                append_seq_in_ptr(desc, edesc->sec4_sg_dma, to_hash, LDST_SGF);
 
                ret = map_seq_out_ptr_ctx(desc, jrdev, state, ctx->ctx_len);
                if (ret)
-                       return ret;
+                       goto unmap_ctx;
 
 #ifdef DEBUG
                print_hex_dump(KERN_ERR, "jobdesc@"__stringify(__LINE__)": ",
@@ -1310,16 +1364,13 @@ static int ahash_update_no_ctx(struct ahash_request *req)
 #endif
 
                ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_dst, req);
-               if (!ret) {
-                       ret = -EINPROGRESS;
-                       state->update = ahash_update_ctx;
-                       state->finup = ahash_finup_ctx;
-                       state->final = ahash_final_ctx;
-               } else {
-                       ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len,
-                                       DMA_TO_DEVICE);
-                       kfree(edesc);
-               }
+               if (ret)
+                       goto unmap_ctx;
+
+               ret = -EINPROGRESS;
+               state->update = ahash_update_ctx;
+               state->finup = ahash_finup_ctx;
+               state->final = ahash_final_ctx;
        } else if (*next_buflen) {
                scatterwalk_map_and_copy(buf + *buflen, req->src, 0,
                                         req->nbytes, 0);
@@ -1334,6 +1385,10 @@ static int ahash_update_no_ctx(struct ahash_request *req)
                       *next_buflen, 1);
 #endif
 
+       return ret;
+ unmap_ctx:
+       ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len, DMA_TO_DEVICE);
+       kfree(edesc);
        return ret;
 }
 
@@ -1350,61 +1405,63 @@ static int ahash_finup_no_ctx(struct ahash_request *req)
        int buflen = state->current_buf ? state->buflen_1 : state->buflen_0;
        int last_buflen = state->current_buf ? state->buflen_0 :
                          state->buflen_1;
-       u32 *sh_desc = ctx->sh_desc_digest, *desc;
-       dma_addr_t ptr = ctx->sh_desc_digest_dma;
-       int sec4_sg_bytes, sec4_sg_src_index, src_nents;
+       u32 *desc;
+       int sec4_sg_bytes, sec4_sg_src_index, src_nents, mapped_nents;
        int digestsize = crypto_ahash_digestsize(ahash);
        struct ahash_edesc *edesc;
-       int sh_len;
-       int ret = 0;
+       int ret;
 
        src_nents = sg_nents_for_len(req->src, req->nbytes);
        if (src_nents < 0) {
                dev_err(jrdev, "Invalid number of src SG.\n");
                return src_nents;
        }
+
+       if (src_nents) {
+               mapped_nents = dma_map_sg(jrdev, req->src, src_nents,
+                                         DMA_TO_DEVICE);
+               if (!mapped_nents) {
+                       dev_err(jrdev, "unable to DMA map source\n");
+                       return -ENOMEM;
+               }
+       } else {
+               mapped_nents = 0;
+       }
+
        sec4_sg_src_index = 2;
-       sec4_sg_bytes = (sec4_sg_src_index + src_nents) *
+       sec4_sg_bytes = (sec4_sg_src_index + mapped_nents) *
                         sizeof(struct sec4_sg_entry);
 
        /* allocate space for base edesc and hw desc commands, link tables */
-       edesc = kzalloc(sizeof(*edesc) + DESC_JOB_IO_LEN + sec4_sg_bytes,
-                       GFP_DMA | flags);
+       edesc = ahash_edesc_alloc(ctx, sec4_sg_src_index + mapped_nents,
+                                 ctx->sh_desc_digest, ctx->sh_desc_digest_dma,
+                                 flags);
        if (!edesc) {
-               dev_err(jrdev, "could not allocate extended descriptor\n");
+               dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE);
                return -ENOMEM;
        }
 
-       sh_len = desc_len(sh_desc);
        desc = edesc->hw_desc;
-       init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER | HDR_REVERSE);
 
        edesc->src_nents = src_nents;
        edesc->sec4_sg_bytes = sec4_sg_bytes;
-       edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) +
-                        DESC_JOB_IO_LEN;
 
        state->buf_dma = try_buf_map_to_sec4_sg(jrdev, edesc->sec4_sg, buf,
                                                state->buf_dma, buflen,
                                                last_buflen);
 
-       src_map_to_sec4_sg(jrdev, req->src, src_nents, edesc->sec4_sg + 1);
-
-       edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
-                                           sec4_sg_bytes, DMA_TO_DEVICE);
-       if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) {
+       ret = ahash_edesc_add_src(ctx, edesc, req, mapped_nents, 1, buflen,
+                                 req->nbytes);
+       if (ret) {
                dev_err(jrdev, "unable to map S/G table\n");
-               return -ENOMEM;
+               goto unmap;
        }
 
-       append_seq_in_ptr(desc, edesc->sec4_sg_dma, buflen +
-                              req->nbytes, LDST_SGF);
-
        edesc->dst_dma = map_seq_out_ptr_result(desc, jrdev, req->result,
                                                digestsize);
        if (dma_mapping_error(jrdev, edesc->dst_dma)) {
                dev_err(jrdev, "unable to map dst\n");
-               return -ENOMEM;
+               goto unmap;
        }
 
 #ifdef DEBUG
@@ -1421,6 +1478,11 @@ static int ahash_finup_no_ctx(struct ahash_request *req)
        }
 
        return ret;
+ unmap:
+       ahash_unmap(jrdev, edesc, req, digestsize);
+       kfree(edesc);
+       return -ENOMEM;
+
 }
 
 /* submit first update job descriptor after init */
@@ -1436,78 +1498,65 @@ static int ahash_update_first(struct ahash_request *req)
        int *next_buflen = state->current_buf ?
                &state->buflen_1 : &state->buflen_0;
        int to_hash;
-       u32 *sh_desc = ctx->sh_desc_update_first, *desc;
-       dma_addr_t ptr = ctx->sh_desc_update_first_dma;
-       int sec4_sg_bytes, src_nents;
-       dma_addr_t src_dma;
-       u32 options;
+       u32 *desc;
+       int src_nents, mapped_nents;
        struct ahash_edesc *edesc;
        int ret = 0;
-       int sh_len;
 
        *next_buflen = req->nbytes & (crypto_tfm_alg_blocksize(&ahash->base) -
                                      1);
        to_hash = req->nbytes - *next_buflen;
 
        if (to_hash) {
-               src_nents = sg_count(req->src, req->nbytes - (*next_buflen));
+               src_nents = sg_nents_for_len(req->src,
+                                            req->nbytes - *next_buflen);
                if (src_nents < 0) {
                        dev_err(jrdev, "Invalid number of src SG.\n");
                        return src_nents;
                }
-               dma_map_sg(jrdev, req->src, src_nents ? : 1, DMA_TO_DEVICE);
-               sec4_sg_bytes = src_nents * sizeof(struct sec4_sg_entry);
+
+               if (src_nents) {
+                       mapped_nents = dma_map_sg(jrdev, req->src, src_nents,
+                                                 DMA_TO_DEVICE);
+                       if (!mapped_nents) {
+                               dev_err(jrdev, "unable to map source for DMA\n");
+                               return -ENOMEM;
+                       }
+               } else {
+                       mapped_nents = 0;
+               }
 
                /*
                 * allocate space for base edesc and hw desc commands,
                 * link tables
                 */
-               edesc = kzalloc(sizeof(*edesc) + DESC_JOB_IO_LEN +
-                               sec4_sg_bytes, GFP_DMA | flags);
+               edesc = ahash_edesc_alloc(ctx, mapped_nents > 1 ?
+                                         mapped_nents : 0,
+                                         ctx->sh_desc_update_first,
+                                         ctx->sh_desc_update_first_dma,
+                                         flags);
                if (!edesc) {
-                       dev_err(jrdev,
-                               "could not allocate extended descriptor\n");
+                       dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE);
                        return -ENOMEM;
                }
 
                edesc->src_nents = src_nents;
-               edesc->sec4_sg_bytes = sec4_sg_bytes;
-               edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) +
-                                DESC_JOB_IO_LEN;
                edesc->dst_dma = 0;
 
-               if (src_nents) {
-                       sg_to_sec4_sg_last(req->src, src_nents,
-                                          edesc->sec4_sg, 0);
-                       edesc->sec4_sg_dma = dma_map_single(jrdev,
-                                                           edesc->sec4_sg,
-                                                           sec4_sg_bytes,
-                                                           DMA_TO_DEVICE);
-                       if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) {
-                               dev_err(jrdev, "unable to map S/G table\n");
-                               return -ENOMEM;
-                       }
-                       src_dma = edesc->sec4_sg_dma;
-                       options = LDST_SGF;
-               } else {
-                       src_dma = sg_dma_address(req->src);
-                       options = 0;
-               }
+               ret = ahash_edesc_add_src(ctx, edesc, req, mapped_nents, 0, 0,
+                                         to_hash);
+               if (ret)
+                       goto unmap_ctx;
 
                if (*next_buflen)
                        scatterwalk_map_and_copy(next_buf, req->src, to_hash,
                                                 *next_buflen, 0);
 
-               sh_len = desc_len(sh_desc);
                desc = edesc->hw_desc;
-               init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER |
-                                    HDR_REVERSE);
-
-               append_seq_in_ptr(desc, src_dma, to_hash, options);
 
                ret = map_seq_out_ptr_ctx(desc, jrdev, state, ctx->ctx_len);
                if (ret)
-                       return ret;
+                       goto unmap_ctx;
 
 #ifdef DEBUG
                print_hex_dump(KERN_ERR, "jobdesc@"__stringify(__LINE__)": ",
@@ -1515,18 +1564,14 @@ static int ahash_update_first(struct ahash_request *req)
                               desc_bytes(desc), 1);
 #endif
 
-               ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_dst,
-                                     req);
-               if (!ret) {
-                       ret = -EINPROGRESS;
-                       state->update = ahash_update_ctx;
-                       state->finup = ahash_finup_ctx;
-                       state->final = ahash_final_ctx;
-               } else {
-                       ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len,
-                                       DMA_TO_DEVICE);
-                       kfree(edesc);
-               }
+               ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_dst, req);
+               if (ret)
+                       goto unmap_ctx;
+
+               ret = -EINPROGRESS;
+               state->update = ahash_update_ctx;
+               state->finup = ahash_finup_ctx;
+               state->final = ahash_final_ctx;
        } else if (*next_buflen) {
                state->update = ahash_update_no_ctx;
                state->finup = ahash_finup_no_ctx;
@@ -1540,6 +1585,10 @@ static int ahash_update_first(struct ahash_request *req)
                       *next_buflen, 1);
 #endif
 
+       return ret;
+ unmap_ctx:
+       ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len, DMA_TO_DEVICE);
+       kfree(edesc);
        return ret;
 }
 
@@ -1799,7 +1848,6 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm)
                                         HASH_MSG_LEN + SHA256_DIGEST_SIZE,
                                         HASH_MSG_LEN + 64,
                                         HASH_MSG_LEN + SHA512_DIGEST_SIZE };
-       int ret = 0;
 
        /*
         * Get a Job ring from Job Ring driver to ensure in-order
@@ -1819,10 +1867,7 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm)
 
        crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
                                 sizeof(struct caam_hash_state));
-
-       ret = ahash_set_sh_desc(ahash);
-
-       return ret;
+       return ahash_set_sh_desc(ahash);
 }
 
 static void caam_hash_cra_exit(struct crypto_tfm *tfm)
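Note on the caamhash.c hunks above: the per-call-site success/failure branches are replaced by a single unmap_ctx/unmap exit label, so every error after the descriptor is mapped funnels through one cleanup path. A minimal standalone sketch of that single-exit idiom follows; do_map(), do_submit() and undo_map() are illustrative stand-ins, not functions from the driver.

#include <errno.h>

static int do_map(void)    { return 0; }  /* pretend mapping succeeds */
static int do_submit(void) { return 0; }  /* pretend submission succeeds */
static void undo_map(void) { }

static int submit_job(void)
{
        int ret;

        ret = do_map();
        if (ret)
                goto unmap;             /* one cleanup path for every error */

        ret = do_submit();
        if (ret)
                goto unmap;

        return -EINPROGRESS;            /* job accepted, completes asynchronously */

 unmap:
        undo_map();
        return ret;
}

int main(void)
{
        return submit_job() == -EINPROGRESS ? 0 : 1;
}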
index 0ec112ee5204302ef01bdc7f5dc30d5184e747b4..72ff196589851d652ff4d369bf0ce497aa0e2165 100644 (file)
@@ -14,6 +14,7 @@
 #include "jr.h"
 #include "desc_constr.h"
 #include "error.h"
+#include "ctrl.h"
 
 bool caam_little_end;
 EXPORT_SYMBOL(caam_little_end);
@@ -826,6 +827,8 @@ static int caam_probe(struct platform_device *pdev)
 
 caam_remove:
        caam_remove(pdev);
+       return ret;
+
 iounmap_ctrl:
        iounmap(ctrl);
 disable_caam_emi_slow:
index 26427c11ad8784ab1cad8adef7332599e113eb81..513b6646bb36ae670c85337da37dce52a0674cd7 100644 (file)
 #define SEC4_SG_OFFSET_MASK    0x00001fff
 
 struct sec4_sg_entry {
-#if !defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) && \
-       defined(CONFIG_CRYPTO_DEV_FSL_CAAM_IMX)
-       u32 rsvd1;
-       dma_addr_t ptr;
-#else
        u64 ptr;
-#endif /* CONFIG_CRYPTO_DEV_FSL_CAAM_IMX */
        u32 len;
        u32 bpid_offset;
 };
index d3869b95e7b1a0d1b89cfb9dd2d125b975e82f9b..a8cd8a78ec1fcdc12ec48886d455ee83ef575f6e 100644 (file)
@@ -324,6 +324,23 @@ static inline void append_##cmd##_imm_##type(u32 *desc, type immediate, \
 }
 APPEND_CMD_RAW_IMM(load, LOAD, u32);
 
+/*
+ * ee - endianness
+ * size - size of immediate type in bytes
+ */
+#define APPEND_CMD_RAW_IMM2(cmd, op, ee, size) \
+static inline void append_##cmd##_imm_##ee##size(u32 *desc, \
+                                                  u##size immediate, \
+                                                  u32 options) \
+{ \
+       __##ee##size data = cpu_to_##ee##size(immediate); \
+       PRINT_POS; \
+       append_cmd(desc, CMD_##op | IMMEDIATE | options | sizeof(data)); \
+       append_data(desc, &data, sizeof(data)); \
+}
+
+APPEND_CMD_RAW_IMM2(load, LOAD, be, 32);
+
 /*
  * Append math command. Only the last part of destination and source need to
  * be specified
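For reference, the single instantiation added above, APPEND_CMD_RAW_IMM2(load, LOAD, be, 32), expands to roughly the following inline helper. This is only the preprocessor result spelled out for readability; it relies on the append_cmd()/append_data() helpers and the CMD_LOAD/IMMEDIATE flags already defined in this header.

static inline void append_load_imm_be32(u32 *desc, u32 immediate,
                                        u32 options)
{
        __be32 data = cpu_to_be32(immediate);

        PRINT_POS;
        append_cmd(desc, CMD_LOAD | IMMEDIATE | options | sizeof(data));
        append_data(desc, &data, sizeof(data));
}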
index e2bcacc1a921675cf30f70a40816e1306a8c3ef9..5d4c05074a5c222b03011db9b0cc41b1078a7725 100644 (file)
@@ -41,7 +41,6 @@ struct caam_drv_private_jr {
        struct device           *dev;
        int ridx;
        struct caam_job_ring __iomem *rregs;    /* JobR's register space */
-       struct tasklet_struct irqtask;
        int irq;                        /* One per queue */
 
        /* Number of scatterlist crypt transforms active on the JobR */
index a81f551ac222a73153cdf166d5413cfda89fba9a..757c27f9953d52aa4b662f317b7c0d5ff508a685 100644 (file)
@@ -73,8 +73,6 @@ static int caam_jr_shutdown(struct device *dev)
 
        ret = caam_reset_hw_jr(dev);
 
-       tasklet_kill(&jrp->irqtask);
-
        /* Release interrupt */
        free_irq(jrp->irq, dev);
 
@@ -130,7 +128,7 @@ static irqreturn_t caam_jr_interrupt(int irq, void *st_dev)
 
        /*
         * Check the output ring for ready responses, kick
-        * tasklet if jobs done.
+        * the threaded IRQ handler if jobs are done.
         */
        irqstate = rd_reg32(&jrp->rregs->jrintstatus);
        if (!irqstate)
@@ -152,18 +150,13 @@ static irqreturn_t caam_jr_interrupt(int irq, void *st_dev)
        /* Have valid interrupt at this point, just ACK and trigger */
        wr_reg32(&jrp->rregs->jrintstatus, irqstate);
 
-       preempt_disable();
-       tasklet_schedule(&jrp->irqtask);
-       preempt_enable();
-
-       return IRQ_HANDLED;
+       return IRQ_WAKE_THREAD;
 }
 
-/* Deferred service handler, run as interrupt-fired tasklet */
-static void caam_jr_dequeue(unsigned long devarg)
+static irqreturn_t caam_jr_threadirq(int irq, void *st_dev)
 {
        int hw_idx, sw_idx, i, head, tail;
-       struct device *dev = (struct device *)devarg;
+       struct device *dev = st_dev;
        struct caam_drv_private_jr *jrp = dev_get_drvdata(dev);
        void (*usercall)(struct device *dev, u32 *desc, u32 status, void *arg);
        u32 *userdesc, userstatus;
@@ -237,6 +230,8 @@ static void caam_jr_dequeue(unsigned long devarg)
 
        /* reenable / unmask IRQs */
        clrsetbits_32(&jrp->rregs->rconfig_lo, JRCFG_IMSK, 0);
+
+       return IRQ_HANDLED;
 }
 
 /**
@@ -394,11 +389,10 @@ static int caam_jr_init(struct device *dev)
 
        jrp = dev_get_drvdata(dev);
 
-       tasklet_init(&jrp->irqtask, caam_jr_dequeue, (unsigned long)dev);
-
        /* Connect job ring interrupt handler. */
-       error = request_irq(jrp->irq, caam_jr_interrupt, IRQF_SHARED,
-                           dev_name(dev), dev);
+       error = request_threaded_irq(jrp->irq, caam_jr_interrupt,
+                                    caam_jr_threadirq, IRQF_SHARED,
+                                    dev_name(dev), dev);
        if (error) {
                dev_err(dev, "can't connect JobR %d interrupt (%d)\n",
                        jrp->ridx, jrp->irq);
@@ -460,7 +454,6 @@ out_free_inpring:
 out_free_irq:
        free_irq(jrp->irq, dev);
 out_kill_deq:
-       tasklet_kill(&jrp->irqtask);
        return error;
 }
 
@@ -513,6 +506,7 @@ static int caam_jr_probe(struct platform_device *pdev)
        error = caam_jr_init(jrdev); /* now turn on hardware */
        if (error) {
                irq_dispose_mapping(jrpriv->irq);
+               iounmap(ctrl);
                return error;
        }
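The jr.c changes above convert the job-ring completion path from a tasklet to a threaded interrupt: the hard handler now only acknowledges the hardware and returns IRQ_WAKE_THREAD, and the old tasklet body becomes the thread function passed to request_threaded_irq(). A minimal sketch of that split, with placeholder handlers rather than the driver's own:

#include <linux/interrupt.h>

static irqreturn_t demo_hardirq(int irq, void *dev_id)
{
        /* runs in hard-IRQ context: ack/mask the device, nothing slow */
        return IRQ_WAKE_THREAD;         /* defer the real work */
}

static irqreturn_t demo_thread_fn(int irq, void *dev_id)
{
        /* runs in a sleepable kernel thread: do the heavy processing */
        return IRQ_HANDLED;
}

/* registration, e.g. from probe():
 *      err = request_threaded_irq(irq, demo_hardirq, demo_thread_fn,
 *                                 IRQF_SHARED, "demo", dev);
 */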
 
index b3c5016f645892ca6709051a27a4e7841eb10151..84d2f838a063cc87f6521e6b6b87cb2a800c8e7d 100644 (file)
@@ -196,6 +196,14 @@ static inline u64 rd_reg64(void __iomem *reg)
 #define caam_dma_to_cpu(value) caam32_to_cpu(value)
 #endif /* CONFIG_ARCH_DMA_ADDR_T_64BIT  */
 
+#ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX
+#define cpu_to_caam_dma64(value) \
+               (((u64)cpu_to_caam32(lower_32_bits(value)) << 32) | \
+                (u64)cpu_to_caam32(upper_32_bits(value)))
+#else
+#define cpu_to_caam_dma64(value) cpu_to_caam64(value)
+#endif
+
 /*
  * jr_outentry
  * Represents each entry in a JobR output ring
index 19dc64fede0d1056485503cfac10a7a717ae9af2..41cd5a356d050adaaf9cdaf40a143833422c7ef6 100644 (file)
@@ -15,7 +15,7 @@ struct sec4_sg_entry;
 static inline void dma_to_sec4_sg_one(struct sec4_sg_entry *sec4_sg_ptr,
                                      dma_addr_t dma, u32 len, u16 offset)
 {
-       sec4_sg_ptr->ptr = cpu_to_caam_dma(dma);
+       sec4_sg_ptr->ptr = cpu_to_caam_dma64(dma);
        sec4_sg_ptr->len = cpu_to_caam32(len);
        sec4_sg_ptr->bpid_offset = cpu_to_caam32(offset & SEC4_SG_OFFSET_MASK);
 #ifdef DEBUG
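The cpu_to_caam_dma64() macro added to regs.h and used in dma_to_sec4_sg_one() above stores, on i.MX parts, the endian-converted low 32 bits of a DMA address in the upper half of the 64-bit S/G pointer word and the high 32 bits in the lower half. A small userspace sketch of that half-swap (endian conversion dropped so only the swap is visible; the names here are illustrative):

#include <stdint.h>
#include <stdio.h>

/* Stand-in for the i.MX branch of cpu_to_caam_dma64(): low half to the
 * top 32 bits, high half to the bottom 32 bits.
 */
static uint64_t swap_halves(uint64_t v)
{
        return ((uint64_t)(uint32_t)v << 32) | (v >> 32);
}

int main(void)
{
        uint64_t dma = 0x0000000812345678ULL;

        printf("%#018llx -> %#018llx\n",
               (unsigned long long)dma,
               (unsigned long long)swap_halves(dma));
        return 0;
}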
index ee4d2741b3abfc4a68155512bbd4e5bcc13c514b..346ceb8f17bd44104a5c23e30ca69ddbf52396ec 100644 (file)
@@ -2,6 +2,7 @@ obj-$(CONFIG_CRYPTO_DEV_CCP_DD) += ccp.o
 ccp-objs := ccp-dev.o \
            ccp-ops.o \
            ccp-dev-v3.o \
+           ccp-dev-v5.o \
            ccp-platform.o \
            ccp-dmaengine.o
 ccp-$(CONFIG_PCI) += ccp-pci.o
index 8f36af62fe951032d22dfd9f7ea0150e43d6360b..84a652be42749f365dd81cff133917ebf1689c71 100644 (file)
@@ -4,6 +4,7 @@
  * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
  *
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ * Author: Gary R Hook <gary.hook@amd.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -134,7 +135,22 @@ static int ccp_do_sha_update(struct ahash_request *req, unsigned int nbytes,
        rctx->cmd.engine = CCP_ENGINE_SHA;
        rctx->cmd.u.sha.type = rctx->type;
        rctx->cmd.u.sha.ctx = &rctx->ctx_sg;
-       rctx->cmd.u.sha.ctx_len = sizeof(rctx->ctx);
+
+       switch (rctx->type) {
+       case CCP_SHA_TYPE_1:
+               rctx->cmd.u.sha.ctx_len = SHA1_DIGEST_SIZE;
+               break;
+       case CCP_SHA_TYPE_224:
+               rctx->cmd.u.sha.ctx_len = SHA224_DIGEST_SIZE;
+               break;
+       case CCP_SHA_TYPE_256:
+               rctx->cmd.u.sha.ctx_len = SHA256_DIGEST_SIZE;
+               break;
+       default:
+               /* Should never get here */
+               break;
+       }
+
        rctx->cmd.u.sha.src = sg;
        rctx->cmd.u.sha.src_len = rctx->hash_cnt;
        rctx->cmd.u.sha.opad = ctx->u.sha.key_len ?
index d7a71034796705c4c98f7f56b796fe80628bfc94..8d2dbacc6161fd071a5ef64f063f49ccba7e5c36 100644 (file)
@@ -4,6 +4,7 @@
  * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
  *
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ * Author: Gary R Hook <gary.hook@amd.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
 
 #include "ccp-dev.h"
 
+static u32 ccp_alloc_ksb(struct ccp_cmd_queue *cmd_q, unsigned int count)
+{
+       int start;
+       struct ccp_device *ccp = cmd_q->ccp;
+
+       for (;;) {
+               mutex_lock(&ccp->sb_mutex);
+
+               start = (u32)bitmap_find_next_zero_area(ccp->sb,
+                                                       ccp->sb_count,
+                                                       ccp->sb_start,
+                                                       count, 0);
+               if (start <= ccp->sb_count) {
+                       bitmap_set(ccp->sb, start, count);
+
+                       mutex_unlock(&ccp->sb_mutex);
+                       break;
+               }
+
+               ccp->sb_avail = 0;
+
+               mutex_unlock(&ccp->sb_mutex);
+
+               /* Wait for KSB entries to become available */
+               if (wait_event_interruptible(ccp->sb_queue, ccp->sb_avail))
+                       return 0;
+       }
+
+       return KSB_START + start;
+}
+
+static void ccp_free_ksb(struct ccp_cmd_queue *cmd_q, unsigned int start,
+                        unsigned int count)
+{
+       struct ccp_device *ccp = cmd_q->ccp;
+
+       if (!start)
+               return;
+
+       mutex_lock(&ccp->sb_mutex);
+
+       bitmap_clear(ccp->sb, start - KSB_START, count);
+
+       ccp->sb_avail = 1;
+
+       mutex_unlock(&ccp->sb_mutex);
+
+       wake_up_interruptible_all(&ccp->sb_queue);
+}
+
+static unsigned int ccp_get_free_slots(struct ccp_cmd_queue *cmd_q)
+{
+       return CMD_Q_DEPTH(ioread32(cmd_q->reg_status));
+}
+
 static int ccp_do_cmd(struct ccp_op *op, u32 *cr, unsigned int cr_count)
 {
        struct ccp_cmd_queue *cmd_q = op->cmd_q;
@@ -68,6 +124,9 @@ static int ccp_do_cmd(struct ccp_op *op, u32 *cr, unsigned int cr_count)
                        /* On error delete all related jobs from the queue */
                        cmd = (cmd_q->id << DEL_Q_ID_SHIFT)
                              | op->jobid;
+                       if (cmd_q->cmd_error)
+                               ccp_log_error(cmd_q->ccp,
+                                             cmd_q->cmd_error);
 
                        iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);
 
@@ -99,10 +158,10 @@ static int ccp_perform_aes(struct ccp_op *op)
                | (op->u.aes.type << REQ1_AES_TYPE_SHIFT)
                | (op->u.aes.mode << REQ1_AES_MODE_SHIFT)
                | (op->u.aes.action << REQ1_AES_ACTION_SHIFT)
-               | (op->ksb_key << REQ1_KEY_KSB_SHIFT);
+               | (op->sb_key << REQ1_KEY_KSB_SHIFT);
        cr[1] = op->src.u.dma.length - 1;
        cr[2] = ccp_addr_lo(&op->src.u.dma);
-       cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
+       cr[3] = (op->sb_ctx << REQ4_KSB_SHIFT)
                | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
                | ccp_addr_hi(&op->src.u.dma);
        cr[4] = ccp_addr_lo(&op->dst.u.dma);
@@ -129,10 +188,10 @@ static int ccp_perform_xts_aes(struct ccp_op *op)
        cr[0] = (CCP_ENGINE_XTS_AES_128 << REQ1_ENGINE_SHIFT)
                | (op->u.xts.action << REQ1_AES_ACTION_SHIFT)
                | (op->u.xts.unit_size << REQ1_XTS_AES_SIZE_SHIFT)
-               | (op->ksb_key << REQ1_KEY_KSB_SHIFT);
+               | (op->sb_key << REQ1_KEY_KSB_SHIFT);
        cr[1] = op->src.u.dma.length - 1;
        cr[2] = ccp_addr_lo(&op->src.u.dma);
-       cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
+       cr[3] = (op->sb_ctx << REQ4_KSB_SHIFT)
                | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
                | ccp_addr_hi(&op->src.u.dma);
        cr[4] = ccp_addr_lo(&op->dst.u.dma);
@@ -158,7 +217,7 @@ static int ccp_perform_sha(struct ccp_op *op)
                | REQ1_INIT;
        cr[1] = op->src.u.dma.length - 1;
        cr[2] = ccp_addr_lo(&op->src.u.dma);
-       cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
+       cr[3] = (op->sb_ctx << REQ4_KSB_SHIFT)
                | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
                | ccp_addr_hi(&op->src.u.dma);
 
@@ -181,11 +240,11 @@ static int ccp_perform_rsa(struct ccp_op *op)
        /* Fill out the register contents for REQ1 through REQ6 */
        cr[0] = (CCP_ENGINE_RSA << REQ1_ENGINE_SHIFT)
                | (op->u.rsa.mod_size << REQ1_RSA_MOD_SIZE_SHIFT)
-               | (op->ksb_key << REQ1_KEY_KSB_SHIFT)
+               | (op->sb_key << REQ1_KEY_KSB_SHIFT)
                | REQ1_EOM;
        cr[1] = op->u.rsa.input_len - 1;
        cr[2] = ccp_addr_lo(&op->src.u.dma);
-       cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
+       cr[3] = (op->sb_ctx << REQ4_KSB_SHIFT)
                | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
                | ccp_addr_hi(&op->src.u.dma);
        cr[4] = ccp_addr_lo(&op->dst.u.dma);
@@ -215,10 +274,10 @@ static int ccp_perform_passthru(struct ccp_op *op)
                        | ccp_addr_hi(&op->src.u.dma);
 
                if (op->u.passthru.bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
-                       cr[3] |= (op->ksb_key << REQ4_KSB_SHIFT);
+                       cr[3] |= (op->sb_key << REQ4_KSB_SHIFT);
        } else {
-               cr[2] = op->src.u.ksb * CCP_KSB_BYTES;
-               cr[3] = (CCP_MEMTYPE_KSB << REQ4_MEMTYPE_SHIFT);
+               cr[2] = op->src.u.sb * CCP_SB_BYTES;
+               cr[3] = (CCP_MEMTYPE_SB << REQ4_MEMTYPE_SHIFT);
        }
 
        if (op->dst.type == CCP_MEMTYPE_SYSTEM) {
@@ -226,8 +285,8 @@ static int ccp_perform_passthru(struct ccp_op *op)
                cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
                        | ccp_addr_hi(&op->dst.u.dma);
        } else {
-               cr[4] = op->dst.u.ksb * CCP_KSB_BYTES;
-               cr[5] = (CCP_MEMTYPE_KSB << REQ6_MEMTYPE_SHIFT);
+               cr[4] = op->dst.u.sb * CCP_SB_BYTES;
+               cr[5] = (CCP_MEMTYPE_SB << REQ6_MEMTYPE_SHIFT);
        }
 
        if (op->eom)
@@ -256,35 +315,6 @@ static int ccp_perform_ecc(struct ccp_op *op)
        return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
 }
 
-static int ccp_trng_read(struct hwrng *rng, void *data, size_t max, bool wait)
-{
-       struct ccp_device *ccp = container_of(rng, struct ccp_device, hwrng);
-       u32 trng_value;
-       int len = min_t(int, sizeof(trng_value), max);
-
-       /*
-        * Locking is provided by the caller so we can update device
-        * hwrng-related fields safely
-        */
-       trng_value = ioread32(ccp->io_regs + TRNG_OUT_REG);
-       if (!trng_value) {
-               /* Zero is returned if not data is available or if a
-                * bad-entropy error is present. Assume an error if
-                * we exceed TRNG_RETRIES reads of zero.
-                */
-               if (ccp->hwrng_retries++ > TRNG_RETRIES)
-                       return -EIO;
-
-               return 0;
-       }
-
-       /* Reset the counter and save the rng value */
-       ccp->hwrng_retries = 0;
-       memcpy(data, &trng_value, len);
-
-       return len;
-}
-
 static int ccp_init(struct ccp_device *ccp)
 {
        struct device *dev = ccp->dev;
@@ -321,9 +351,9 @@ static int ccp_init(struct ccp_device *ccp)
                cmd_q->dma_pool = dma_pool;
 
                /* Reserve 2 KSB regions for the queue */
-               cmd_q->ksb_key = KSB_START + ccp->ksb_start++;
-               cmd_q->ksb_ctx = KSB_START + ccp->ksb_start++;
-               ccp->ksb_count -= 2;
+               cmd_q->sb_key = KSB_START + ccp->sb_start++;
+               cmd_q->sb_ctx = KSB_START + ccp->sb_start++;
+               ccp->sb_count -= 2;
 
                /* Preset some register values and masks that are queue
                 * number dependent
@@ -335,7 +365,7 @@ static int ccp_init(struct ccp_device *ccp)
                cmd_q->int_ok = 1 << (i * 2);
                cmd_q->int_err = 1 << ((i * 2) + 1);
 
-               cmd_q->free_slots = CMD_Q_DEPTH(ioread32(cmd_q->reg_status));
+               cmd_q->free_slots = ccp_get_free_slots(cmd_q);
 
                init_waitqueue_head(&cmd_q->int_queue);
 
@@ -375,9 +405,10 @@ static int ccp_init(struct ccp_device *ccp)
        }
 
        /* Initialize the queues used to wait for KSB space and suspend */
-       init_waitqueue_head(&ccp->ksb_queue);
+       init_waitqueue_head(&ccp->sb_queue);
        init_waitqueue_head(&ccp->suspend_queue);
 
+       dev_dbg(dev, "Starting threads...\n");
        /* Create a kthread for each queue */
        for (i = 0; i < ccp->cmd_q_count; i++) {
                struct task_struct *kthread;
@@ -397,29 +428,26 @@ static int ccp_init(struct ccp_device *ccp)
                wake_up_process(kthread);
        }
 
-       /* Register the RNG */
-       ccp->hwrng.name = ccp->rngname;
-       ccp->hwrng.read = ccp_trng_read;
-       ret = hwrng_register(&ccp->hwrng);
-       if (ret) {
-               dev_err(dev, "error registering hwrng (%d)\n", ret);
+       dev_dbg(dev, "Enabling interrupts...\n");
+       /* Enable interrupts */
+       iowrite32(qim, ccp->io_regs + IRQ_MASK_REG);
+
+       dev_dbg(dev, "Registering device...\n");
+       ccp_add_device(ccp);
+
+       ret = ccp_register_rng(ccp);
+       if (ret)
                goto e_kthread;
-       }
 
        /* Register the DMA engine support */
        ret = ccp_dmaengine_register(ccp);
        if (ret)
                goto e_hwrng;
 
-       ccp_add_device(ccp);
-
-       /* Enable interrupts */
-       iowrite32(qim, ccp->io_regs + IRQ_MASK_REG);
-
        return 0;
 
 e_hwrng:
-       hwrng_unregister(&ccp->hwrng);
+       ccp_unregister_rng(ccp);
 
 e_kthread:
        for (i = 0; i < ccp->cmd_q_count; i++)
@@ -441,19 +469,14 @@ static void ccp_destroy(struct ccp_device *ccp)
        struct ccp_cmd *cmd;
        unsigned int qim, i;
 
-       /* Remove this device from the list of available units first */
-       ccp_del_device(ccp);
-
        /* Unregister the DMA engine */
        ccp_dmaengine_unregister(ccp);
 
        /* Unregister the RNG */
-       hwrng_unregister(&ccp->hwrng);
+       ccp_unregister_rng(ccp);
 
-       /* Stop the queue kthreads */
-       for (i = 0; i < ccp->cmd_q_count; i++)
-               if (ccp->cmd_q[i].kthread)
-                       kthread_stop(ccp->cmd_q[i].kthread);
+       /* Remove this device from the list of available units */
+       ccp_del_device(ccp);
 
        /* Build queue interrupt mask (two interrupt masks per queue) */
        qim = 0;
@@ -472,6 +495,11 @@ static void ccp_destroy(struct ccp_device *ccp)
        }
        iowrite32(qim, ccp->io_regs + IRQ_STATUS_REG);
 
+       /* Stop the queue kthreads */
+       for (i = 0; i < ccp->cmd_q_count; i++)
+               if (ccp->cmd_q[i].kthread)
+                       kthread_stop(ccp->cmd_q[i].kthread);
+
        ccp->free_irq(ccp);
 
        for (i = 0; i < ccp->cmd_q_count; i++)
@@ -527,18 +555,24 @@ static irqreturn_t ccp_irq_handler(int irq, void *data)
 }
 
 static const struct ccp_actions ccp3_actions = {
-       .perform_aes = ccp_perform_aes,
-       .perform_xts_aes = ccp_perform_xts_aes,
-       .perform_sha = ccp_perform_sha,
-       .perform_rsa = ccp_perform_rsa,
-       .perform_passthru = ccp_perform_passthru,
-       .perform_ecc = ccp_perform_ecc,
+       .aes = ccp_perform_aes,
+       .xts_aes = ccp_perform_xts_aes,
+       .sha = ccp_perform_sha,
+       .rsa = ccp_perform_rsa,
+       .passthru = ccp_perform_passthru,
+       .ecc = ccp_perform_ecc,
+       .sballoc = ccp_alloc_ksb,
+       .sbfree = ccp_free_ksb,
        .init = ccp_init,
        .destroy = ccp_destroy,
+       .get_free_slots = ccp_get_free_slots,
        .irqhandler = ccp_irq_handler,
 };
 
-struct ccp_vdata ccpv3 = {
+const struct ccp_vdata ccpv3 = {
        .version = CCP_VERSION(3, 0),
+       .setup = NULL,
        .perform = &ccp3_actions,
+       .bar = 2,
+       .offset = 0x20000,
 };
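The KSB allocator added above (ccp_alloc_ksb()/ccp_free_ksb()) searches a shared bitmap for a run of free slots, marks it used, and sleeps on sb_queue when nothing fits until a free wakes the waiters. The wait-queue and mutex parts need kernel context, but the bitmap bookkeeping reduces to the small single-threaded sketch below (illustrative only; not the driver's code):

#include <stdbool.h>
#include <stddef.h>

#define SLOTS 16

static bool used[SLOTS];

/* Find 'count' consecutive free slots, mark them used and return the
 * start index; return -1 if no run fits (where the driver would sleep).
 */
static int slot_alloc(size_t count)
{
        for (size_t start = 0; start + count <= SLOTS; start++) {
                size_t i;

                for (i = 0; i < count && !used[start + i]; i++)
                        ;
                if (i == count) {
                        for (i = 0; i < count; i++)
                                used[start + i] = true;
                        return (int)start;
                }
        }
        return -1;
}

static void slot_free(int start, size_t count)
{
        for (size_t i = 0; i < count; i++)
                used[start + i] = false;
        /* the driver would wake sb_queue sleepers here */
}

int main(void)
{
        int a = slot_alloc(2);  /* gets slots 0..1 */
        int b = slot_alloc(3);  /* gets slots 2..4 */

        slot_free(a, 2);
        slot_free(b, 3);
        return 0;
}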
diff --git a/drivers/crypto/ccp/ccp-dev-v5.c b/drivers/crypto/ccp/ccp-dev-v5.c
new file mode 100644 (file)
index 0000000..faf3cb3
--- /dev/null
@@ -0,0 +1,1017 @@
+/*
+ * AMD Cryptographic Coprocessor (CCP) driver
+ *
+ * Copyright (C) 2016 Advanced Micro Devices, Inc.
+ *
+ * Author: Gary R Hook <gary.hook@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/kthread.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/compiler.h>
+#include <linux/ccp.h>
+
+#include "ccp-dev.h"
+
+static u32 ccp_lsb_alloc(struct ccp_cmd_queue *cmd_q, unsigned int count)
+{
+       struct ccp_device *ccp;
+       int start;
+
+       /* First look at the map for the queue */
+       if (cmd_q->lsb >= 0) {
+               start = (u32)bitmap_find_next_zero_area(cmd_q->lsbmap,
+                                                       LSB_SIZE,
+                                                       0, count, 0);
+               if (start < LSB_SIZE) {
+                       bitmap_set(cmd_q->lsbmap, start, count);
+                       return start + cmd_q->lsb * LSB_SIZE;
+               }
+       }
+
+       /* No joy; try to get an entry from the shared blocks */
+       ccp = cmd_q->ccp;
+       for (;;) {
+               mutex_lock(&ccp->sb_mutex);
+
+               start = (u32)bitmap_find_next_zero_area(ccp->lsbmap,
+                                                       MAX_LSB_CNT * LSB_SIZE,
+                                                       0,
+                                                       count, 0);
+               if (start <= MAX_LSB_CNT * LSB_SIZE) {
+                       bitmap_set(ccp->lsbmap, start, count);
+
+                       mutex_unlock(&ccp->sb_mutex);
+                       return start * LSB_ITEM_SIZE;
+               }
+
+               ccp->sb_avail = 0;
+
+               mutex_unlock(&ccp->sb_mutex);
+
+               /* Wait for KSB entries to become available */
+               if (wait_event_interruptible(ccp->sb_queue, ccp->sb_avail))
+                       return 0;
+       }
+}
+
+static void ccp_lsb_free(struct ccp_cmd_queue *cmd_q, unsigned int start,
+                        unsigned int count)
+{
+       int lsbno = start / LSB_SIZE;
+
+       if (!start)
+               return;
+
+       if (cmd_q->lsb == lsbno) {
+               /* An entry from the private LSB */
+               bitmap_clear(cmd_q->lsbmap, start % LSB_SIZE, count);
+       } else {
+               /* From the shared LSBs */
+               struct ccp_device *ccp = cmd_q->ccp;
+
+               mutex_lock(&ccp->sb_mutex);
+               bitmap_clear(ccp->lsbmap, start, count);
+               ccp->sb_avail = 1;
+               mutex_unlock(&ccp->sb_mutex);
+               wake_up_interruptible_all(&ccp->sb_queue);
+       }
+}
+
+/* CCP version 5: Union to define the function field (cmd_reg1/dword0) */
+union ccp_function {
+       struct {
+               u16 size:7;
+               u16 encrypt:1;
+               u16 mode:5;
+               u16 type:2;
+       } aes;
+       struct {
+               u16 size:7;
+               u16 encrypt:1;
+               u16 rsvd:5;
+               u16 type:2;
+       } aes_xts;
+       struct {
+               u16 rsvd1:10;
+               u16 type:4;
+               u16 rsvd2:1;
+       } sha;
+       struct {
+               u16 mode:3;
+               u16 size:12;
+       } rsa;
+       struct {
+               u16 byteswap:2;
+               u16 bitwise:3;
+               u16 reflect:2;
+               u16 rsvd:8;
+       } pt;
+       struct  {
+               u16 rsvd:13;
+       } zlib;
+       struct {
+               u16 size:10;
+               u16 type:2;
+               u16 mode:3;
+       } ecc;
+       u16 raw;
+};
+
+#define        CCP_AES_SIZE(p)         ((p)->aes.size)
+#define        CCP_AES_ENCRYPT(p)      ((p)->aes.encrypt)
+#define        CCP_AES_MODE(p)         ((p)->aes.mode)
+#define        CCP_AES_TYPE(p)         ((p)->aes.type)
+#define        CCP_XTS_SIZE(p)         ((p)->aes_xts.size)
+#define        CCP_XTS_ENCRYPT(p)      ((p)->aes_xts.encrypt)
+#define        CCP_SHA_TYPE(p)         ((p)->sha.type)
+#define        CCP_RSA_SIZE(p)         ((p)->rsa.size)
+#define        CCP_PT_BYTESWAP(p)      ((p)->pt.byteswap)
+#define        CCP_PT_BITWISE(p)       ((p)->pt.bitwise)
+#define        CCP_ECC_MODE(p)         ((p)->ecc.mode)
+#define        CCP_ECC_AFFINE(p)       ((p)->ecc.one)
+
+/* Word 0 */
+#define CCP5_CMD_DW0(p)                ((p)->dw0)
+#define CCP5_CMD_SOC(p)                (CCP5_CMD_DW0(p).soc)
+#define CCP5_CMD_IOC(p)                (CCP5_CMD_DW0(p).ioc)
+#define CCP5_CMD_INIT(p)       (CCP5_CMD_DW0(p).init)
+#define CCP5_CMD_EOM(p)                (CCP5_CMD_DW0(p).eom)
+#define CCP5_CMD_FUNCTION(p)   (CCP5_CMD_DW0(p).function)
+#define CCP5_CMD_ENGINE(p)     (CCP5_CMD_DW0(p).engine)
+#define CCP5_CMD_PROT(p)       (CCP5_CMD_DW0(p).prot)
+
+/* Word 1 */
+#define CCP5_CMD_DW1(p)                ((p)->length)
+#define CCP5_CMD_LEN(p)                (CCP5_CMD_DW1(p))
+
+/* Word 2 */
+#define CCP5_CMD_DW2(p)                ((p)->src_lo)
+#define CCP5_CMD_SRC_LO(p)     (CCP5_CMD_DW2(p))
+
+/* Word 3 */
+#define CCP5_CMD_DW3(p)                ((p)->dw3)
+#define CCP5_CMD_SRC_MEM(p)    ((p)->dw3.src_mem)
+#define CCP5_CMD_SRC_HI(p)     ((p)->dw3.src_hi)
+#define CCP5_CMD_LSB_ID(p)     ((p)->dw3.lsb_cxt_id)
+#define CCP5_CMD_FIX_SRC(p)    ((p)->dw3.fixed)
+
+/* Words 4/5 */
+#define CCP5_CMD_DW4(p)                ((p)->dw4)
+#define CCP5_CMD_DST_LO(p)     (CCP5_CMD_DW4(p).dst_lo)
+#define CCP5_CMD_DW5(p)                ((p)->dw5.fields.dst_hi)
+#define CCP5_CMD_DST_HI(p)     (CCP5_CMD_DW5(p))
+#define CCP5_CMD_DST_MEM(p)    ((p)->dw5.fields.dst_mem)
+#define CCP5_CMD_FIX_DST(p)    ((p)->dw5.fields.fixed)
+#define CCP5_CMD_SHA_LO(p)     ((p)->dw4.sha_len_lo)
+#define CCP5_CMD_SHA_HI(p)     ((p)->dw5.sha_len_hi)
+
+/* Word 6/7 */
+#define CCP5_CMD_DW6(p)                ((p)->key_lo)
+#define CCP5_CMD_KEY_LO(p)     (CCP5_CMD_DW6(p))
+#define CCP5_CMD_DW7(p)                ((p)->dw7)
+#define CCP5_CMD_KEY_HI(p)     ((p)->dw7.key_hi)
+#define CCP5_CMD_KEY_MEM(p)    ((p)->dw7.key_mem)
+
+static inline u32 low_address(unsigned long addr)
+{
+       return (u64)addr & 0x0ffffffff;
+}
+
+static inline u32 high_address(unsigned long addr)
+{
+       return ((u64)addr >> 32) & 0x00000ffff;
+}
+
+static unsigned int ccp5_get_free_slots(struct ccp_cmd_queue *cmd_q)
+{
+       unsigned int head_idx, n;
+       u32 head_lo, queue_start;
+
+       queue_start = low_address(cmd_q->qdma_tail);
+       head_lo = ioread32(cmd_q->reg_head_lo);
+       head_idx = (head_lo - queue_start) / sizeof(struct ccp5_desc);
+
+       n = head_idx + COMMANDS_PER_QUEUE - cmd_q->qidx - 1;
+
+       return n % COMMANDS_PER_QUEUE; /* Always one unused spot */
+}
+
+static int ccp5_do_cmd(struct ccp5_desc *desc,
+                      struct ccp_cmd_queue *cmd_q)
+{
+       u32 *mP;
+       __le32 *dP;
+       u32 tail;
+       int     i;
+       int ret = 0;
+
+       if (CCP5_CMD_SOC(desc)) {
+               CCP5_CMD_IOC(desc) = 1;
+               CCP5_CMD_SOC(desc) = 0;
+       }
+       mutex_lock(&cmd_q->q_mutex);
+
+       mP = (u32 *) &cmd_q->qbase[cmd_q->qidx];
+       dP = (__le32 *) desc;
+       for (i = 0; i < 8; i++)
+               mP[i] = cpu_to_le32(dP[i]); /* handle endianness */
+
+       cmd_q->qidx = (cmd_q->qidx + 1) % COMMANDS_PER_QUEUE;
+
+       /* The data used by this command must be flushed to memory */
+       wmb();
+
+       /* Write the new tail address back to the queue register */
+       tail = low_address(cmd_q->qdma_tail + cmd_q->qidx * Q_DESC_SIZE);
+       iowrite32(tail, cmd_q->reg_tail_lo);
+
+       /* Turn the queue back on using our cached control register */
+       iowrite32(cmd_q->qcontrol | CMD5_Q_RUN, cmd_q->reg_control);
+       mutex_unlock(&cmd_q->q_mutex);
+
+       if (CCP5_CMD_IOC(desc)) {
+               /* Wait for the job to complete */
+               ret = wait_event_interruptible(cmd_q->int_queue,
+                                              cmd_q->int_rcvd);
+               if (ret || cmd_q->cmd_error) {
+                       if (cmd_q->cmd_error)
+                               ccp_log_error(cmd_q->ccp,
+                                             cmd_q->cmd_error);
+                       /* A version 5 device doesn't use Job IDs... */
+                       if (!ret)
+                               ret = -EIO;
+               }
+               cmd_q->int_rcvd = 0;
+       }
+
+       return 0;
+}
+
+static int ccp5_perform_aes(struct ccp_op *op)
+{
+       struct ccp5_desc desc;
+       union ccp_function function;
+       u32 key_addr = op->sb_key * LSB_ITEM_SIZE;
+
+       /* Zero out all the fields of the command desc */
+       memset(&desc, 0, Q_DESC_SIZE);
+
+       CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_AES;
+
+       CCP5_CMD_SOC(&desc) = op->soc;
+       CCP5_CMD_IOC(&desc) = 1;
+       CCP5_CMD_INIT(&desc) = op->init;
+       CCP5_CMD_EOM(&desc) = op->eom;
+       CCP5_CMD_PROT(&desc) = 0;
+
+       function.raw = 0;
+       CCP_AES_ENCRYPT(&function) = op->u.aes.action;
+       CCP_AES_MODE(&function) = op->u.aes.mode;
+       CCP_AES_TYPE(&function) = op->u.aes.type;
+       if (op->u.aes.mode == CCP_AES_MODE_CFB)
+               CCP_AES_SIZE(&function) = 0x7f;
+
+       CCP5_CMD_FUNCTION(&desc) = function.raw;
+
+       CCP5_CMD_LEN(&desc) = op->src.u.dma.length;
+
+       CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
+       CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
+       CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+
+       CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
+       CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
+       CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+
+       CCP5_CMD_KEY_LO(&desc) = lower_32_bits(key_addr);
+       CCP5_CMD_KEY_HI(&desc) = 0;
+       CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SB;
+       CCP5_CMD_LSB_ID(&desc) = op->sb_ctx;
+
+       return ccp5_do_cmd(&desc, op->cmd_q);
+}
+
+static int ccp5_perform_xts_aes(struct ccp_op *op)
+{
+       struct ccp5_desc desc;
+       union ccp_function function;
+       u32 key_addr = op->sb_key * LSB_ITEM_SIZE;
+
+       /* Zero out all the fields of the command desc */
+       memset(&desc, 0, Q_DESC_SIZE);
+
+       CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_XTS_AES_128;
+
+       CCP5_CMD_SOC(&desc) = op->soc;
+       CCP5_CMD_IOC(&desc) = 1;
+       CCP5_CMD_INIT(&desc) = op->init;
+       CCP5_CMD_EOM(&desc) = op->eom;
+       CCP5_CMD_PROT(&desc) = 0;
+
+       function.raw = 0;
+       CCP_XTS_ENCRYPT(&function) = op->u.xts.action;
+       CCP_XTS_SIZE(&function) = op->u.xts.unit_size;
+       CCP5_CMD_FUNCTION(&desc) = function.raw;
+
+       CCP5_CMD_LEN(&desc) = op->src.u.dma.length;
+
+       CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
+       CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
+       CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+
+       CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
+       CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
+       CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+
+       CCP5_CMD_KEY_LO(&desc) = lower_32_bits(key_addr);
+       CCP5_CMD_KEY_HI(&desc) =  0;
+       CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SB;
+       CCP5_CMD_LSB_ID(&desc) = op->sb_ctx;
+
+       return ccp5_do_cmd(&desc, op->cmd_q);
+}
+
+static int ccp5_perform_sha(struct ccp_op *op)
+{
+       struct ccp5_desc desc;
+       union ccp_function function;
+
+       /* Zero out all the fields of the command desc */
+       memset(&desc, 0, Q_DESC_SIZE);
+
+       CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_SHA;
+
+       CCP5_CMD_SOC(&desc) = op->soc;
+       CCP5_CMD_IOC(&desc) = 1;
+       CCP5_CMD_INIT(&desc) = 1;
+       CCP5_CMD_EOM(&desc) = op->eom;
+       CCP5_CMD_PROT(&desc) = 0;
+
+       function.raw = 0;
+       CCP_SHA_TYPE(&function) = op->u.sha.type;
+       CCP5_CMD_FUNCTION(&desc) = function.raw;
+
+       CCP5_CMD_LEN(&desc) = op->src.u.dma.length;
+
+       CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
+       CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
+       CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+
+       CCP5_CMD_LSB_ID(&desc) = op->sb_ctx;
+
+       if (op->eom) {
+               CCP5_CMD_SHA_LO(&desc) = lower_32_bits(op->u.sha.msg_bits);
+               CCP5_CMD_SHA_HI(&desc) = upper_32_bits(op->u.sha.msg_bits);
+       } else {
+               CCP5_CMD_SHA_LO(&desc) = 0;
+               CCP5_CMD_SHA_HI(&desc) = 0;
+       }
+
+       return ccp5_do_cmd(&desc, op->cmd_q);
+}
+
+static int ccp5_perform_rsa(struct ccp_op *op)
+{
+       struct ccp5_desc desc;
+       union ccp_function function;
+
+       /* Zero out all the fields of the command desc */
+       memset(&desc, 0, Q_DESC_SIZE);
+
+       CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_RSA;
+
+       CCP5_CMD_SOC(&desc) = op->soc;
+       CCP5_CMD_IOC(&desc) = 1;
+       CCP5_CMD_INIT(&desc) = 0;
+       CCP5_CMD_EOM(&desc) = 1;
+       CCP5_CMD_PROT(&desc) = 0;
+
+       function.raw = 0;
+       CCP_RSA_SIZE(&function) = op->u.rsa.mod_size;
+       CCP5_CMD_FUNCTION(&desc) = function.raw;
+
+       CCP5_CMD_LEN(&desc) = op->u.rsa.input_len;
+
+       /* Source is from external memory */
+       CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
+       CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
+       CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+
+       /* Destination is in external memory */
+       CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
+       CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
+       CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+
+       /* Key (Exponent) is in external memory */
+       CCP5_CMD_KEY_LO(&desc) = ccp_addr_lo(&op->exp.u.dma);
+       CCP5_CMD_KEY_HI(&desc) = ccp_addr_hi(&op->exp.u.dma);
+       CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+
+       return ccp5_do_cmd(&desc, op->cmd_q);
+}
+
+static int ccp5_perform_passthru(struct ccp_op *op)
+{
+       struct ccp5_desc desc;
+       union ccp_function function;
+       struct ccp_dma_info *saddr = &op->src.u.dma;
+       struct ccp_dma_info *daddr = &op->dst.u.dma;
+
+       memset(&desc, 0, Q_DESC_SIZE);
+
+       CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_PASSTHRU;
+
+       CCP5_CMD_SOC(&desc) = 0;
+       CCP5_CMD_IOC(&desc) = 1;
+       CCP5_CMD_INIT(&desc) = 0;
+       CCP5_CMD_EOM(&desc) = op->eom;
+       CCP5_CMD_PROT(&desc) = 0;
+
+       function.raw = 0;
+       CCP_PT_BYTESWAP(&function) = op->u.passthru.byte_swap;
+       CCP_PT_BITWISE(&function) = op->u.passthru.bit_mod;
+       CCP5_CMD_FUNCTION(&desc) = function.raw;
+
+       /* Length of source data is always 256 bytes */
+       if (op->src.type == CCP_MEMTYPE_SYSTEM)
+               CCP5_CMD_LEN(&desc) = saddr->length;
+       else
+               CCP5_CMD_LEN(&desc) = daddr->length;
+
+       if (op->src.type == CCP_MEMTYPE_SYSTEM) {
+               CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
+               CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
+               CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+
+               if (op->u.passthru.bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
+                       CCP5_CMD_LSB_ID(&desc) = op->sb_key;
+       } else {
+               u32 key_addr = op->src.u.sb * CCP_SB_BYTES;
+
+               CCP5_CMD_SRC_LO(&desc) = lower_32_bits(key_addr);
+               CCP5_CMD_SRC_HI(&desc) = 0;
+               CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SB;
+       }
+
+       if (op->dst.type == CCP_MEMTYPE_SYSTEM) {
+               CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
+               CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
+               CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+       } else {
+               u32 key_addr = op->dst.u.sb * CCP_SB_BYTES;
+
+               CCP5_CMD_DST_LO(&desc) = lower_32_bits(key_addr);
+               CCP5_CMD_DST_HI(&desc) = 0;
+               CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SB;
+       }
+
+       return ccp5_do_cmd(&desc, op->cmd_q);
+}
+
+static int ccp5_perform_ecc(struct ccp_op *op)
+{
+       struct ccp5_desc desc;
+       union ccp_function function;
+
+       /* Zero out all the fields of the command desc */
+       memset(&desc, 0, Q_DESC_SIZE);
+
+       CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_ECC;
+
+       CCP5_CMD_SOC(&desc) = 0;
+       CCP5_CMD_IOC(&desc) = 1;
+       CCP5_CMD_INIT(&desc) = 0;
+       CCP5_CMD_EOM(&desc) = 1;
+       CCP5_CMD_PROT(&desc) = 0;
+
+       function.raw = 0;
+       function.ecc.mode = op->u.ecc.function;
+       CCP5_CMD_FUNCTION(&desc) = function.raw;
+
+       CCP5_CMD_LEN(&desc) = op->src.u.dma.length;
+
+       CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
+       CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
+       CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+
+       CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
+       CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
+       CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+
+       return ccp5_do_cmd(&desc, op->cmd_q);
+}
+
+static int ccp_find_lsb_regions(struct ccp_cmd_queue *cmd_q, u64 status)
+{
+       int q_mask = 1 << cmd_q->id;
+       int queues = 0;
+       int j;
+
+       /* Build a bit mask to know which LSBs this queue has access to.
+        * Don't bother with segment 0 as it has special privileges.
+        */
+       for (j = 1; j < MAX_LSB_CNT; j++) {
+               if (status & q_mask)
+                       bitmap_set(cmd_q->lsbmask, j, 1);
+               status >>= LSB_REGION_WIDTH;
+       }
+       queues = bitmap_weight(cmd_q->lsbmask, MAX_LSB_CNT);
+       dev_info(cmd_q->ccp->dev, "Queue %d can access %d LSB regions\n",
+                cmd_q->id, queues);
+
+       return queues ? 0 : -EINVAL;
+}
+
+
+static int ccp_find_and_assign_lsb_to_q(struct ccp_device *ccp,
+                                       int lsb_cnt, int n_lsbs,
+                                       unsigned long *lsb_pub)
+{
+       DECLARE_BITMAP(qlsb, MAX_LSB_CNT);
+       int bitno;
+       int qlsb_wgt;
+       int i;
+
+       /* For each queue:
+        * If the count of potential LSBs available to a queue matches the
+        * ordinal given to us in lsb_cnt:
+        * Copy the mask of possible LSBs for this queue into "qlsb";
+        * For each bit in qlsb, see if the corresponding bit in the
+        * aggregation mask is set; if so, we have a match.
+        *     If we have a match, clear the bit in the aggregation to
+        *     mark it as no longer available.
+        *     If there is no match, clear the bit in qlsb and keep looking.
+        */
+       for (i = 0; i < ccp->cmd_q_count; i++) {
+               struct ccp_cmd_queue *cmd_q = &ccp->cmd_q[i];
+
+               qlsb_wgt = bitmap_weight(cmd_q->lsbmask, MAX_LSB_CNT);
+
+               if (qlsb_wgt == lsb_cnt) {
+                       bitmap_copy(qlsb, cmd_q->lsbmask, MAX_LSB_CNT);
+
+                       bitno = find_first_bit(qlsb, MAX_LSB_CNT);
+                       while (bitno < MAX_LSB_CNT) {
+                               if (test_bit(bitno, lsb_pub)) {
+                                       /* We found an available LSB
+                                        * that this queue can access
+                                        */
+                                       cmd_q->lsb = bitno;
+                                       bitmap_clear(lsb_pub, bitno, 1);
+                                       dev_info(ccp->dev,
+                                                "Queue %d gets LSB %d\n",
+                                                i, bitno);
+                                       break;
+                               }
+                               bitmap_clear(qlsb, bitno, 1);
+                               bitno = find_first_bit(qlsb, MAX_LSB_CNT);
+                       }
+                       if (bitno >= MAX_LSB_CNT)
+                               return -EINVAL;
+                       n_lsbs--;
+               }
+       }
+       return n_lsbs;
+}
+
+/* For each queue, from the most- to least-constrained:
+ * find an LSB that can be assigned to the queue. If there are N queues that
+ * can only use M LSBs, where N > M, fail; otherwise, every queue will get a
+ * dedicated LSB. Remaining LSB regions become a shared resource.
+ * If we have fewer LSBs than queues, all LSB regions become shared resources.
+ */
+static int ccp_assign_lsbs(struct ccp_device *ccp)
+{
+       DECLARE_BITMAP(lsb_pub, MAX_LSB_CNT);
+       DECLARE_BITMAP(qlsb, MAX_LSB_CNT);
+       int n_lsbs = 0;
+       int bitno;
+       int i, lsb_cnt;
+       int rc = 0;
+
+       bitmap_zero(lsb_pub, MAX_LSB_CNT);
+
+       /* Create an aggregate bitmap to get a total count of available LSBs */
+       for (i = 0; i < ccp->cmd_q_count; i++)
+               bitmap_or(lsb_pub,
+                         lsb_pub, ccp->cmd_q[i].lsbmask,
+                         MAX_LSB_CNT);
+
+       n_lsbs = bitmap_weight(lsb_pub, MAX_LSB_CNT);
+
+       if (n_lsbs >= ccp->cmd_q_count) {
+               /* We have enough LSBs to give every queue a private LSB.
+                * Brute force search to start with the queues that are more
+                * constrained in LSB choice. When an LSB is privately
+                * assigned, it is removed from the public mask.
+                * This is an ugly N squared algorithm with some optimization.
+                */
+               for (lsb_cnt = 1;
+                    n_lsbs && (lsb_cnt <= MAX_LSB_CNT);
+                    lsb_cnt++) {
+                       rc = ccp_find_and_assign_lsb_to_q(ccp, lsb_cnt, n_lsbs,
+                                                         lsb_pub);
+                       if (rc < 0)
+                               return -EINVAL;
+                       n_lsbs = rc;
+               }
+       }
+
+       rc = 0;
+       /* What's left of the LSBs, according to the public mask, now become
+        * shared. Any zero bits in the lsb_pub mask represent an LSB region
+        * that can't be used as a shared resource, so mark the LSB slots for
+        * them as "in use".
+        */
+       bitmap_copy(qlsb, lsb_pub, MAX_LSB_CNT);
+
+       bitno = find_first_zero_bit(qlsb, MAX_LSB_CNT);
+       while (bitno < MAX_LSB_CNT) {
+               bitmap_set(ccp->lsbmap, bitno * LSB_SIZE, LSB_SIZE);
+               bitmap_set(qlsb, bitno, 1);
+               bitno = find_first_zero_bit(qlsb, MAX_LSB_CNT);
+       }
+
+       return rc;
+}
+
+static int ccp5_init(struct ccp_device *ccp)
+{
+       struct device *dev = ccp->dev;
+       struct ccp_cmd_queue *cmd_q;
+       struct dma_pool *dma_pool;
+       char dma_pool_name[MAX_DMAPOOL_NAME_LEN];
+       unsigned int qmr, qim, i;
+       u64 status;
+       u32 status_lo, status_hi;
+       int ret;
+
+       /* Find available queues */
+       qim = 0;
+       qmr = ioread32(ccp->io_regs + Q_MASK_REG);
+       for (i = 0; i < MAX_HW_QUEUES; i++) {
+
+               if (!(qmr & (1 << i)))
+                       continue;
+
+               /* Allocate a dma pool for this queue */
+               snprintf(dma_pool_name, sizeof(dma_pool_name), "%s_q%d",
+                        ccp->name, i);
+               dma_pool = dma_pool_create(dma_pool_name, dev,
+                                          CCP_DMAPOOL_MAX_SIZE,
+                                          CCP_DMAPOOL_ALIGN, 0);
+               if (!dma_pool) {
+                       dev_err(dev, "unable to allocate dma pool\n");
+                       ret = -ENOMEM;
+               }
+
+               cmd_q = &ccp->cmd_q[ccp->cmd_q_count];
+               ccp->cmd_q_count++;
+
+               cmd_q->ccp = ccp;
+               cmd_q->id = i;
+               cmd_q->dma_pool = dma_pool;
+               mutex_init(&cmd_q->q_mutex);
+
+               /* Page alignment satisfies our needs for N <= 128 */
+               BUILD_BUG_ON(COMMANDS_PER_QUEUE > 128);
+               cmd_q->qsize = Q_SIZE(Q_DESC_SIZE);
+               cmd_q->qbase = dma_zalloc_coherent(dev, cmd_q->qsize,
+                                                  &cmd_q->qbase_dma,
+                                                  GFP_KERNEL);
+               if (!cmd_q->qbase) {
+                       dev_err(dev, "unable to allocate command queue\n");
+                       ret = -ENOMEM;
+                       goto e_pool;
+               }
+
+               cmd_q->qidx = 0;
+               /* Preset some register values and masks that are queue
+                * number dependent
+                */
+               cmd_q->reg_control = ccp->io_regs +
+                                    CMD5_Q_STATUS_INCR * (i + 1);
+               cmd_q->reg_tail_lo = cmd_q->reg_control + CMD5_Q_TAIL_LO_BASE;
+               cmd_q->reg_head_lo = cmd_q->reg_control + CMD5_Q_HEAD_LO_BASE;
+               cmd_q->reg_int_enable = cmd_q->reg_control +
+                                       CMD5_Q_INT_ENABLE_BASE;
+               cmd_q->reg_interrupt_status = cmd_q->reg_control +
+                                             CMD5_Q_INTERRUPT_STATUS_BASE;
+               cmd_q->reg_status = cmd_q->reg_control + CMD5_Q_STATUS_BASE;
+               cmd_q->reg_int_status = cmd_q->reg_control +
+                                       CMD5_Q_INT_STATUS_BASE;
+               cmd_q->reg_dma_status = cmd_q->reg_control +
+                                       CMD5_Q_DMA_STATUS_BASE;
+               cmd_q->reg_dma_read_status = cmd_q->reg_control +
+                                            CMD5_Q_DMA_READ_STATUS_BASE;
+               cmd_q->reg_dma_write_status = cmd_q->reg_control +
+                                             CMD5_Q_DMA_WRITE_STATUS_BASE;
+
+               init_waitqueue_head(&cmd_q->int_queue);
+
+               dev_dbg(dev, "queue #%u available\n", i);
+       }
+       if (ccp->cmd_q_count == 0) {
+               dev_notice(dev, "no command queues available\n");
+               ret = -EIO;
+               goto e_pool;
+       }
+       dev_notice(dev, "%u command queues available\n", ccp->cmd_q_count);
+
+       /* Turn off the queues and disable interrupts until ready */
+       for (i = 0; i < ccp->cmd_q_count; i++) {
+               cmd_q = &ccp->cmd_q[i];
+
+               cmd_q->qcontrol = 0; /* Start with nothing */
+               iowrite32(cmd_q->qcontrol, cmd_q->reg_control);
+
+               /* Disable the interrupts */
+               iowrite32(0x00, cmd_q->reg_int_enable);
+               ioread32(cmd_q->reg_int_status);
+               ioread32(cmd_q->reg_status);
+
+               /* Clear the interrupts */
+               iowrite32(ALL_INTERRUPTS, cmd_q->reg_interrupt_status);
+       }
+
+       dev_dbg(dev, "Requesting an IRQ...\n");
+       /* Request an irq */
+       ret = ccp->get_irq(ccp);
+       if (ret) {
+               dev_err(dev, "unable to allocate an IRQ\n");
+               goto e_pool;
+       }
+
+       /* Initialize the queue used to suspend */
+       init_waitqueue_head(&ccp->suspend_queue);
+
+       dev_dbg(dev, "Loading LSB map...\n");
+       /* Copy the private LSB mask to the public registers */
+       status_lo = ioread32(ccp->io_regs + LSB_PRIVATE_MASK_LO_OFFSET);
+       status_hi = ioread32(ccp->io_regs + LSB_PRIVATE_MASK_HI_OFFSET);
+       iowrite32(status_lo, ccp->io_regs + LSB_PUBLIC_MASK_LO_OFFSET);
+       iowrite32(status_hi, ccp->io_regs + LSB_PUBLIC_MASK_HI_OFFSET);
+       status = ((u64)status_hi<<30) | (u64)status_lo;
+
+       dev_dbg(dev, "Configuring virtual queues...\n");
+       /* Configure size of each virtual queue accessible to host */
+       for (i = 0; i < ccp->cmd_q_count; i++) {
+               u32 dma_addr_lo;
+               u32 dma_addr_hi;
+
+               cmd_q = &ccp->cmd_q[i];
+
+               cmd_q->qcontrol &= ~(CMD5_Q_SIZE << CMD5_Q_SHIFT);
+               cmd_q->qcontrol |= QUEUE_SIZE_VAL << CMD5_Q_SHIFT;
+
+               cmd_q->qdma_tail = cmd_q->qbase_dma;
+               dma_addr_lo = low_address(cmd_q->qdma_tail);
+               iowrite32((u32)dma_addr_lo, cmd_q->reg_tail_lo);
+               iowrite32((u32)dma_addr_lo, cmd_q->reg_head_lo);
+
+               dma_addr_hi = high_address(cmd_q->qdma_tail);
+               cmd_q->qcontrol |= (dma_addr_hi << 16);
+               iowrite32(cmd_q->qcontrol, cmd_q->reg_control);
+
+               /* Find the LSB regions accessible to the queue */
+               ccp_find_lsb_regions(cmd_q, status);
+               cmd_q->lsb = -1; /* Unassigned value */
+       }
+
+       dev_dbg(dev, "Assigning LSBs...\n");
+       ret = ccp_assign_lsbs(ccp);
+       if (ret) {
+               dev_err(dev, "Unable to assign LSBs (%d)\n", ret);
+               goto e_irq;
+       }
+
+       /* Optimization: pre-allocate LSB slots for each queue */
+       for (i = 0; i < ccp->cmd_q_count; i++) {
+               ccp->cmd_q[i].sb_key = ccp_lsb_alloc(&ccp->cmd_q[i], 2);
+               ccp->cmd_q[i].sb_ctx = ccp_lsb_alloc(&ccp->cmd_q[i], 2);
+       }
+
+       dev_dbg(dev, "Starting threads...\n");
+       /* Create a kthread for each queue */
+       for (i = 0; i < ccp->cmd_q_count; i++) {
+               struct task_struct *kthread;
+
+               cmd_q = &ccp->cmd_q[i];
+
+               kthread = kthread_create(ccp_cmd_queue_thread, cmd_q,
+                                        "%s-q%u", ccp->name, cmd_q->id);
+               if (IS_ERR(kthread)) {
+                       dev_err(dev, "error creating queue thread (%ld)\n",
+                               PTR_ERR(kthread));
+                       ret = PTR_ERR(kthread);
+                       goto e_kthread;
+               }
+
+               cmd_q->kthread = kthread;
+               wake_up_process(kthread);
+       }
+
+       dev_dbg(dev, "Enabling interrupts...\n");
+       /* Enable interrupts */
+       for (i = 0; i < ccp->cmd_q_count; i++) {
+               cmd_q = &ccp->cmd_q[i];
+               iowrite32(ALL_INTERRUPTS, cmd_q->reg_int_enable);
+       }
+
+       dev_dbg(dev, "Registering device...\n");
+       /* Put this on the unit list to make it available */
+       ccp_add_device(ccp);
+
+       ret = ccp_register_rng(ccp);
+       if (ret)
+               goto e_kthread;
+
+       /* Register the DMA engine support */
+       ret = ccp_dmaengine_register(ccp);
+       if (ret)
+               goto e_hwrng;
+
+       return 0;
+
+e_hwrng:
+       ccp_unregister_rng(ccp);
+
+e_kthread:
+       for (i = 0; i < ccp->cmd_q_count; i++)
+               if (ccp->cmd_q[i].kthread)
+                       kthread_stop(ccp->cmd_q[i].kthread);
+
+e_irq:
+       ccp->free_irq(ccp);
+
+e_pool:
+       for (i = 0; i < ccp->cmd_q_count; i++)
+               dma_pool_destroy(ccp->cmd_q[i].dma_pool);
+
+       return ret;
+}
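
The per-queue MMIO layout and the LSB-mask handling in ccp5_init() above can be summarised with a small sketch. Both helpers are illustrative only; they lean on the CMD5_*/LSB_* constants added to ccp-dev.h further down, and the 30/10 bit split is inferred from the 0x3FFFFFFF/0x3FF values programmed into the private mask registers by ccp5other_config() below.

/* Illustrative helpers, not part of the patch. */
static void __iomem *example_queue_regs(void __iomem *io_regs, unsigned int i)
{
	/* Queue 0's register block starts at io_regs + 0x1000, queue 1's
	 * at 0x2000, and so on (CMD5_Q_STATUS_INCR == 0x1000).
	 */
	return io_regs + CMD5_Q_STATUS_INCR * (i + 1);
}

static u64 example_lsb_mask(u32 lo, u32 hi)
{
	/* The two mask registers together describe MAX_LSB_CNT (8) regions
	 * of LSB_REGION_WIDTH (5) bits each, 40 bits in total: the low
	 * register carries bits 0-29 and the high register bits 30-39,
	 * which is why the combined value is built with a shift of 30
	 * rather than 32.
	 */
	return ((u64)hi << 30) | (u64)lo;
}
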
+
+static void ccp5_destroy(struct ccp_device *ccp)
+{
+       struct device *dev = ccp->dev;
+       struct ccp_cmd_queue *cmd_q;
+       struct ccp_cmd *cmd;
+       unsigned int i;
+
+       /* Unregister the DMA engine */
+       ccp_dmaengine_unregister(ccp);
+
+       /* Unregister the RNG */
+       ccp_unregister_rng(ccp);
+
+       /* Remove this device from the list of available units first */
+       ccp_del_device(ccp);
+
+       /* Disable and clear interrupts */
+       for (i = 0; i < ccp->cmd_q_count; i++) {
+               cmd_q = &ccp->cmd_q[i];
+
+               /* Turn off the run bit */
+               iowrite32(cmd_q->qcontrol & ~CMD5_Q_RUN, cmd_q->reg_control);
+
+               /* Disable the interrupts */
+               iowrite32(ALL_INTERRUPTS, cmd_q->reg_interrupt_status);
+
+               /* Clear the interrupt status */
+               iowrite32(0x00, cmd_q->reg_int_enable);
+               ioread32(cmd_q->reg_int_status);
+               ioread32(cmd_q->reg_status);
+       }
+
+       /* Stop the queue kthreads */
+       for (i = 0; i < ccp->cmd_q_count; i++)
+               if (ccp->cmd_q[i].kthread)
+                       kthread_stop(ccp->cmd_q[i].kthread);
+
+       ccp->free_irq(ccp);
+
+       for (i = 0; i < ccp->cmd_q_count; i++) {
+               cmd_q = &ccp->cmd_q[i];
+               dma_free_coherent(dev, cmd_q->qsize, cmd_q->qbase,
+                                 cmd_q->qbase_dma);
+       }
+
+       /* Flush the cmd and backlog queue */
+       while (!list_empty(&ccp->cmd)) {
+               /* Invoke the callback directly with an error code */
+               cmd = list_first_entry(&ccp->cmd, struct ccp_cmd, entry);
+               list_del(&cmd->entry);
+               cmd->callback(cmd->data, -ENODEV);
+       }
+       while (!list_empty(&ccp->backlog)) {
+               /* Invoke the callback directly with an error code */
+               cmd = list_first_entry(&ccp->backlog, struct ccp_cmd, entry);
+               list_del(&cmd->entry);
+               cmd->callback(cmd->data, -ENODEV);
+       }
+}
+
+static irqreturn_t ccp5_irq_handler(int irq, void *data)
+{
+       struct device *dev = data;
+       struct ccp_device *ccp = dev_get_drvdata(dev);
+       u32 status;
+       unsigned int i;
+
+       for (i = 0; i < ccp->cmd_q_count; i++) {
+               struct ccp_cmd_queue *cmd_q = &ccp->cmd_q[i];
+
+               status = ioread32(cmd_q->reg_interrupt_status);
+
+               if (status) {
+                       cmd_q->int_status = status;
+                       cmd_q->q_status = ioread32(cmd_q->reg_status);
+                       cmd_q->q_int_status = ioread32(cmd_q->reg_int_status);
+
+                       /* On error, only save the first error value */
+                       if ((status & INT_ERROR) && !cmd_q->cmd_error)
+                               cmd_q->cmd_error = CMD_Q_ERROR(cmd_q->q_status);
+
+                       cmd_q->int_rcvd = 1;
+
+                       /* Acknowledge the interrupt and wake the kthread */
+                       iowrite32(ALL_INTERRUPTS, cmd_q->reg_interrupt_status);
+                       wake_up_interruptible(&cmd_q->int_queue);
+               }
+       }
+
+       return IRQ_HANDLED;
+}
+
+static void ccp5_config(struct ccp_device *ccp)
+{
+       /* Public side */
+       iowrite32(0x00001249, ccp->io_regs + CMD5_REQID_CONFIG_OFFSET);
+}
+
+static void ccp5other_config(struct ccp_device *ccp)
+{
+       int i;
+       u32 rnd;
+
+       /* We own all of the queues on the NTB CCP */
+
+       iowrite32(0x00012D57, ccp->io_regs + CMD5_TRNG_CTL_OFFSET);
+       iowrite32(0x00000003, ccp->io_regs + CMD5_CONFIG_0_OFFSET);
+       for (i = 0; i < 12; i++) {
+               rnd = ioread32(ccp->io_regs + TRNG_OUT_REG);
+               iowrite32(rnd, ccp->io_regs + CMD5_AES_MASK_OFFSET);
+       }
+
+       iowrite32(0x0000001F, ccp->io_regs + CMD5_QUEUE_MASK_OFFSET);
+       iowrite32(0x00005B6D, ccp->io_regs + CMD5_QUEUE_PRIO_OFFSET);
+       iowrite32(0x00000000, ccp->io_regs + CMD5_CMD_TIMEOUT_OFFSET);
+
+       iowrite32(0x3FFFFFFF, ccp->io_regs + LSB_PRIVATE_MASK_LO_OFFSET);
+       iowrite32(0x000003FF, ccp->io_regs + LSB_PRIVATE_MASK_HI_OFFSET);
+
+       iowrite32(0x00108823, ccp->io_regs + CMD5_CLK_GATE_CTL_OFFSET);
+
+       ccp5_config(ccp);
+}
+
+/* Version 5 adds some function, but is essentially the same as v3 */
+static const struct ccp_actions ccp5_actions = {
+       .aes = ccp5_perform_aes,
+       .xts_aes = ccp5_perform_xts_aes,
+       .sha = ccp5_perform_sha,
+       .rsa = ccp5_perform_rsa,
+       .passthru = ccp5_perform_passthru,
+       .ecc = ccp5_perform_ecc,
+       .sballoc = ccp_lsb_alloc,
+       .sbfree = ccp_lsb_free,
+       .init = ccp5_init,
+       .destroy = ccp5_destroy,
+       .get_free_slots = ccp5_get_free_slots,
+       .irqhandler = ccp5_irq_handler,
+};
+
+const struct ccp_vdata ccpv5a = {
+       .version = CCP_VERSION(5, 0),
+       .setup = ccp5_config,
+       .perform = &ccp5_actions,
+       .bar = 2,
+       .offset = 0x0,
+};
+
+const struct ccp_vdata ccpv5b = {
+       .version = CCP_VERSION(5, 0),
+       .setup = ccp5other_config,
+       .perform = &ccp5_actions,
+       .bar = 2,
+       .offset = 0x0,
+};
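
The two ccp_vdata instances above differ only in their setup hook (the secondary engine additionally programs its TRNG, queue masks and clock gating). A hedged sketch of how a bus front-end is expected to consume a ccp_vdata; the wrapper name is made up and the actual PCI/platform glue is not part of this hunk.

/* Illustrative bring-up sequence, not taken from ccp-pci.c. */
static int example_ccp_bring_up(struct ccp_device *ccp)
{
	if (ccp->vdata->setup)
		ccp->vdata->setup(ccp);         /* ccp5_config or ccp5other_config */

	return ccp->vdata->perform->init(ccp);  /* ccp5_init for both v5 variants */
}
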
index 87b9f2bfa623b1c6a2752519fffcdb5957646874..cafa633aae10805f176d5654f597839bc3b66520 100644 (file)
@@ -4,6 +4,7 @@
  * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
  *
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ * Author: Gary R Hook <gary.hook@amd.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -39,6 +40,59 @@ struct ccp_tasklet_data {
        struct ccp_cmd *cmd;
 };
 
+/* Human-readable error strings */
+char *ccp_error_codes[] = {
+       "",
+       "ERR 01: ILLEGAL_ENGINE",
+       "ERR 02: ILLEGAL_KEY_ID",
+       "ERR 03: ILLEGAL_FUNCTION_TYPE",
+       "ERR 04: ILLEGAL_FUNCTION_MODE",
+       "ERR 05: ILLEGAL_FUNCTION_ENCRYPT",
+       "ERR 06: ILLEGAL_FUNCTION_SIZE",
+       "ERR 07: Zlib_MISSING_INIT_EOM",
+       "ERR 08: ILLEGAL_FUNCTION_RSVD",
+       "ERR 09: ILLEGAL_BUFFER_LENGTH",
+       "ERR 10: VLSB_FAULT",
+       "ERR 11: ILLEGAL_MEM_ADDR",
+       "ERR 12: ILLEGAL_MEM_SEL",
+       "ERR 13: ILLEGAL_CONTEXT_ID",
+       "ERR 14: ILLEGAL_KEY_ADDR",
+       "ERR 15: 0xF Reserved",
+       "ERR 16: Zlib_ILLEGAL_MULTI_QUEUE",
+       "ERR 17: Zlib_ILLEGAL_JOBID_CHANGE",
+       "ERR 18: CMD_TIMEOUT",
+       "ERR 19: IDMA0_AXI_SLVERR",
+       "ERR 20: IDMA0_AXI_DECERR",
+       "ERR 21: 0x15 Reserved",
+       "ERR 22: IDMA1_AXI_SLAVE_FAULT",
+       "ERR 23: IDMA1_AIXI_DECERR",
+       "ERR 24: 0x18 Reserved",
+       "ERR 25: ZLIBVHB_AXI_SLVERR",
+       "ERR 26: ZLIBVHB_AXI_DECERR",
+       "ERR 27: 0x1B Reserved",
+       "ERR 27: ZLIB_UNEXPECTED_EOM",
+       "ERR 27: ZLIB_EXTRA_DATA",
+       "ERR 30: ZLIB_BTYPE",
+       "ERR 31: ZLIB_UNDEFINED_SYMBOL",
+       "ERR 32: ZLIB_UNDEFINED_DISTANCE_S",
+       "ERR 33: ZLIB_CODE_LENGTH_SYMBOL",
+       "ERR 34: ZLIB _VHB_ILLEGAL_FETCH",
+       "ERR 35: ZLIB_UNCOMPRESSED_LEN",
+       "ERR 36: ZLIB_LIMIT_REACHED",
+       "ERR 37: ZLIB_CHECKSUM_MISMATCH0",
+       "ERR 38: ODMA0_AXI_SLVERR",
+       "ERR 39: ODMA0_AXI_DECERR",
+       "ERR 40: 0x28 Reserved",
+       "ERR 41: ODMA1_AXI_SLVERR",
+       "ERR 42: ODMA1_AXI_DECERR",
+       "ERR 43: LSB_PARITY_ERR",
+};
+
+void ccp_log_error(struct ccp_device *d, int e)
+{
+       dev_err(d->dev, "CCP error: %s (0x%x)\n", ccp_error_codes[e], e);
+}
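
A short usage sketch for the new error table; the helper below is illustrative, the patch itself only adds the strings and ccp_log_error(). CMD_Q_ERROR() masks the low six bits of a queue status word, which is what gets saved in cmd_q->cmd_error by the v5 interrupt handler above.

/* Illustrative call site, not part of the patch. */
static void example_report_queue_error(struct ccp_cmd_queue *cmd_q)
{
	if (cmd_q->cmd_error)
		ccp_log_error(cmd_q->ccp, cmd_q->cmd_error);
}
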
+
 /* List of CCPs, CCP count, read-write access lock, and access functions
  *
  * Lock structure: get ccp_unit_lock for reading whenever we need to
@@ -58,7 +112,7 @@ static struct ccp_device *ccp_rr;
 
 /* Ever-increasing value to produce unique unit numbers */
 static atomic_t ccp_unit_ordinal;
-unsigned int ccp_increment_unit_ordinal(void)
+static unsigned int ccp_increment_unit_ordinal(void)
 {
        return atomic_inc_return(&ccp_unit_ordinal);
 }
@@ -118,6 +172,29 @@ void ccp_del_device(struct ccp_device *ccp)
        write_unlock_irqrestore(&ccp_unit_lock, flags);
 }
 
+
+
+int ccp_register_rng(struct ccp_device *ccp)
+{
+       int ret = 0;
+
+       dev_dbg(ccp->dev, "Registering RNG...\n");
+       /* Register an RNG */
+       ccp->hwrng.name = ccp->rngname;
+       ccp->hwrng.read = ccp_trng_read;
+       ret = hwrng_register(&ccp->hwrng);
+       if (ret)
+               dev_err(ccp->dev, "error registering hwrng (%d)\n", ret);
+
+       return ret;
+}
+
+void ccp_unregister_rng(struct ccp_device *ccp)
+{
+       if (ccp->hwrng.name)
+               hwrng_unregister(&ccp->hwrng);
+}
+
 static struct ccp_device *ccp_get_device(void)
 {
        unsigned long flags;
@@ -397,9 +474,9 @@ struct ccp_device *ccp_alloc_struct(struct device *dev)
 
        spin_lock_init(&ccp->cmd_lock);
        mutex_init(&ccp->req_mutex);
-       mutex_init(&ccp->ksb_mutex);
-       ccp->ksb_count = KSB_COUNT;
-       ccp->ksb_start = 0;
+       mutex_init(&ccp->sb_mutex);
+       ccp->sb_count = KSB_COUNT;
+       ccp->sb_start = 0;
 
        ccp->ord = ccp_increment_unit_ordinal();
        snprintf(ccp->name, MAX_CCP_NAME_LEN, "ccp-%u", ccp->ord);
@@ -408,6 +485,34 @@ struct ccp_device *ccp_alloc_struct(struct device *dev)
        return ccp;
 }
 
+int ccp_trng_read(struct hwrng *rng, void *data, size_t max, bool wait)
+{
+       struct ccp_device *ccp = container_of(rng, struct ccp_device, hwrng);
+       u32 trng_value;
+       int len = min_t(int, sizeof(trng_value), max);
+
+       /* Locking is provided by the caller so we can update device
+        * hwrng-related fields safely
+        */
+       trng_value = ioread32(ccp->io_regs + TRNG_OUT_REG);
+       if (!trng_value) {
+               /* Zero is returned if no data is available or if a
+                * bad-entropy error is present. Assume an error if
+                * we exceed TRNG_RETRIES reads of zero.
+                */
+               if (ccp->hwrng_retries++ > TRNG_RETRIES)
+                       return -EIO;
+
+               return 0;
+       }
+
+       /* Reset the counter and save the rng value */
+       ccp->hwrng_retries = 0;
+       memcpy(data, &trng_value, len);
+
+       return len;
+}
+
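The return-value contract of ccp_trng_read() follows the hwrng interface: a positive count for data, zero for a momentary dry spell, and -EIO once more than TRNG_RETRIES consecutive empty reads have been seen (the retry limit itself is not shown in this hunk). A minimal, illustrative caller:

/* Illustrative only: polling a single 32-bit word out of the TRNG. */
static int example_read_trng_word(struct ccp_device *ccp, u32 *out)
{
	int ret;

	do {
		ret = ccp_trng_read(&ccp->hwrng, out, sizeof(*out), false);
	} while (ret == 0);             /* 0 means "no data yet", not an error */

	return ret < 0 ? ret : 0;       /* -EIO after too many empty reads */
}
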
 #ifdef CONFIG_PM
 bool ccp_queues_suspended(struct ccp_device *ccp)
 {
index bd41ffceff82fecacfd878bfddd0056067cf7632..da5f4a67808344b1123d023123d58b1381ca1e11 100644 (file)
@@ -4,6 +4,7 @@
  * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
  *
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ * Author: Gary R Hook <gary.hook@amd.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
 #define CMD_Q_ERROR(__qs)              ((__qs) & 0x0000003f)
 #define CMD_Q_DEPTH(__qs)              (((__qs) >> 12) & 0x0000000f)
 
-/****** REQ0 Related Values ******/
+/* ------------------------ CCP Version 5 Specifics ------------------------ */
+#define CMD5_QUEUE_MASK_OFFSET         0x00
+#define        CMD5_QUEUE_PRIO_OFFSET          0x04
+#define CMD5_REQID_CONFIG_OFFSET       0x08
+#define        CMD5_CMD_TIMEOUT_OFFSET         0x10
+#define LSB_PUBLIC_MASK_LO_OFFSET      0x18
+#define LSB_PUBLIC_MASK_HI_OFFSET      0x1C
+#define LSB_PRIVATE_MASK_LO_OFFSET     0x20
+#define LSB_PRIVATE_MASK_HI_OFFSET     0x24
+
+#define CMD5_Q_CONTROL_BASE            0x0000
+#define CMD5_Q_TAIL_LO_BASE            0x0004
+#define CMD5_Q_HEAD_LO_BASE            0x0008
+#define CMD5_Q_INT_ENABLE_BASE         0x000C
+#define CMD5_Q_INTERRUPT_STATUS_BASE   0x0010
+
+#define CMD5_Q_STATUS_BASE             0x0100
+#define CMD5_Q_INT_STATUS_BASE         0x0104
+#define CMD5_Q_DMA_STATUS_BASE         0x0108
+#define CMD5_Q_DMA_READ_STATUS_BASE    0x010C
+#define CMD5_Q_DMA_WRITE_STATUS_BASE   0x0110
+#define CMD5_Q_ABORT_BASE              0x0114
+#define CMD5_Q_AX_CACHE_BASE           0x0118
+
+#define        CMD5_CONFIG_0_OFFSET            0x6000
+#define        CMD5_TRNG_CTL_OFFSET            0x6008
+#define        CMD5_AES_MASK_OFFSET            0x6010
+#define        CMD5_CLK_GATE_CTL_OFFSET        0x603C
+
+/* Address offset between two virtual queue registers */
+#define CMD5_Q_STATUS_INCR             0x1000
+
+/* Bit masks */
+#define CMD5_Q_RUN                     0x1
+#define CMD5_Q_HALT                    0x2
+#define CMD5_Q_MEM_LOCATION            0x4
+#define CMD5_Q_SIZE                    0x1F
+#define CMD5_Q_SHIFT                   3
+#define COMMANDS_PER_QUEUE             16
+#define QUEUE_SIZE_VAL                 ((ffs(COMMANDS_PER_QUEUE) - 2) & \
+                                         CMD5_Q_SIZE)
+#define Q_PTR_MASK                     ((2 << (QUEUE_SIZE_VAL + 5)) - 1)
+#define Q_DESC_SIZE                    sizeof(struct ccp5_desc)
+#define Q_SIZE(n)                      (COMMANDS_PER_QUEUE*(n))
+
+#define INT_COMPLETION                 0x1
+#define INT_ERROR                      0x2
+#define INT_QUEUE_STOPPED              0x4
+#define ALL_INTERRUPTS                 (INT_COMPLETION| \
+                                        INT_ERROR| \
+                                        INT_QUEUE_STOPPED)
+
+#define LSB_REGION_WIDTH               5
+#define MAX_LSB_CNT                    8
+
+#define LSB_SIZE                       16
+#define LSB_ITEM_SIZE                  32
+#define PLSB_MAP_SIZE                  (LSB_SIZE)
+#define SLSB_MAP_SIZE                  (MAX_LSB_CNT * LSB_SIZE)
+
+#define LSB_ENTRY_NUMBER(LSB_ADDR)     (LSB_ADDR / LSB_ITEM_SIZE)
+
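For the COMMANDS_PER_QUEUE value of 16 chosen above, the queue-sizing macros work out as follows (a worked example; ccp5_desc is the 8-word, 32-byte descriptor defined later in this header, and the BUILD_BUG_ON() in ccp5_init() caps the count at 128):

/*
 * Worked example, COMMANDS_PER_QUEUE = 16:
 *
 *   QUEUE_SIZE_VAL      = (ffs(16) - 2) & CMD5_Q_SIZE = (5 - 2) & 0x1F = 3
 *   control size field  = QUEUE_SIZE_VAL << CMD5_Q_SHIFT = 3 << 3 = 0x18
 *   Q_SIZE(Q_DESC_SIZE) = 16 * sizeof(struct ccp5_desc) = 16 * 32 = 512 bytes
 *   Q_PTR_MASK          = (2 << (3 + 5)) - 1 = 0x1ff, i.e. the 512-byte
 *                         ring size minus one
 */
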
+/* ------------------------ CCP Version 3 Specifics ------------------------ */
 #define REQ0_WAIT_FOR_WRITE            0x00000004
 #define REQ0_INT_ON_COMPLETE           0x00000002
 #define REQ0_STOP_ON_COMPLETE          0x00000001
 #define KSB_START                      77
 #define KSB_END                                127
 #define KSB_COUNT                      (KSB_END - KSB_START + 1)
-#define CCP_KSB_BITS                   256
-#define CCP_KSB_BYTES                  32
+#define CCP_SB_BITS                    256
 
 #define CCP_JOBID_MASK                 0x0000003f
 
+/* ------------------------ General CCP Defines ------------------------ */
+
 #define CCP_DMAPOOL_MAX_SIZE           64
 #define CCP_DMAPOOL_ALIGN              BIT(5)
 
 #define CCP_REVERSE_BUF_SIZE           64
 
-#define CCP_AES_KEY_KSB_COUNT          1
-#define CCP_AES_CTX_KSB_COUNT          1
+#define CCP_AES_KEY_SB_COUNT           1
+#define CCP_AES_CTX_SB_COUNT           1
 
-#define CCP_XTS_AES_KEY_KSB_COUNT      1
-#define CCP_XTS_AES_CTX_KSB_COUNT      1
+#define CCP_XTS_AES_KEY_SB_COUNT       1
+#define CCP_XTS_AES_CTX_SB_COUNT       1
 
-#define CCP_SHA_KSB_COUNT              1
+#define CCP_SHA_SB_COUNT               1
 
 #define CCP_RSA_MAX_WIDTH              4096
 
 #define CCP_PASSTHRU_BLOCKSIZE         256
 #define CCP_PASSTHRU_MASKSIZE          32
-#define CCP_PASSTHRU_KSB_COUNT         1
+#define CCP_PASSTHRU_SB_COUNT          1
 
 #define CCP_ECC_MODULUS_BYTES          48      /* 384-bits */
 #define CCP_ECC_MAX_OPERANDS           6
 #define CCP_ECC_RESULT_OFFSET          60
 #define CCP_ECC_RESULT_SUCCESS         0x0001
 
-struct ccp_op;
-
-/* Structure for computation functions that are device-specific */
-struct ccp_actions {
-       int (*perform_aes)(struct ccp_op *);
-       int (*perform_xts_aes)(struct ccp_op *);
-       int (*perform_sha)(struct ccp_op *);
-       int (*perform_rsa)(struct ccp_op *);
-       int (*perform_passthru)(struct ccp_op *);
-       int (*perform_ecc)(struct ccp_op *);
-       int (*init)(struct ccp_device *);
-       void (*destroy)(struct ccp_device *);
-       irqreturn_t (*irqhandler)(int, void *);
-};
-
-/* Structure to hold CCP version-specific values */
-struct ccp_vdata {
-       unsigned int version;
-       const struct ccp_actions *perform;
-};
-
-extern struct ccp_vdata ccpv3;
+#define CCP_SB_BYTES                   32
 
+struct ccp_op;
 struct ccp_device;
 struct ccp_cmd;
+struct ccp_fns;
 
 struct ccp_dma_cmd {
        struct list_head entry;
@@ -212,9 +257,29 @@ struct ccp_cmd_queue {
        /* Queue dma pool */
        struct dma_pool *dma_pool;
 
-       /* Queue reserved KSB regions */
-       u32 ksb_key;
-       u32 ksb_ctx;
+       /* Queue base address (not necessarily aligned) */
+       struct ccp5_desc *qbase;
+
+       /* Serializes descriptor submission to this queue */
+       struct mutex q_mutex ____cacheline_aligned;
+       unsigned int qidx;
+
+       /* Version 5 has different requirements for queue memory */
+       unsigned int qsize;
+       dma_addr_t qbase_dma;
+       dma_addr_t qdma_tail;
+
+       /* Per-queue reserved storage block(s) */
+       u32 sb_key;
+       u32 sb_ctx;
+
+       /* Bitmap of LSBs that can be accessed by this queue */
+       DECLARE_BITMAP(lsbmask, MAX_LSB_CNT);
+       /* Private LSB that is assigned to this queue, or -1 if none */
+       unsigned int lsb;
+       /* Bitmap for this queue's private LSB, unused otherwise */
+       DECLARE_BITMAP(lsbmap, PLSB_MAP_SIZE);
 
        /* Queue processing thread */
        struct task_struct *kthread;
@@ -229,8 +294,17 @@ struct ccp_cmd_queue {
        u32 int_err;
 
        /* Register addresses for queue */
+       void __iomem *reg_control;
+       void __iomem *reg_tail_lo;
+       void __iomem *reg_head_lo;
+       void __iomem *reg_int_enable;
+       void __iomem *reg_interrupt_status;
        void __iomem *reg_status;
        void __iomem *reg_int_status;
+       void __iomem *reg_dma_status;
+       void __iomem *reg_dma_read_status;
+       void __iomem *reg_dma_write_status;
+       u32 qcontrol; /* Cached control register */
 
        /* Status values from job */
        u32 int_status;
@@ -253,16 +327,14 @@ struct ccp_device {
 
        struct device *dev;
 
-       /*
-        * Bus specific device information
+       /* Bus specific device information
         */
        void *dev_specific;
        int (*get_irq)(struct ccp_device *ccp);
        void (*free_irq)(struct ccp_device *ccp);
        unsigned int irq;
 
-       /*
-        * I/O area used for device communication. The register mapping
+       /* I/O area used for device communication. The register mapping
         * starts at an offset into the mapped bar.
         *   The CMD_REQx registers and the Delete_Cmd_Queue_Job register
         *   need to be protected while a command queue thread is accessing
@@ -272,8 +344,7 @@ struct ccp_device {
        void __iomem *io_map;
        void __iomem *io_regs;
 
-       /*
-        * Master lists that all cmds are queued on. Because there can be
+       /* Master lists that all cmds are queued on. Because there can be
         * more than one CCP command queue that can process a cmd a separate
         * backlog list is neeeded so that the backlog completion call
         * completes before the cmd is available for execution.
@@ -283,47 +354,54 @@ struct ccp_device {
        struct list_head cmd;
        struct list_head backlog;
 
-       /*
-        * The command queues. These represent the queues available on the
+       /* The command queues. These represent the queues available on the
         * CCP that are available for processing cmds
         */
        struct ccp_cmd_queue cmd_q[MAX_HW_QUEUES];
        unsigned int cmd_q_count;
 
-       /*
-        * Support for the CCP True RNG
+       /* Support for the CCP True RNG
         */
        struct hwrng hwrng;
        unsigned int hwrng_retries;
 
-       /*
-        * Support for the CCP DMA capabilities
+       /* Support for the CCP DMA capabilities
         */
        struct dma_device dma_dev;
        struct ccp_dma_chan *ccp_dma_chan;
        struct kmem_cache *dma_cmd_cache;
        struct kmem_cache *dma_desc_cache;
 
-       /*
-        * A counter used to generate job-ids for cmds submitted to the CCP
+       /* A counter used to generate job-ids for cmds submitted to the CCP
         */
        atomic_t current_id ____cacheline_aligned;
 
-       /*
-        * The CCP uses key storage blocks (KSB) to maintain context for certain
-        * operations. To prevent multiple cmds from using the same KSB range
-        * a command queue reserves a KSB range for the duration of the cmd.
-        * Each queue, will however, reserve 2 KSB blocks for operations that
-        * only require single KSB entries (eg. AES context/iv and key) in order
-        * to avoid allocation contention.  This will reserve at most 10 KSB
-        * entries, leaving 40 KSB entries available for dynamic allocation.
+       /* The v3 CCP uses key storage blocks (SB) to maintain context for
+        * certain operations. To prevent multiple cmds from using the same
+        * SB range a command queue reserves an SB range for the duration of
+        * the cmd. Each queue, will however, reserve 2 SB blocks for
+        * operations that only require single SB entries (eg. AES context/iv
+        * and key) in order to avoid allocation contention.  This will reserve
+        * at most 10 SB entries, leaving 40 SB entries available for dynamic
+        * allocation.
+        *
+        * The v5 CCP Local Storage Block (LSB) is broken up into 8
+        * memory ranges, each of which can be enabled for access by one
+        * or more queues. Device initialization takes this into account,
+        * and attempts to assign one region for exclusive use by each
+        * available queue; the rest are then aggregated as "public" use.
+        * If there are fewer regions than queues, all regions are shared
+        * amongst all queues.
         */
-       struct mutex ksb_mutex ____cacheline_aligned;
-       DECLARE_BITMAP(ksb, KSB_COUNT);
-       wait_queue_head_t ksb_queue;
-       unsigned int ksb_avail;
-       unsigned int ksb_count;
-       u32 ksb_start;
+       struct mutex sb_mutex ____cacheline_aligned;
+       DECLARE_BITMAP(sb, KSB_COUNT);
+       wait_queue_head_t sb_queue;
+       unsigned int sb_avail;
+       unsigned int sb_count;
+       u32 sb_start;
+
+       /* Bitmap of shared LSBs, if any */
+       DECLARE_BITMAP(lsbmap, SLSB_MAP_SIZE);
 
        /* Suspend support */
        unsigned int suspending;
@@ -335,10 +413,11 @@ struct ccp_device {
 
 enum ccp_memtype {
        CCP_MEMTYPE_SYSTEM = 0,
-       CCP_MEMTYPE_KSB,
+       CCP_MEMTYPE_SB,
        CCP_MEMTYPE_LOCAL,
        CCP_MEMTYPE__LAST,
 };
+#define        CCP_MEMTYPE_LSB CCP_MEMTYPE_SB
 
 struct ccp_dma_info {
        dma_addr_t address;
@@ -379,7 +458,7 @@ struct ccp_mem {
        enum ccp_memtype type;
        union {
                struct ccp_dma_info dma;
-               u32 ksb;
+               u32 sb;
        } u;
 };
 
@@ -419,13 +498,14 @@ struct ccp_op {
        u32 jobid;
        u32 ioc;
        u32 soc;
-       u32 ksb_key;
-       u32 ksb_ctx;
+       u32 sb_key;
+       u32 sb_ctx;
        u32 init;
        u32 eom;
 
        struct ccp_mem src;
        struct ccp_mem dst;
+       struct ccp_mem exp;
 
        union {
                struct ccp_aes_op aes;
@@ -435,6 +515,7 @@ struct ccp_op {
                struct ccp_passthru_op passthru;
                struct ccp_ecc_op ecc;
        } u;
+       struct ccp_mem key;
 };
 
 static inline u32 ccp_addr_lo(struct ccp_dma_info *info)
@@ -447,6 +528,70 @@ static inline u32 ccp_addr_hi(struct ccp_dma_info *info)
        return upper_32_bits(info->address + info->offset) & 0x0000ffff;
 }
 
+/**
+ * descriptor for version 5 CCP commands
+ * 8 32-bit words:
+ * word 0: function; engine; control bits
+ * word 1: length of source data
+ * word 2: low 32 bits of source pointer
+ * word 3: upper 16 bits of source pointer; source memory type
+ * word 4: low 32 bits of destination pointer
+ * word 5: upper 16 bits of destination pointer; destination memory type
+ * word 6: low 32 bits of key pointer
+ * word 7: upper 16 bits of key pointer; key memory type
+ */
+struct dword0 {
+       __le32 soc:1;
+       __le32 ioc:1;
+       __le32 rsvd1:1;
+       __le32 init:1;
+       __le32 eom:1;           /* AES/SHA only */
+       __le32 function:15;
+       __le32 engine:4;
+       __le32 prot:1;
+       __le32 rsvd2:7;
+};
+
+struct dword3 {
+       __le32 src_hi:16;
+       __le32 src_mem:2;
+       __le32 lsb_cxt_id:8;
+       __le32 rsvd1:5;
+       __le32 fixed:1;
+};
+
+union dword4 {
+       __le32 dst_lo;          /* NON-SHA      */
+       __le32 sha_len_lo;      /* SHA          */
+};
+
+union dword5 {
+       struct {
+               __le32 dst_hi:16;
+               __le32 dst_mem:2;
+               __le32 rsvd1:13;
+               __le32 fixed:1;
+       } fields;
+       __le32 sha_len_hi;
+};
+
+struct dword7 {
+       __le32 key_hi:16;
+       __le32 key_mem:2;
+       __le32 rsvd1:14;
+};
+
+struct ccp5_desc {
+       struct dword0 dw0;
+       __le32 length;
+       __le32 src_lo;
+       struct dword3 dw3;
+       union dword4 dw4;
+       union dword5 dw5;
+       __le32 key_lo;
+       struct dword7 dw7;
+};
+
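Given the word layout documented above, the following sketch shows how a source/destination address pair would be packed into a ccp5_desc. It is illustrative only: the engine/function encodings and memory-type values are not spelled out by this patch, so they are left as parameters, and the address split reuses ccp_addr_lo()/ccp_addr_hi() from earlier in this header.

/* Illustrative only, assumes the ccp-dev.h definitions above. */
static void example_fill_desc_addrs(struct ccp5_desc *desc,
				    struct ccp_dma_info *src,
				    struct ccp_dma_info *dst,
				    u32 src_mem, u32 dst_mem)
{
	memset(desc, 0, Q_DESC_SIZE);

	desc->length = src->length;

	desc->src_lo = ccp_addr_lo(src);            /* word 2: low 32 bits     */
	desc->dw3.src_hi = ccp_addr_hi(src);        /* word 3: upper 16 bits   */
	desc->dw3.src_mem = src_mem;

	desc->dw4.dst_lo = ccp_addr_lo(dst);        /* word 4 (non-SHA layout) */
	desc->dw5.fields.dst_hi = ccp_addr_hi(dst);
	desc->dw5.fields.dst_mem = dst_mem;
}
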
 int ccp_pci_init(void);
 void ccp_pci_exit(void);
 
@@ -456,13 +601,48 @@ void ccp_platform_exit(void);
 void ccp_add_device(struct ccp_device *ccp);
 void ccp_del_device(struct ccp_device *ccp);
 
+extern void ccp_log_error(struct ccp_device *, int);
+
 struct ccp_device *ccp_alloc_struct(struct device *dev);
 bool ccp_queues_suspended(struct ccp_device *ccp);
 int ccp_cmd_queue_thread(void *data);
+int ccp_trng_read(struct hwrng *rng, void *data, size_t max, bool wait);
 
 int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd);
 
+int ccp_register_rng(struct ccp_device *ccp);
+void ccp_unregister_rng(struct ccp_device *ccp);
 int ccp_dmaengine_register(struct ccp_device *ccp);
 void ccp_dmaengine_unregister(struct ccp_device *ccp);
 
+/* Structure for computation functions that are device-specific */
+struct ccp_actions {
+       int (*aes)(struct ccp_op *);
+       int (*xts_aes)(struct ccp_op *);
+       int (*sha)(struct ccp_op *);
+       int (*rsa)(struct ccp_op *);
+       int (*passthru)(struct ccp_op *);
+       int (*ecc)(struct ccp_op *);
+       u32 (*sballoc)(struct ccp_cmd_queue *, unsigned int);
+       void (*sbfree)(struct ccp_cmd_queue *, unsigned int,
+                              unsigned int);
+       unsigned int (*get_free_slots)(struct ccp_cmd_queue *);
+       int (*init)(struct ccp_device *);
+       void (*destroy)(struct ccp_device *);
+       irqreturn_t (*irqhandler)(int, void *);
+};
+
+/* Structure to hold CCP version-specific values */
+struct ccp_vdata {
+       const unsigned int version;
+       void (*setup)(struct ccp_device *);
+       const struct ccp_actions *perform;
+       const unsigned int bar;
+       const unsigned int offset;
+};
+
+extern const struct ccp_vdata ccpv3;
+extern const struct ccp_vdata ccpv5a;
+extern const struct ccp_vdata ccpv5b;
+
 #endif
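
The point of moving ccp_actions/ccp_vdata into this header and dropping the perform_ prefixes is that ccp-ops.c stays version-agnostic; every operation now goes through the per-device table. A one-line illustration of the dispatch expression used throughout the ops rewrite below (the wrapper name is made up):

/* Illustrative: version-agnostic dispatch through the ops table. */
static inline int example_submit_aes(struct ccp_op *op)
{
	return op->cmd_q->ccp->vdata->perform->aes(op);
}
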
index 94f77b0f9ae75bda4ba54211164f1c4433b9ba4e..6553912804f73f1c061aec9bdd3afd0f0d7426bc 100644 (file)
@@ -299,12 +299,10 @@ static struct ccp_dma_desc *ccp_alloc_dma_desc(struct ccp_dma_chan *chan,
 {
        struct ccp_dma_desc *desc;
 
-       desc = kmem_cache_alloc(chan->ccp->dma_desc_cache, GFP_NOWAIT);
+       desc = kmem_cache_zalloc(chan->ccp->dma_desc_cache, GFP_NOWAIT);
        if (!desc)
                return NULL;
 
-       memset(desc, 0, sizeof(*desc));
-
        dma_async_tx_descriptor_init(&desc->tx_desc, &chan->dma_chan);
        desc->tx_desc.flags = flags;
        desc->tx_desc.tx_submit = ccp_tx_submit;
@@ -650,8 +648,11 @@ int ccp_dmaengine_register(struct ccp_device *ccp)
        dma_desc_cache_name = devm_kasprintf(ccp->dev, GFP_KERNEL,
                                             "%s-dmaengine-desc-cache",
                                             ccp->name);
-       if (!dma_cmd_cache_name)
-               return -ENOMEM;
+       if (!dma_desc_cache_name) {
+               ret = -ENOMEM;
+               goto err_cache;
+       }
+
        ccp->dma_desc_cache = kmem_cache_create(dma_desc_cache_name,
                                                sizeof(struct ccp_dma_desc),
                                                sizeof(void *),
index ffa2891035ac4210ec2c0efab78d6bd1affec240..50fae4442801ca76f0411d7023c915895661fe5a 100644 (file)
@@ -4,6 +4,7 @@
  * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
  *
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ * Author: Gary R Hook <gary.hook@amd.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
 #include "ccp-dev.h"
 
 /* SHA initial context values */
-static const __be32 ccp_sha1_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
+static const __be32 ccp_sha1_init[SHA1_DIGEST_SIZE / sizeof(__be32)] = {
        cpu_to_be32(SHA1_H0), cpu_to_be32(SHA1_H1),
        cpu_to_be32(SHA1_H2), cpu_to_be32(SHA1_H3),
-       cpu_to_be32(SHA1_H4), 0, 0, 0,
+       cpu_to_be32(SHA1_H4),
 };
 
-static const __be32 ccp_sha224_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
+static const __be32 ccp_sha224_init[SHA256_DIGEST_SIZE / sizeof(__be32)] = {
        cpu_to_be32(SHA224_H0), cpu_to_be32(SHA224_H1),
        cpu_to_be32(SHA224_H2), cpu_to_be32(SHA224_H3),
        cpu_to_be32(SHA224_H4), cpu_to_be32(SHA224_H5),
        cpu_to_be32(SHA224_H6), cpu_to_be32(SHA224_H7),
 };
 
-static const __be32 ccp_sha256_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
+static const __be32 ccp_sha256_init[SHA256_DIGEST_SIZE / sizeof(__be32)] = {
        cpu_to_be32(SHA256_H0), cpu_to_be32(SHA256_H1),
        cpu_to_be32(SHA256_H2), cpu_to_be32(SHA256_H3),
        cpu_to_be32(SHA256_H4), cpu_to_be32(SHA256_H5),
        cpu_to_be32(SHA256_H6), cpu_to_be32(SHA256_H7),
 };
 
-static u32 ccp_alloc_ksb(struct ccp_device *ccp, unsigned int count)
-{
-       int start;
-
-       for (;;) {
-               mutex_lock(&ccp->ksb_mutex);
-
-               start = (u32)bitmap_find_next_zero_area(ccp->ksb,
-                                                       ccp->ksb_count,
-                                                       ccp->ksb_start,
-                                                       count, 0);
-               if (start <= ccp->ksb_count) {
-                       bitmap_set(ccp->ksb, start, count);
-
-                       mutex_unlock(&ccp->ksb_mutex);
-                       break;
-               }
-
-               ccp->ksb_avail = 0;
-
-               mutex_unlock(&ccp->ksb_mutex);
-
-               /* Wait for KSB entries to become available */
-               if (wait_event_interruptible(ccp->ksb_queue, ccp->ksb_avail))
-                       return 0;
-       }
-
-       return KSB_START + start;
-}
-
-static void ccp_free_ksb(struct ccp_device *ccp, unsigned int start,
-                        unsigned int count)
-{
-       if (!start)
-               return;
-
-       mutex_lock(&ccp->ksb_mutex);
-
-       bitmap_clear(ccp->ksb, start - KSB_START, count);
-
-       ccp->ksb_avail = 1;
-
-       mutex_unlock(&ccp->ksb_mutex);
-
-       wake_up_interruptible_all(&ccp->ksb_queue);
-}
+#define        CCP_NEW_JOBID(ccp)      ((ccp->vdata->version == CCP_VERSION(3, 0)) ? \
+                                       ccp_gen_jobid(ccp) : 0)
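
The CCP_NEW_JOBID() helper above keeps the job-id policy in one place: a v3 device gets a freshly generated id, anything newer gets a constant zero. Illustrative expansion at a call site (how ccp_gen_jobid() derives its value is outside this hunk):

/*
 *   v3 device : op.jobid = ccp_gen_jobid(cmd_q->ccp);
 *   otherwise : op.jobid = 0;
 */
op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
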
 
 static u32 ccp_gen_jobid(struct ccp_device *ccp)
 {
@@ -231,7 +188,7 @@ static int ccp_reverse_set_dm_area(struct ccp_dm_workarea *wa,
                                   unsigned int len, unsigned int se_len,
                                   bool sign_extend)
 {
-       unsigned int nbytes, sg_offset, dm_offset, ksb_len, i;
+       unsigned int nbytes, sg_offset, dm_offset, sb_len, i;
        u8 buffer[CCP_REVERSE_BUF_SIZE];
 
        if (WARN_ON(se_len > sizeof(buffer)))
@@ -241,21 +198,21 @@ static int ccp_reverse_set_dm_area(struct ccp_dm_workarea *wa,
        dm_offset = 0;
        nbytes = len;
        while (nbytes) {
-               ksb_len = min_t(unsigned int, nbytes, se_len);
-               sg_offset -= ksb_len;
+               sb_len = min_t(unsigned int, nbytes, se_len);
+               sg_offset -= sb_len;
 
-               scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 0);
-               for (i = 0; i < ksb_len; i++)
-                       wa->address[dm_offset + i] = buffer[ksb_len - i - 1];
+               scatterwalk_map_and_copy(buffer, sg, sg_offset, sb_len, 0);
+               for (i = 0; i < sb_len; i++)
+                       wa->address[dm_offset + i] = buffer[sb_len - i - 1];
 
-               dm_offset += ksb_len;
-               nbytes -= ksb_len;
+               dm_offset += sb_len;
+               nbytes -= sb_len;
 
-               if ((ksb_len != se_len) && sign_extend) {
+               if ((sb_len != se_len) && sign_extend) {
                        /* Must sign-extend to nearest sign-extend length */
                        if (wa->address[dm_offset - 1] & 0x80)
                                memset(wa->address + dm_offset, 0xff,
-                                      se_len - ksb_len);
+                                      se_len - sb_len);
                }
        }
 
@@ -266,22 +223,22 @@ static void ccp_reverse_get_dm_area(struct ccp_dm_workarea *wa,
                                    struct scatterlist *sg,
                                    unsigned int len)
 {
-       unsigned int nbytes, sg_offset, dm_offset, ksb_len, i;
+       unsigned int nbytes, sg_offset, dm_offset, sb_len, i;
        u8 buffer[CCP_REVERSE_BUF_SIZE];
 
        sg_offset = 0;
        dm_offset = len;
        nbytes = len;
        while (nbytes) {
-               ksb_len = min_t(unsigned int, nbytes, sizeof(buffer));
-               dm_offset -= ksb_len;
+               sb_len = min_t(unsigned int, nbytes, sizeof(buffer));
+               dm_offset -= sb_len;
 
-               for (i = 0; i < ksb_len; i++)
-                       buffer[ksb_len - i - 1] = wa->address[dm_offset + i];
-               scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 1);
+               for (i = 0; i < sb_len; i++)
+                       buffer[sb_len - i - 1] = wa->address[dm_offset + i];
+               scatterwalk_map_and_copy(buffer, sg, sg_offset, sb_len, 1);
 
-               sg_offset += ksb_len;
-               nbytes -= ksb_len;
+               sg_offset += sb_len;
+               nbytes -= sb_len;
        }
 }
 
@@ -449,9 +406,9 @@ static void ccp_process_data(struct ccp_data *src, struct ccp_data *dst,
        }
 }
 
-static int ccp_copy_to_from_ksb(struct ccp_cmd_queue *cmd_q,
-                               struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
-                               u32 byte_swap, bool from)
+static int ccp_copy_to_from_sb(struct ccp_cmd_queue *cmd_q,
+                              struct ccp_dm_workarea *wa, u32 jobid, u32 sb,
+                              u32 byte_swap, bool from)
 {
        struct ccp_op op;
 
@@ -463,8 +420,8 @@ static int ccp_copy_to_from_ksb(struct ccp_cmd_queue *cmd_q,
 
        if (from) {
                op.soc = 1;
-               op.src.type = CCP_MEMTYPE_KSB;
-               op.src.u.ksb = ksb;
+               op.src.type = CCP_MEMTYPE_SB;
+               op.src.u.sb = sb;
                op.dst.type = CCP_MEMTYPE_SYSTEM;
                op.dst.u.dma.address = wa->dma.address;
                op.dst.u.dma.length = wa->length;
@@ -472,27 +429,27 @@ static int ccp_copy_to_from_ksb(struct ccp_cmd_queue *cmd_q,
                op.src.type = CCP_MEMTYPE_SYSTEM;
                op.src.u.dma.address = wa->dma.address;
                op.src.u.dma.length = wa->length;
-               op.dst.type = CCP_MEMTYPE_KSB;
-               op.dst.u.ksb = ksb;
+               op.dst.type = CCP_MEMTYPE_SB;
+               op.dst.u.sb = sb;
        }
 
        op.u.passthru.byte_swap = byte_swap;
 
-       return cmd_q->ccp->vdata->perform->perform_passthru(&op);
+       return cmd_q->ccp->vdata->perform->passthru(&op);
 }
 
-static int ccp_copy_to_ksb(struct ccp_cmd_queue *cmd_q,
-                          struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
-                          u32 byte_swap)
+static int ccp_copy_to_sb(struct ccp_cmd_queue *cmd_q,
+                         struct ccp_dm_workarea *wa, u32 jobid, u32 sb,
+                         u32 byte_swap)
 {
-       return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, false);
+       return ccp_copy_to_from_sb(cmd_q, wa, jobid, sb, byte_swap, false);
 }
 
-static int ccp_copy_from_ksb(struct ccp_cmd_queue *cmd_q,
-                            struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
-                            u32 byte_swap)
+static int ccp_copy_from_sb(struct ccp_cmd_queue *cmd_q,
+                           struct ccp_dm_workarea *wa, u32 jobid, u32 sb,
+                           u32 byte_swap)
 {
-       return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, true);
+       return ccp_copy_to_from_sb(cmd_q, wa, jobid, sb, byte_swap, true);
 }
 
 static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q,
@@ -527,54 +484,54 @@ static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q,
                        return -EINVAL;
        }
 
-       BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1);
-       BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1);
+       BUILD_BUG_ON(CCP_AES_KEY_SB_COUNT != 1);
+       BUILD_BUG_ON(CCP_AES_CTX_SB_COUNT != 1);
 
        ret = -EIO;
        memset(&op, 0, sizeof(op));
        op.cmd_q = cmd_q;
-       op.jobid = ccp_gen_jobid(cmd_q->ccp);
-       op.ksb_key = cmd_q->ksb_key;
-       op.ksb_ctx = cmd_q->ksb_ctx;
+       op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
+       op.sb_key = cmd_q->sb_key;
+       op.sb_ctx = cmd_q->sb_ctx;
        op.init = 1;
        op.u.aes.type = aes->type;
        op.u.aes.mode = aes->mode;
        op.u.aes.action = aes->action;
 
-       /* All supported key sizes fit in a single (32-byte) KSB entry
+       /* All supported key sizes fit in a single (32-byte) SB entry
         * and must be in little endian format. Use the 256-bit byte
         * swap passthru option to convert from big endian to little
         * endian.
         */
        ret = ccp_init_dm_workarea(&key, cmd_q,
-                                  CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
+                                  CCP_AES_KEY_SB_COUNT * CCP_SB_BYTES,
                                   DMA_TO_DEVICE);
        if (ret)
                return ret;
 
-       dm_offset = CCP_KSB_BYTES - aes->key_len;
+       dm_offset = CCP_SB_BYTES - aes->key_len;
        ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
-       ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
-                             CCP_PASSTHRU_BYTESWAP_256BIT);
+       ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
+                            CCP_PASSTHRU_BYTESWAP_256BIT);
        if (ret) {
                cmd->engine_error = cmd_q->cmd_error;
                goto e_key;
        }
 
-       /* The AES context fits in a single (32-byte) KSB entry and
+       /* The AES context fits in a single (32-byte) SB entry and
         * must be in little endian format. Use the 256-bit byte swap
         * passthru option to convert from big endian to little endian.
         */
        ret = ccp_init_dm_workarea(&ctx, cmd_q,
-                                  CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
+                                  CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES,
                                   DMA_BIDIRECTIONAL);
        if (ret)
                goto e_key;
 
-       dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
+       dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
        ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
-       ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
-                             CCP_PASSTHRU_BYTESWAP_256BIT);
+       ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
+                            CCP_PASSTHRU_BYTESWAP_256BIT);
        if (ret) {
                cmd->engine_error = cmd_q->cmd_error;
                goto e_ctx;
@@ -592,9 +549,9 @@ static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q,
                        op.eom = 1;
 
                        /* Push the K1/K2 key to the CCP now */
-                       ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid,
-                                               op.ksb_ctx,
-                                               CCP_PASSTHRU_BYTESWAP_256BIT);
+                       ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid,
+                                              op.sb_ctx,
+                                              CCP_PASSTHRU_BYTESWAP_256BIT);
                        if (ret) {
                                cmd->engine_error = cmd_q->cmd_error;
                                goto e_src;
@@ -602,15 +559,15 @@ static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q,
 
                        ccp_set_dm_area(&ctx, 0, aes->cmac_key, 0,
                                        aes->cmac_key_len);
-                       ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
-                                             CCP_PASSTHRU_BYTESWAP_256BIT);
+                       ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
+                                            CCP_PASSTHRU_BYTESWAP_256BIT);
                        if (ret) {
                                cmd->engine_error = cmd_q->cmd_error;
                                goto e_src;
                        }
                }
 
-               ret = cmd_q->ccp->vdata->perform->perform_aes(&op);
+               ret = cmd_q->ccp->vdata->perform->aes(&op);
                if (ret) {
                        cmd->engine_error = cmd_q->cmd_error;
                        goto e_src;
@@ -622,15 +579,15 @@ static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q,
        /* Retrieve the AES context - convert from LE to BE using
         * 32-byte (256-bit) byteswapping
         */
-       ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
-                               CCP_PASSTHRU_BYTESWAP_256BIT);
+       ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
+                              CCP_PASSTHRU_BYTESWAP_256BIT);
        if (ret) {
                cmd->engine_error = cmd_q->cmd_error;
                goto e_src;
        }
 
        /* ...but we only need AES_BLOCK_SIZE bytes */
-       dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
+       dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
        ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
 
 e_src:
@@ -680,56 +637,56 @@ static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
                        return -EINVAL;
        }
 
-       BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1);
-       BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1);
+       BUILD_BUG_ON(CCP_AES_KEY_SB_COUNT != 1);
+       BUILD_BUG_ON(CCP_AES_CTX_SB_COUNT != 1);
 
        ret = -EIO;
        memset(&op, 0, sizeof(op));
        op.cmd_q = cmd_q;
-       op.jobid = ccp_gen_jobid(cmd_q->ccp);
-       op.ksb_key = cmd_q->ksb_key;
-       op.ksb_ctx = cmd_q->ksb_ctx;
+       op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
+       op.sb_key = cmd_q->sb_key;
+       op.sb_ctx = cmd_q->sb_ctx;
        op.init = (aes->mode == CCP_AES_MODE_ECB) ? 0 : 1;
        op.u.aes.type = aes->type;
        op.u.aes.mode = aes->mode;
        op.u.aes.action = aes->action;
 
-       /* All supported key sizes fit in a single (32-byte) KSB entry
+       /* All supported key sizes fit in a single (32-byte) SB entry
         * and must be in little endian format. Use the 256-bit byte
         * swap passthru option to convert from big endian to little
         * endian.
         */
        ret = ccp_init_dm_workarea(&key, cmd_q,
-                                  CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
+                                  CCP_AES_KEY_SB_COUNT * CCP_SB_BYTES,
                                   DMA_TO_DEVICE);
        if (ret)
                return ret;
 
-       dm_offset = CCP_KSB_BYTES - aes->key_len;
+       dm_offset = CCP_SB_BYTES - aes->key_len;
        ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
-       ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
-                             CCP_PASSTHRU_BYTESWAP_256BIT);
+       ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
+                            CCP_PASSTHRU_BYTESWAP_256BIT);
        if (ret) {
                cmd->engine_error = cmd_q->cmd_error;
                goto e_key;
        }
 
-       /* The AES context fits in a single (32-byte) KSB entry and
+       /* The AES context fits in a single (32-byte) SB entry and
         * must be in little endian format. Use the 256-bit byte swap
         * passthru option to convert from big endian to little endian.
         */
        ret = ccp_init_dm_workarea(&ctx, cmd_q,
-                                  CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
+                                  CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES,
                                   DMA_BIDIRECTIONAL);
        if (ret)
                goto e_key;
 
        if (aes->mode != CCP_AES_MODE_ECB) {
-               /* Load the AES context - conver to LE */
-               dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
+               /* Load the AES context - convert to LE */
+               dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
                ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
-               ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
-                                     CCP_PASSTHRU_BYTESWAP_256BIT);
+               ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
+                                    CCP_PASSTHRU_BYTESWAP_256BIT);
                if (ret) {
                        cmd->engine_error = cmd_q->cmd_error;
                        goto e_ctx;
@@ -772,7 +729,7 @@ static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
                                op.soc = 1;
                }
 
-               ret = cmd_q->ccp->vdata->perform->perform_aes(&op);
+               ret = cmd_q->ccp->vdata->perform->aes(&op);
                if (ret) {
                        cmd->engine_error = cmd_q->cmd_error;
                        goto e_dst;
@@ -785,15 +742,15 @@ static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
                /* Retrieve the AES context - convert from LE to BE using
                 * 32-byte (256-bit) byteswapping
                 */
-               ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
-                                       CCP_PASSTHRU_BYTESWAP_256BIT);
+               ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
+                                      CCP_PASSTHRU_BYTESWAP_256BIT);
                if (ret) {
                        cmd->engine_error = cmd_q->cmd_error;
                        goto e_dst;
                }
 
                /* ...but we only need AES_BLOCK_SIZE bytes */
-               dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
+               dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
                ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
        }
 
@@ -857,53 +814,53 @@ static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q,
        if (!xts->key || !xts->iv || !xts->src || !xts->dst)
                return -EINVAL;
 
-       BUILD_BUG_ON(CCP_XTS_AES_KEY_KSB_COUNT != 1);
-       BUILD_BUG_ON(CCP_XTS_AES_CTX_KSB_COUNT != 1);
+       BUILD_BUG_ON(CCP_XTS_AES_KEY_SB_COUNT != 1);
+       BUILD_BUG_ON(CCP_XTS_AES_CTX_SB_COUNT != 1);
 
        ret = -EIO;
        memset(&op, 0, sizeof(op));
        op.cmd_q = cmd_q;
-       op.jobid = ccp_gen_jobid(cmd_q->ccp);
-       op.ksb_key = cmd_q->ksb_key;
-       op.ksb_ctx = cmd_q->ksb_ctx;
+       op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
+       op.sb_key = cmd_q->sb_key;
+       op.sb_ctx = cmd_q->sb_ctx;
        op.init = 1;
        op.u.xts.action = xts->action;
        op.u.xts.unit_size = xts->unit_size;
 
-       /* All supported key sizes fit in a single (32-byte) KSB entry
+       /* All supported key sizes fit in a single (32-byte) SB entry
         * and must be in little endian format. Use the 256-bit byte
         * swap passthru option to convert from big endian to little
         * endian.
         */
        ret = ccp_init_dm_workarea(&key, cmd_q,
-                                  CCP_XTS_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
+                                  CCP_XTS_AES_KEY_SB_COUNT * CCP_SB_BYTES,
                                   DMA_TO_DEVICE);
        if (ret)
                return ret;
 
-       dm_offset = CCP_KSB_BYTES - AES_KEYSIZE_128;
+       dm_offset = CCP_SB_BYTES - AES_KEYSIZE_128;
        ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len);
        ccp_set_dm_area(&key, 0, xts->key, dm_offset, xts->key_len);
-       ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
-                             CCP_PASSTHRU_BYTESWAP_256BIT);
+       ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
+                            CCP_PASSTHRU_BYTESWAP_256BIT);
        if (ret) {
                cmd->engine_error = cmd_q->cmd_error;
                goto e_key;
        }
 
-       /* The AES context fits in a single (32-byte) KSB entry and
+       /* The AES context fits in a single (32-byte) SB entry and
         * for XTS is already in little endian format so no byte swapping
         * is needed.
         */
        ret = ccp_init_dm_workarea(&ctx, cmd_q,
-                                  CCP_XTS_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
+                                  CCP_XTS_AES_CTX_SB_COUNT * CCP_SB_BYTES,
                                   DMA_BIDIRECTIONAL);
        if (ret)
                goto e_key;
 
        ccp_set_dm_area(&ctx, 0, xts->iv, 0, xts->iv_len);
-       ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
-                             CCP_PASSTHRU_BYTESWAP_NOOP);
+       ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
+                            CCP_PASSTHRU_BYTESWAP_NOOP);
        if (ret) {
                cmd->engine_error = cmd_q->cmd_error;
                goto e_ctx;
@@ -937,7 +894,7 @@ static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q,
                if (!src.sg_wa.bytes_left)
                        op.eom = 1;
 
-               ret = cmd_q->ccp->vdata->perform->perform_xts_aes(&op);
+               ret = cmd_q->ccp->vdata->perform->xts_aes(&op);
                if (ret) {
                        cmd->engine_error = cmd_q->cmd_error;
                        goto e_dst;
@@ -949,15 +906,15 @@ static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q,
        /* Retrieve the AES context - convert from LE to BE using
         * 32-byte (256-bit) byteswapping
         */
-       ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
-                               CCP_PASSTHRU_BYTESWAP_256BIT);
+       ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
+                              CCP_PASSTHRU_BYTESWAP_256BIT);
        if (ret) {
                cmd->engine_error = cmd_q->cmd_error;
                goto e_dst;
        }
 
        /* ...but we only need AES_BLOCK_SIZE bytes */
-       dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
+       dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
        ccp_get_dm_area(&ctx, dm_offset, xts->iv, 0, xts->iv_len);
 
 e_dst:
@@ -982,163 +939,227 @@ static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
        struct ccp_dm_workarea ctx;
        struct ccp_data src;
        struct ccp_op op;
+       unsigned int ioffset, ooffset;
+       unsigned int digest_size;
+       int sb_count;
+       const void *init;
+       u64 block_size;
+       int ctx_size;
        int ret;
 
-       if (sha->ctx_len != CCP_SHA_CTXSIZE)
+       switch (sha->type) {
+       case CCP_SHA_TYPE_1:
+               if (sha->ctx_len < SHA1_DIGEST_SIZE)
+                       return -EINVAL;
+               block_size = SHA1_BLOCK_SIZE;
+               break;
+       case CCP_SHA_TYPE_224:
+               if (sha->ctx_len < SHA224_DIGEST_SIZE)
+                       return -EINVAL;
+               block_size = SHA224_BLOCK_SIZE;
+               break;
+       case CCP_SHA_TYPE_256:
+               if (sha->ctx_len < SHA256_DIGEST_SIZE)
+                       return -EINVAL;
+               block_size = SHA256_BLOCK_SIZE;
+               break;
+       default:
                return -EINVAL;
+       }
 
        if (!sha->ctx)
                return -EINVAL;
 
-       if (!sha->final && (sha->src_len & (CCP_SHA_BLOCKSIZE - 1)))
+       if (!sha->final && (sha->src_len & (block_size - 1)))
                return -EINVAL;
 
-       if (!sha->src_len) {
-               const u8 *sha_zero;
+       /* The version 3 device can't handle zero-length input */
+       if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0)) {
 
-               /* Not final, just return */
-               if (!sha->final)
-                       return 0;
+               if (!sha->src_len) {
+                       unsigned int digest_len;
+                       const u8 *sha_zero;
 
-               /* CCP can't do a zero length sha operation so the caller
-                * must buffer the data.
-                */
-               if (sha->msg_bits)
-                       return -EINVAL;
+                       /* Not final, just return */
+                       if (!sha->final)
+                               return 0;
 
-               /* The CCP cannot perform zero-length sha operations so the
-                * caller is required to buffer data for the final operation.
-                * However, a sha operation for a message with a total length
-                * of zero is valid so known values are required to supply
-                * the result.
-                */
-               switch (sha->type) {
-               case CCP_SHA_TYPE_1:
-                       sha_zero = sha1_zero_message_hash;
-                       break;
-               case CCP_SHA_TYPE_224:
-                       sha_zero = sha224_zero_message_hash;
-                       break;
-               case CCP_SHA_TYPE_256:
-                       sha_zero = sha256_zero_message_hash;
-                       break;
-               default:
-                       return -EINVAL;
-               }
+                       /* CCP can't do a zero length sha operation so the
+                        * caller must buffer the data.
+                        */
+                       if (sha->msg_bits)
+                               return -EINVAL;
+
+                       /* The CCP cannot perform zero-length sha operations
+                        * so the caller is required to buffer data for the
+                        * final operation. However, a sha operation for a
+                        * message with a total length of zero is valid so
+                        * known values are required to supply the result.
+                        */
+                       switch (sha->type) {
+                       case CCP_SHA_TYPE_1:
+                               sha_zero = sha1_zero_message_hash;
+                               digest_len = SHA1_DIGEST_SIZE;
+                               break;
+                       case CCP_SHA_TYPE_224:
+                               sha_zero = sha224_zero_message_hash;
+                               digest_len = SHA224_DIGEST_SIZE;
+                               break;
+                       case CCP_SHA_TYPE_256:
+                               sha_zero = sha256_zero_message_hash;
+                               digest_len = SHA256_DIGEST_SIZE;
+                               break;
+                       default:
+                               return -EINVAL;
+                       }
 
-               scatterwalk_map_and_copy((void *)sha_zero, sha->ctx, 0,
-                                        sha->ctx_len, 1);
+                       scatterwalk_map_and_copy((void *)sha_zero, sha->ctx, 0,
+                                                digest_len, 1);
 
-               return 0;
+                       return 0;
+               }
        }
 
-       if (!sha->src)
-               return -EINVAL;
+       /* Set variables used throughout */
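+       /* ioffset/ooffset give the offset of the hash state within the
+        * SB-sized workarea: 0 on the version 3 device, otherwise
+        * CCP_SB_BYTES minus the digest size (i.e. right-aligned); SHA-224
+        * keeps ioffset at 0 since its input context is a full
+        * SHA-256-sized state.
+        */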
+       switch (sha->type) {
+       case CCP_SHA_TYPE_1:
+               digest_size = SHA1_DIGEST_SIZE;
+               init = (void *) ccp_sha1_init;
+               ctx_size = SHA1_DIGEST_SIZE;
+               sb_count = 1;
+               if (cmd_q->ccp->vdata->version != CCP_VERSION(3, 0))
+                       ooffset = ioffset = CCP_SB_BYTES - SHA1_DIGEST_SIZE;
+               else
+                       ooffset = ioffset = 0;
+               break;
+       case CCP_SHA_TYPE_224:
+               digest_size = SHA224_DIGEST_SIZE;
+               init = (void *) ccp_sha224_init;
+               ctx_size = SHA256_DIGEST_SIZE;
+               sb_count = 1;
+               ioffset = 0;
+               if (cmd_q->ccp->vdata->version != CCP_VERSION(3, 0))
+                       ooffset = CCP_SB_BYTES - SHA224_DIGEST_SIZE;
+               else
+                       ooffset = 0;
+               break;
+       case CCP_SHA_TYPE_256:
+               digest_size = SHA256_DIGEST_SIZE;
+               init = (void *) ccp_sha256_init;
+               ctx_size = SHA256_DIGEST_SIZE;
+               sb_count = 1;
+               ooffset = ioffset = 0;
+               break;
+       default:
+               ret = -EINVAL;
+               goto e_data;
+       }
 
-       BUILD_BUG_ON(CCP_SHA_KSB_COUNT != 1);
+       /* For zero-length plaintext the src pointer is ignored;
+        * otherwise the src pointer must be valid
+        */
+       if (sha->src_len && !sha->src)
+               return -EINVAL;
 
        memset(&op, 0, sizeof(op));
        op.cmd_q = cmd_q;
-       op.jobid = ccp_gen_jobid(cmd_q->ccp);
-       op.ksb_ctx = cmd_q->ksb_ctx;
+       op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
+       op.sb_ctx = cmd_q->sb_ctx; /* Pre-allocated */
        op.u.sha.type = sha->type;
        op.u.sha.msg_bits = sha->msg_bits;
 
-       /* The SHA context fits in a single (32-byte) KSB entry and
-        * must be in little endian format. Use the 256-bit byte swap
-        * passthru option to convert from big endian to little endian.
-        */
-       ret = ccp_init_dm_workarea(&ctx, cmd_q,
-                                  CCP_SHA_KSB_COUNT * CCP_KSB_BYTES,
+       ret = ccp_init_dm_workarea(&ctx, cmd_q, sb_count * CCP_SB_BYTES,
                                   DMA_BIDIRECTIONAL);
        if (ret)
                return ret;
-
        if (sha->first) {
-               const __be32 *init;
-
                switch (sha->type) {
                case CCP_SHA_TYPE_1:
-                       init = ccp_sha1_init;
-                       break;
                case CCP_SHA_TYPE_224:
-                       init = ccp_sha224_init;
-                       break;
                case CCP_SHA_TYPE_256:
-                       init = ccp_sha256_init;
+                       memcpy(ctx.address + ioffset, init, ctx_size);
                        break;
                default:
                        ret = -EINVAL;
                        goto e_ctx;
                }
-               memcpy(ctx.address, init, CCP_SHA_CTXSIZE);
        } else {
-               ccp_set_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);
+               /* Restore the context */
+               ccp_set_dm_area(&ctx, 0, sha->ctx, 0,
+                               sb_count * CCP_SB_BYTES);
        }
 
-       ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
-                             CCP_PASSTHRU_BYTESWAP_256BIT);
+       ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
+                            CCP_PASSTHRU_BYTESWAP_256BIT);
        if (ret) {
                cmd->engine_error = cmd_q->cmd_error;
                goto e_ctx;
        }
 
-       /* Send data to the CCP SHA engine */
-       ret = ccp_init_data(&src, cmd_q, sha->src, sha->src_len,
-                           CCP_SHA_BLOCKSIZE, DMA_TO_DEVICE);
-       if (ret)
-               goto e_ctx;
+       if (sha->src) {
+               /* Send data to the CCP SHA engine; block_size is set above */
+               ret = ccp_init_data(&src, cmd_q, sha->src, sha->src_len,
+                                   block_size, DMA_TO_DEVICE);
+               if (ret)
+                       goto e_ctx;
 
-       while (src.sg_wa.bytes_left) {
-               ccp_prepare_data(&src, NULL, &op, CCP_SHA_BLOCKSIZE, false);
-               if (sha->final && !src.sg_wa.bytes_left)
-                       op.eom = 1;
+               while (src.sg_wa.bytes_left) {
+                       ccp_prepare_data(&src, NULL, &op, block_size, false);
+                       if (sha->final && !src.sg_wa.bytes_left)
+                               op.eom = 1;
+
+                       ret = cmd_q->ccp->vdata->perform->sha(&op);
+                       if (ret) {
+                               cmd->engine_error = cmd_q->cmd_error;
+                               goto e_data;
+                       }
 
-               ret = cmd_q->ccp->vdata->perform->perform_sha(&op);
+                       ccp_process_data(&src, NULL, &op);
+               }
+       } else {
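+               /* Zero-length data (only reachable on non-v3 devices): flag
+                * end-of-message and issue a single SHA op with no source.
+                */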
+               op.eom = 1;
+               ret = cmd_q->ccp->vdata->perform->sha(&op);
                if (ret) {
                        cmd->engine_error = cmd_q->cmd_error;
                        goto e_data;
                }
-
-               ccp_process_data(&src, NULL, &op);
        }
 
        /* Retrieve the SHA context - convert from LE to BE using
         * 32-byte (256-bit) byteswapping
         */
-       ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
-                               CCP_PASSTHRU_BYTESWAP_256BIT);
+       ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
+                              CCP_PASSTHRU_BYTESWAP_256BIT);
        if (ret) {
                cmd->engine_error = cmd_q->cmd_error;
                goto e_data;
        }
 
-       ccp_get_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);
-
-       if (sha->final && sha->opad) {
-               /* HMAC operation, recursively perform final SHA */
-               struct ccp_cmd hmac_cmd;
-               struct scatterlist sg;
-               u64 block_size, digest_size;
-               u8 *hmac_buf;
-
+       if (sha->final) {
+               /* Finishing up, so get the digest */
                switch (sha->type) {
                case CCP_SHA_TYPE_1:
-                       block_size = SHA1_BLOCK_SIZE;
-                       digest_size = SHA1_DIGEST_SIZE;
-                       break;
                case CCP_SHA_TYPE_224:
-                       block_size = SHA224_BLOCK_SIZE;
-                       digest_size = SHA224_DIGEST_SIZE;
-                       break;
                case CCP_SHA_TYPE_256:
-                       block_size = SHA256_BLOCK_SIZE;
-                       digest_size = SHA256_DIGEST_SIZE;
+                       ccp_get_dm_area(&ctx, ooffset,
+                                       sha->ctx, 0,
+                                       digest_size);
                        break;
                default:
                        ret = -EINVAL;
-                       goto e_data;
+                       goto e_ctx;
                }
+       } else {
+               /* Stash the context */
+               ccp_get_dm_area(&ctx, 0, sha->ctx, 0,
+                               sb_count * CCP_SB_BYTES);
+       }
+
+       if (sha->final && sha->opad) {
+               /* HMAC operation, recursively perform final SHA */
+               struct ccp_cmd hmac_cmd;
+               struct scatterlist sg;
+               u8 *hmac_buf;
 
                if (sha->opad_len != block_size) {
                        ret = -EINVAL;
@@ -1153,7 +1174,18 @@ static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
                sg_init_one(&sg, hmac_buf, block_size + digest_size);
 
                scatterwalk_map_and_copy(hmac_buf, sha->opad, 0, block_size, 0);
-               memcpy(hmac_buf + block_size, ctx.address, digest_size);
+               switch (sha->type) {
+               case CCP_SHA_TYPE_1:
+               case CCP_SHA_TYPE_224:
+               case CCP_SHA_TYPE_256:
+                       memcpy(hmac_buf + block_size,
+                              ctx.address + ooffset,
+                              digest_size);
+                       break;
+               default:
+                       ret = -EINVAL;
+                       goto e_ctx;
+               }
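+               /* hmac_buf now holds opad || inner digest; the recursive SHA
+                * command below hashes it to produce the final HMAC value.
+                */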
 
                memset(&hmac_cmd, 0, sizeof(hmac_cmd));
                hmac_cmd.engine = CCP_ENGINE_SHA;
@@ -1176,7 +1208,8 @@ static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
        }
 
 e_data:
-       ccp_free_data(&src, cmd_q);
+       if (sha->src)
+               ccp_free_data(&src, cmd_q);
 
 e_ctx:
        ccp_dm_free(&ctx);
@@ -1190,7 +1223,7 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
        struct ccp_dm_workarea exp, src;
        struct ccp_data dst;
        struct ccp_op op;
-       unsigned int ksb_count, i_len, o_len;
+       unsigned int sb_count, i_len, o_len;
        int ret;
 
        if (rsa->key_size > CCP_RSA_MAX_WIDTH)
@@ -1208,16 +1241,17 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
        o_len = ((rsa->key_size + 255) / 256) * 32;
        i_len = o_len * 2;
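        /* o_len is the key size rounded up to a 256-bit multiple, in bytes
         * (e.g. a 2048-bit key gives o_len = 256); i_len holds the modulus
         * and the message concatenated, hence twice that.
         */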
 
-       ksb_count = o_len / CCP_KSB_BYTES;
+       sb_count = o_len / CCP_SB_BYTES;
 
        memset(&op, 0, sizeof(op));
        op.cmd_q = cmd_q;
        op.jobid = ccp_gen_jobid(cmd_q->ccp);
-       op.ksb_key = ccp_alloc_ksb(cmd_q->ccp, ksb_count);
-       if (!op.ksb_key)
+       op.sb_key = cmd_q->ccp->vdata->perform->sballoc(cmd_q, sb_count);
+
+       if (!op.sb_key)
                return -EIO;
 
-       /* The RSA exponent may span multiple (32-byte) KSB entries and must
+       /* The RSA exponent may span multiple (32-byte) SB entries and must
         * be in little endian format. Reverse-copy each 32-byte chunk
         * of the exponent (En chunk to E0 chunk, E(n-1) chunk to E1 chunk),
         * reversing the bytes within each chunk, and do not perform any byte swap
@@ -1225,14 +1259,14 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
         */
        ret = ccp_init_dm_workarea(&exp, cmd_q, o_len, DMA_TO_DEVICE);
        if (ret)
-               goto e_ksb;
+               goto e_sb;
 
        ret = ccp_reverse_set_dm_area(&exp, rsa->exp, rsa->exp_len,
-                                     CCP_KSB_BYTES, false);
+                                     CCP_SB_BYTES, false);
        if (ret)
                goto e_exp;
-       ret = ccp_copy_to_ksb(cmd_q, &exp, op.jobid, op.ksb_key,
-                             CCP_PASSTHRU_BYTESWAP_NOOP);
+       ret = ccp_copy_to_sb(cmd_q, &exp, op.jobid, op.sb_key,
+                            CCP_PASSTHRU_BYTESWAP_NOOP);
        if (ret) {
                cmd->engine_error = cmd_q->cmd_error;
                goto e_exp;
@@ -1247,12 +1281,12 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
                goto e_exp;
 
        ret = ccp_reverse_set_dm_area(&src, rsa->mod, rsa->mod_len,
-                                     CCP_KSB_BYTES, false);
+                                     CCP_SB_BYTES, false);
        if (ret)
                goto e_src;
        src.address += o_len;   /* Adjust the address for the copy operation */
        ret = ccp_reverse_set_dm_area(&src, rsa->src, rsa->src_len,
-                                     CCP_KSB_BYTES, false);
+                                     CCP_SB_BYTES, false);
        if (ret)
                goto e_src;
        src.address -= o_len;   /* Reset the address to original value */
@@ -1274,7 +1308,7 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
        op.u.rsa.mod_size = rsa->key_size;
        op.u.rsa.input_len = i_len;
 
-       ret = cmd_q->ccp->vdata->perform->perform_rsa(&op);
+       ret = cmd_q->ccp->vdata->perform->rsa(&op);
        if (ret) {
                cmd->engine_error = cmd_q->cmd_error;
                goto e_dst;
@@ -1291,8 +1325,8 @@ e_src:
 e_exp:
        ccp_dm_free(&exp);
 
-e_ksb:
-       ccp_free_ksb(cmd_q->ccp, op.ksb_key, ksb_count);
+e_sb:
+       cmd_q->ccp->vdata->perform->sbfree(cmd_q, op.sb_key, sb_count);
 
        return ret;
 }
@@ -1306,7 +1340,7 @@ static int ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q,
        struct ccp_op op;
        bool in_place = false;
        unsigned int i;
-       int ret;
+       int ret = 0;
 
        if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1)))
                return -EINVAL;
@@ -1321,26 +1355,26 @@ static int ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q,
                        return -EINVAL;
        }
 
-       BUILD_BUG_ON(CCP_PASSTHRU_KSB_COUNT != 1);
+       BUILD_BUG_ON(CCP_PASSTHRU_SB_COUNT != 1);
 
        memset(&op, 0, sizeof(op));
        op.cmd_q = cmd_q;
-       op.jobid = ccp_gen_jobid(cmd_q->ccp);
+       op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
 
        if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
                /* Load the mask */
-               op.ksb_key = cmd_q->ksb_key;
+               op.sb_key = cmd_q->sb_key;
 
                ret = ccp_init_dm_workarea(&mask, cmd_q,
-                                          CCP_PASSTHRU_KSB_COUNT *
-                                          CCP_KSB_BYTES,
+                                          CCP_PASSTHRU_SB_COUNT *
+                                          CCP_SB_BYTES,
                                           DMA_TO_DEVICE);
                if (ret)
                        return ret;
 
                ccp_set_dm_area(&mask, 0, pt->mask, 0, pt->mask_len);
-               ret = ccp_copy_to_ksb(cmd_q, &mask, op.jobid, op.ksb_key,
-                                     CCP_PASSTHRU_BYTESWAP_NOOP);
+               ret = ccp_copy_to_sb(cmd_q, &mask, op.jobid, op.sb_key,
+                                    CCP_PASSTHRU_BYTESWAP_NOOP);
                if (ret) {
                        cmd->engine_error = cmd_q->cmd_error;
                        goto e_mask;
@@ -1399,7 +1433,7 @@ static int ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q,
                op.dst.u.dma.offset = dst.sg_wa.sg_used;
                op.dst.u.dma.length = op.src.u.dma.length;
 
-               ret = cmd_q->ccp->vdata->perform->perform_passthru(&op);
+               ret = cmd_q->ccp->vdata->perform->passthru(&op);
                if (ret) {
                        cmd->engine_error = cmd_q->cmd_error;
                        goto e_dst;
@@ -1448,7 +1482,7 @@ static int ccp_run_passthru_nomap_cmd(struct ccp_cmd_queue *cmd_q,
                        return -EINVAL;
        }
 
-       BUILD_BUG_ON(CCP_PASSTHRU_KSB_COUNT != 1);
+       BUILD_BUG_ON(CCP_PASSTHRU_SB_COUNT != 1);
 
        memset(&op, 0, sizeof(op));
        op.cmd_q = cmd_q;
@@ -1456,13 +1490,13 @@ static int ccp_run_passthru_nomap_cmd(struct ccp_cmd_queue *cmd_q,
 
        if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
                /* Load the mask */
-               op.ksb_key = cmd_q->ksb_key;
+               op.sb_key = cmd_q->sb_key;
 
                mask.length = pt->mask_len;
                mask.dma.address = pt->mask;
                mask.dma.length = pt->mask_len;
 
-               ret = ccp_copy_to_ksb(cmd_q, &mask, op.jobid, op.ksb_key,
+               ret = ccp_copy_to_sb(cmd_q, &mask, op.jobid, op.sb_key,
                                     CCP_PASSTHRU_BYTESWAP_NOOP);
                if (ret) {
                        cmd->engine_error = cmd_q->cmd_error;
@@ -1484,7 +1518,7 @@ static int ccp_run_passthru_nomap_cmd(struct ccp_cmd_queue *cmd_q,
        op.dst.u.dma.offset = 0;
        op.dst.u.dma.length = pt->src_len;
 
-       ret = cmd_q->ccp->vdata->perform->perform_passthru(&op);
+       ret = cmd_q->ccp->vdata->perform->passthru(&op);
        if (ret)
                cmd->engine_error = cmd_q->cmd_error;
 
@@ -1514,7 +1548,7 @@ static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 
        memset(&op, 0, sizeof(op));
        op.cmd_q = cmd_q;
-       op.jobid = ccp_gen_jobid(cmd_q->ccp);
+       op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
 
        /* Concatenate the modulus and the operands. Both the modulus and
         * the operands must be in little endian format.  Since the input
@@ -1575,7 +1609,7 @@ static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 
        op.u.ecc.function = cmd->u.ecc.function;
 
-       ret = cmd_q->ccp->vdata->perform->perform_ecc(&op);
+       ret = cmd_q->ccp->vdata->perform->ecc(&op);
        if (ret) {
                cmd->engine_error = cmd_q->cmd_error;
                goto e_dst;
@@ -1639,7 +1673,7 @@ static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 
        memset(&op, 0, sizeof(op));
        op.cmd_q = cmd_q;
-       op.jobid = ccp_gen_jobid(cmd_q->ccp);
+       op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
 
        /* Concatenate the modulus and the operands. Both the modulus and
         * the operands must be in little endian format.  Since the input
@@ -1677,7 +1711,7 @@ static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
    &