Linux cryptographic layer development

Linux cryptographic layer development
 help / color / mirror / Atom feed

* [PATCH 09/19] crypto: cmh - add SM4 skcipher/aead/cmac/xcbc
From: Saravanakrishnan Krishnamoorthy @ 2026-06-25 17:33 UTC (permalink / raw)
  To: Albert Ou, Alex Ousherovitch, Conor Dooley, David S. Miller,
	Herbert Xu, Jonathan Corbet, Krzysztof Kozlowski, Palmer Dabbelt,
	Paul Walmsley, Rob Herring, Saravanakrishnan Krishnamoorthy,
	Shuah Khan
  Cc: Alexandre Ghiti, devicetree, Joel Wittenauer, linux-api,
	linux-crypto, linux-doc, linux-kernel, linux-kselftest,
	linux-riscv, Shuah Khan, sipsupport, Thi Nguyen
In-Reply-To: <20260625173328.1140487-1-skrishnamoorthy@rambus.com>

From: Alex Ousherovitch <aousherovitch@rambus.com>

Register SM4 algorithms using the CMH SM4 core (core ID 0x04):
- skcipher: SM4-ECB, SM4-CBC, SM4-CTR, SM4-XTS, SM4-CFB
- aead: SM4-GCM, SM4-CCM
- ahash: SM4-CMAC, SM4-XCBC

Co-developed-by: Saravanakrishnan Krishnamoorthy <skrishnamoorthy@rambus.com>
Signed-off-by: Saravanakrishnan Krishnamoorthy <skrishnamoorthy@rambus.com>
Signed-off-by: Alex Ousherovitch <aousherovitch@rambus.com>
Reviewed-by: Joel Wittenauer <Joel.Wittenauer@cryptography.com>
Reviewed-by: Thi Nguyen <thin@rambus.com>
---
 drivers/crypto/cmh/Makefile           |   5 +-
 drivers/crypto/cmh/cmh_main.c         |  25 +
 drivers/crypto/cmh/cmh_sm4_aead.c     | 870 ++++++++++++++++++++++++++
 drivers/crypto/cmh/cmh_sm4_cmac.c     | 672 ++++++++++++++++++++
 drivers/crypto/cmh/cmh_sm4_skcipher.c | 690 ++++++++++++++++++++
 drivers/crypto/cmh/include/cmh_sm4.h  |  24 +
 6 files changed, 2285 insertions(+), 1 deletion(-)
 create mode 100644 drivers/crypto/cmh/cmh_sm4_aead.c
 create mode 100644 drivers/crypto/cmh/cmh_sm4_cmac.c
 create mode 100644 drivers/crypto/cmh/cmh_sm4_skcipher.c
 create mode 100644 drivers/crypto/cmh/include/cmh_sm4.h

diff --git a/drivers/crypto/cmh/Makefile b/drivers/crypto/cmh/Makefile
index ced8d1748e6c..1f36cd9c0b98 100644
--- a/drivers/crypto/cmh/Makefile
+++ b/drivers/crypto/cmh/Makefile
@@ -22,7 +22,10 @@ cmh-y := \
        cmh_sm3.o \
        cmh_aes.o \
        cmh_aes_aead.o \
-       cmh_aes_cmac.o
+       cmh_aes_cmac.o \
+       cmh_sm4_skcipher.o \
+       cmh_sm4_aead.o \
+       cmh_sm4_cmac.o

 # Management ioctl device (/dev/cmh_mgmt): key lifecycle, PKE, PQC ioctls.
 cmh-$(CONFIG_CRYPTO_DEV_CMH_MGMT) += \
diff --git a/drivers/crypto/cmh/cmh_main.c b/drivers/crypto/cmh/cmh_main.c
index 1edd8d14c666..5d67a4a12333 100644
--- a/drivers/crypto/cmh/cmh_main.c
+++ b/drivers/crypto/cmh/cmh_main.c
@@ -35,6 +35,7 @@
 #include "cmh_kmac.h"
 #include "cmh_sm3.h"
 #include "cmh_aes.h"
+#include "cmh_sm4.h"
 #include "cmh_mgmt.h"
 #include "cmh_registers.h"
 #include "cmh_debugfs.h"
@@ -237,6 +238,21 @@ static int cmh_probe(struct platform_device *pdev)
        if (ret)
                goto err_aes_cmac_register;

+       /* Register SM4 skcipher algorithms */
+       ret = cmh_sm4_register();
+       if (ret)
+               goto err_sm4_register;
+
+       /* Register SM4 AEAD algorithms (GCM, CCM) */
+       ret = cmh_sm4_aead_register();
+       if (ret)
+               goto err_sm4_aead_register;
+
+       /* Register SM4 CMAC/XCBC algorithms */
+       ret = cmh_sm4_cmac_register();
+       if (ret)
+               goto err_sm4_cmac_register;
+
        /* Register key management device (/dev/cmh_mgmt) */
        ret = cmh_mgmt_register();
        if (ret)
@@ -249,6 +265,12 @@ static int cmh_probe(struct platform_device *pdev)
        return 0;

 err_mgmt_register:
+       cmh_sm4_cmac_unregister();
+err_sm4_cmac_register:
+       cmh_sm4_aead_unregister();
+err_sm4_aead_register:
+       cmh_sm4_unregister();
+err_sm4_register:
        cmh_aes_cmac_unregister();
 err_aes_cmac_register:
        cmh_aes_aead_unregister();
@@ -291,6 +313,9 @@ static void cmh_remove(struct platform_device *pdev)
        cfg = &dev->config;

        cmh_mgmt_unregister();
+       cmh_sm4_cmac_unregister();
+       cmh_sm4_aead_unregister();
+       cmh_sm4_unregister();
        cmh_aes_cmac_unregister();
        cmh_aes_aead_unregister();
        cmh_aes_unregister();
diff --git a/drivers/crypto/cmh/cmh_sm4_aead.c b/drivers/crypto/cmh/cmh_sm4_aead.c
new file mode 100644
index 000000000000..478119bb9c08
--- /dev/null
+++ b/drivers/crypto/cmh/cmh_sm4_aead.c
@@ -0,0 +1,870 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2026 Cryptography Research, Inc. (CRI).
+ * CMH LKM -- Kernel Crypto API SM4 AEAD Driver (GCM/CCM)
+ *
+ * Registers AEAD algorithms with the Linux crypto subsystem:
+ *   gcm(sm4), ccm(sm4)
+ *
+ * GCM: SYS_CMD_WRITE(key) + SM4_CMD_INIT(mode=GCM) + [AAD_FINAL] + SM4_CMD_FINAL + FLUSH
+ * CCM: SYS_CMD_WRITE(key) + SM4_CMD_CCM_INIT + [AAD_FINAL] + SM4_CMD_FINAL + FLUSH
+ *   - SM4 CCM uses a distinct sm4_cmd_ccm_init struct
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/crypto.h>
+#include <crypto/internal/aead.h>
+#include <crypto/internal/cipher.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/utils.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+
+#include "cmh_sm4.h"
+#include "cmh_vcq.h"
+#include "cmh_sm4_abi.h"
+#include "cmh_sys_abi.h"
+#include "cmh_sys.h"
+#include "cmh_txn.h"
+#include "cmh_dma.h"
+#include "cmh_key.h"
+
+/*
+ * GCM IV contract:
+ *
+ * The SM4 core requires exactly 16 bytes loaded into its IV register.
+ * For standard 96-bit nonce GCM, the driver passes:
+ *
+ *   IV[0..11]  = user-supplied 12-byte nonce
+ *   IV[12..15] = 0x00000000
+ *
+ * The hardware internally sets the last 32 bits to the big-endian
+ * counter value 1 (forming J0 = nonce || 0x00000001) before
+ * processing AAD.  The driver must NOT pre-set the counter.
+ *
+ * If the IV format is incorrect, GCM authentication will fail
+ * (encrypt produces wrong ciphertext/tag, decrypt rejects).
+ */
+#define SM4_GCM_IV_SIZE                12U     /* GCM nonce size (standard) */
+#define SM4_GCM_HW_IV_SIZE     16U     /* HW requires 16-byte IV buffer */
+#define SM4_GCM_TAG_SIZE       16U
+
+/*
+ * CCM: callers pass a 16-byte IV in RFC 3610 format:
+ *   iv[0]             = L - 1, where L is the size of the length field
+ *   iv[1..14 - iv[0]] = nonce
+ *   remaining bytes   = counter (zeroed)
+ * Nonce length = 14 - iv[0], i.e. 7..13 bytes.
+ */
+#define SM4_CCM_IV_SIZE        16U
+
+/* AEAD construction implemented by an entry of sm4_aead_algs[]. */
+enum cmh_sm4_aead_type {
+       CMH_SM4_AEAD_GCM,       /* gcm(sm4) */
+       CMH_SM4_AEAD_CCM,       /* ccm(sm4) */
+};
+
+/* Static description of one AEAD algorithm exposed by this driver. */
+struct cmh_sm4_aead_info {
+       enum cmh_sm4_aead_type type;    /* GCM or CCM; selects the INIT command and fallbacks */
+       u32         sm4_mode;           /* SM4_MODE_* value passed to SM4_CMD_INIT on the GCM path */
+       u32         ivsize;             /* IV size in bytes expected from the caller */
+       u32         maxauthsize;        /* largest tag size; also the initial authsize of a tfm */
+       const char *alg_name;           /* generic algorithm name, e.g. "gcm(sm4)" */
+       const char *drv_name;           /* driver-specific name, e.g. "cri-cmh-gcm-sm4" */
+};
+
+/* Table of AEAD algorithms provided by this driver, one entry per mode. */
+static const struct cmh_sm4_aead_info sm4_aead_algs[] = {
+       {
+               .type        = CMH_SM4_AEAD_GCM,
+               .sm4_mode    = SM4_MODE_GCM,
+               .ivsize      = SM4_GCM_IV_SIZE,
+               .maxauthsize = SM4_GCM_TAG_SIZE,
+               .alg_name    = "gcm(sm4)",
+               .drv_name    = "cri-cmh-gcm-sm4",
+       },
+       {
+               .type        = CMH_SM4_AEAD_CCM,
+               .sm4_mode    = SM4_MODE_CCM,
+               .ivsize      = SM4_CCM_IV_SIZE,
+               /* CCM's largest tag is 16 bytes too; the GCM constant is reused */
+               .maxauthsize = SM4_GCM_TAG_SIZE,
+               .alg_name    = "ccm(sm4)",
+               .drv_name    = "cri-cmh-ccm-sm4",
+       },
+};
+
+/* Per-transform context, obtained with crypto_aead_ctx(). */
+struct cmh_sm4_aead_tfm_ctx {
+       struct cmh_key_ctx key;                 /* key state filled by cmh_key_setkey_raw() */
+       u32 authsize;                           /* tag length in bytes currently in effect */
+       struct crypto_cipher *sw_cipher;        /* CCM empty-input fallback */
+};
+
+/* Per-request context (lives in aead_request::__ctx) */
+
+/*
+ * Largest command sequence built for one request:
+ * key write + INIT + AAD_FINAL + FINAL + FLUSH.
+ */
+#define CMH_SM4_AEAD_MAX_PAYLOAD       5
+/*
+ * Capacity of the packed[] array handed to cmh_vcq_pack_and_submit_async().
+ * NOTE(review): presumably packing can emit up to two slots per command --
+ * confirm against the VCQ packer.
+ */
+#define CMH_SM4_AEAD_MAX_PACKED                (CMH_SM4_AEAD_MAX_PAYLOAD * 2)
+
+/*
+ * State that must survive from submission until cmh_sm4_aead_complete():
+ * the linearised bounce buffers, their DMA handles and the lengths and
+ * directions needed to unmap them again.
+ */
+struct cmh_sm4_aead_reqctx {
+       dma_addr_t in_dma;      /* mapping of in_buf (only when cryptlen > 0) */
+       dma_addr_t out_dma;     /* mapping of out_buf (only when cryptlen > 0) */
+       dma_addr_t iv_dma;      /* mapping of iv_buf, iv_map_len bytes */
+       dma_addr_t key_dma;     /* copy of tctx->key.raw.dma used for the key write */
+       dma_addr_t aad_dma;     /* mapping of aad_buf (only when assoclen > 0) */
+       dma_addr_t tag_dma;     /* mapping of tag_buf */
+       u8 *in_buf;             /* linearised input; expected tag in the empty-GCM decrypt fallback */
+       u8 *out_buf;            /* linear output, copied to req->dst on success */
+       u8 *iv_buf;             /* GCM: 16-byte IV block; CCM: bare nonce */
+       u8 *aad_buf;            /* linearised associated data */
+       u8 *tag_buf;            /* tag produced (encrypt) or supplied to HW (decrypt) */
+       u32 cryptlen;           /* payload length in bytes, tag excluded */
+       u32 assoclen;           /* associated-data length in bytes */
+       u32 authsize;           /* tag length in bytes */
+       u32 iv_map_len;         /* number of bytes of iv_buf that were DMA-mapped */
+       u32 keylen;             /* raw key length (set on the regular path only) */
+       bool encrypting;        /* true for encrypt; selects tag DMA direction and copy-out */
+       bool empty_gcm_fallback;        /* request is handled by cmh_sm4_gcm_empty() */
+       struct vcq_cmd packed[CMH_SM4_AEAD_MAX_PACKED]; /* packed command storage for submission */
+};
+
+/*
+ * Registered algorithm instance: the aead_alg handed to the crypto core
+ * together with the static description it was built from.  The wrapper is
+ * recovered from the aead_alg with container_of().
+ */
+struct cmh_sm4_aead_drv {
+       struct aead_alg                  alg;
+       const struct cmh_sm4_aead_info  *info;
+};
+
+/* Return the static algorithm description that @tfm was registered with. */
+static const struct cmh_sm4_aead_info *
+cmh_sm4_aead_get_info(struct crypto_aead *tfm)
+{
+       struct cmh_sm4_aead_drv *drv;
+
+       drv = container_of(crypto_aead_alg(tfm), struct cmh_sm4_aead_drv, alg);
+
+       return drv->info;
+}
+
+/* VCQ Builders -- SM4 AEAD-specific */
+
+/*
+ * Fill @slot with an SM4_CMD_INIT command.
+ *
+ * Used for GCM and, with mode = SM4_MODE_ECB, for the single-block
+ * encryption of the empty-input GCM fallback.
+ *
+ * @key_ref: key reference to use (callers pass SYS_REF_TEMP)
+ * @iv_dma:  DMA address of the IV buffer, 0 when no IV is used
+ * @keylen:  key length in bytes
+ * @ivlen:   IV length in bytes
+ * @mode:    SM4_MODE_* value
+ * @op:      SM4_OP_ENCRYPT or SM4_OP_DECRYPT
+ * @aadlen:  total associated-data length in bytes
+ * @iolen:   total payload length in bytes
+ */
+static void vcq_add_sm4_aead_init(struct vcq_cmd *slot, u32 core_id, u64 key_ref,
+                                 u64 iv_dma, u32 keylen, u32 ivlen,
+                                 u32 mode, u32 op, u32 aadlen, u32 iolen)
+{
+       /* Clear the whole slot first so unused fields are zero */
+       memset(slot, 0, sizeof(*slot));
+       slot->magic = VCQ_CMD_MAGIC;
+       slot->id = VCQ_CMD_ID(core_id, 0, 1, SM4_CMD_INIT);
+       slot->hwc.sm4.cmd_init.key = key_ref;
+       slot->hwc.sm4.cmd_init.iv = iv_dma;
+       slot->hwc.sm4.cmd_init.keylen = keylen;
+       slot->hwc.sm4.cmd_init.ivlen = ivlen;
+       slot->hwc.sm4.cmd_init.mode = mode;
+       slot->hwc.sm4.cmd_init.op = op;
+       slot->hwc.sm4.cmd_init.aadlen = aadlen;
+       slot->hwc.sm4.cmd_init.iolen = iolen;
+}
+
+/*
+ * Fill @slot with an SM4_CMD_CCM_INIT command.
+ *
+ * CCM has its own INIT layout: it takes the bare nonce instead of an IV
+ * block and carries the tag length up front.
+ *
+ * @key_ref:   key reference to use (callers pass SYS_REF_TEMP)
+ * @nonce_dma: DMA address of the nonce
+ * @keylen:    key length in bytes
+ * @noncelen:  nonce length in bytes
+ * @op:        SM4_OP_ENCRYPT or SM4_OP_DECRYPT
+ * @aadlen:    total associated-data length in bytes
+ * @iolen:     total payload length in bytes
+ * @taglen:    tag length in bytes
+ */
+static void vcq_add_sm4_ccm_init(struct vcq_cmd *slot, u32 core_id, u64 key_ref,
+                                u64 nonce_dma, u32 keylen, u32 noncelen,
+                                u32 op, u32 aadlen, u32 iolen, u32 taglen)
+{
+       /* Clear the whole slot first so unused fields are zero */
+       memset(slot, 0, sizeof(*slot));
+       slot->magic = VCQ_CMD_MAGIC;
+       slot->id = VCQ_CMD_ID(core_id, 0, 1, SM4_CMD_CCM_INIT);
+       slot->hwc.sm4.cmd_ccm_init.key = key_ref;
+       slot->hwc.sm4.cmd_ccm_init.nonce = nonce_dma;
+       slot->hwc.sm4.cmd_ccm_init.keylen = keylen;
+       slot->hwc.sm4.cmd_ccm_init.noncelen = noncelen;
+       slot->hwc.sm4.cmd_ccm_init.op = op;
+       slot->hwc.sm4.cmd_ccm_init.aadlen = aadlen;
+       slot->hwc.sm4.cmd_ccm_init.iolen = iolen;
+       slot->hwc.sm4.cmd_ccm_init.taglen = taglen;
+}
+
+/*
+ * Fill @slot with an SM4_CMD_AAD_FINAL command that feeds the complete
+ * associated data (@aadlen bytes at DMA address @aad_dma) in one go.
+ */
+static void vcq_add_sm4_aad_final(struct vcq_cmd *slot, u32 core_id, u64 aad_dma,
+                                 u32 aadlen)
+{
+       /* Clear the whole slot first so unused fields are zero */
+       memset(slot, 0, sizeof(*slot));
+       slot->magic = VCQ_CMD_MAGIC;
+       slot->id = VCQ_CMD_ID(core_id, 0, 1, SM4_CMD_AAD_FINAL);
+       slot->hwc.sm4.cmd_aad_final.data = aad_dma;
+       slot->hwc.sm4.cmd_aad_final.datalen = aadlen;
+}
+
+/*
+ * Fill @slot with an SM4_CMD_FINAL command.
+ *
+ * @input_dma:  DMA address of the payload input, 0 when @iolen is 0
+ * @output_dma: DMA address of the payload output, 0 when @iolen is 0
+ * @tag_dma:    DMA address of the tag buffer; the ECB fallback passes 0
+ * @iolen:      payload length in bytes
+ * @taglen:     tag length in bytes; the ECB fallback passes 0
+ */
+static void vcq_add_sm4_aead_final(struct vcq_cmd *slot, u32 core_id, u64 input_dma,
+                                  u64 output_dma, u64 tag_dma,
+                                  u32 iolen, u32 taglen)
+{
+       /* Clear the whole slot first so unused fields are zero */
+       memset(slot, 0, sizeof(*slot));
+       slot->magic = VCQ_CMD_MAGIC;
+       slot->id = VCQ_CMD_ID(core_id, 0, 1, SM4_CMD_FINAL);
+       slot->hwc.sm4.cmd_final.input = input_dma;
+       slot->hwc.sm4.cmd_final.output = output_dma;
+       slot->hwc.sm4.cmd_final.tag = tag_dma;
+       slot->hwc.sm4.cmd_final.iolen = iolen;
+       slot->hwc.sm4.cmd_final.taglen = taglen;
+}
+
+/*
+ * setkey: accept a 128-bit SM4 key.
+ *
+ * The key is installed in the software fallback cipher first (present on
+ * CCM transforms only) and then in the driver's key context.
+ *
+ * Return: 0 on success, -EINVAL for a wrong key length, or the error
+ * reported by crypto_cipher_setkey() / cmh_key_setkey_raw().
+ */
+static int cmh_sm4_aead_setkey(struct crypto_aead *tfm, const u8 *key,
+                              unsigned int keylen)
+{
+       struct cmh_sm4_aead_tfm_ctx *tctx = crypto_aead_ctx(tfm);
+       int err = 0;
+
+       /* SM4 always uses 128-bit keys */
+       if (keylen != CMH_SM4_KEY_SIZE)
+               return -EINVAL;
+
+       if (tctx->sw_cipher)
+               err = crypto_cipher_setkey(tctx->sw_cipher, key, keylen);
+       if (err)
+               return err;
+
+       return cmh_key_setkey_raw(&tctx->key, key, keylen, CORE_ID_SM4);
+}
+
+/*
+ * setauthsize: validate and record the tag length for @tfm.
+ *
+ * GCM accepts only the full 16-byte tag; every other mode (CCM) accepts
+ * the even sizes 4..16.
+ *
+ * Return: 0 on success, -EINVAL for an unsupported tag length.
+ */
+static int cmh_sm4_aead_setauthsize(struct crypto_aead *tfm,
+                                   unsigned int authsize)
+{
+       struct cmh_sm4_aead_tfm_ctx *tctx = crypto_aead_ctx(tfm);
+       bool valid;
+
+       switch (cmh_sm4_aead_get_info(tfm)->type) {
+       case CMH_SM4_AEAD_GCM:
+               /* eSW enforces taglen == 16 for SM4 GCM (EIP40_SM4_TAG_SIZE) */
+               valid = authsize == SM4_GCM_TAG_SIZE;
+               break;
+       default:
+               /* CCM: accept 4, 6, 8, 10, 12, 14, 16 per RFC 3610 */
+               valid = authsize >= 4 && authsize <= 16 && !(authsize & 1);
+               break;
+       }
+
+       if (!valid)
+               return -EINVAL;
+
+       tctx->authsize = authsize;
+       return 0;
+}
+
+/*
+ * Transform constructor: reset the context, start with the maximum tag
+ * size and, for CCM, allocate the "sm4" single-block cipher used by the
+ * empty-input fallback.
+ *
+ * Return: 0 on success or the error from crypto_alloc_cipher().
+ */
+static int cmh_sm4_aead_init_tfm(struct crypto_aead *tfm)
+{
+       const struct cmh_sm4_aead_info *info = cmh_sm4_aead_get_info(tfm);
+       struct cmh_sm4_aead_tfm_ctx *tctx = crypto_aead_ctx(tfm);
+       struct crypto_cipher *fallback;
+
+       memset(tctx, 0, sizeof(*tctx));
+       tctx->authsize = info->maxauthsize;
+
+       if (info->type == CMH_SM4_AEAD_CCM) {
+               fallback = crypto_alloc_cipher("sm4", 0, 0);
+               if (IS_ERR(fallback))
+                       return PTR_ERR(fallback);
+
+               tctx->sw_cipher = fallback;
+       }
+
+       crypto_aead_set_reqsize(tfm, sizeof(struct cmh_sm4_aead_reqctx));
+
+       return 0;
+}
+
+/* Transform destructor: drop the CCM fallback cipher (if any) and the key. */
+static void cmh_sm4_aead_exit_tfm(struct crypto_aead *tfm)
+{
+       struct cmh_sm4_aead_tfm_ctx *tctx = crypto_aead_ctx(tfm);
+       struct crypto_cipher *fallback = tctx->sw_cipher;
+
+       if (fallback)
+               crypto_free_cipher(fallback);
+
+       cmh_key_destroy(&tctx->key);
+}
+
+/*
+ * DMA unmap helper
+ *
+ * Undo every mapping recorded in @rctx.  Sizes and directions must match
+ * the ones used when mapping:
+ *   - the tag buffer is 16 bytes / FROM_DEVICE in the empty-GCM fallback,
+ *     otherwise authsize bytes with the direction chosen by @encrypting;
+ *   - payload and AAD buffers exist only when their length is non-zero.
+ */
+static void cmh_sm4_aead_unmap_dma(struct cmh_sm4_aead_reqctx *rctx)
+{
+       u32 tag_map_len;
+
+       cmh_dma_unmap_single(rctx->iv_dma, rctx->iv_map_len, DMA_TO_DEVICE);
+       tag_map_len = rctx->empty_gcm_fallback ?
+                     SM4_GCM_HW_IV_SIZE : rctx->authsize;
+       cmh_dma_unmap_single(rctx->tag_dma, tag_map_len,
+                            (rctx->encrypting || rctx->empty_gcm_fallback) ?
+                             DMA_FROM_DEVICE : DMA_TO_DEVICE);
+       if (rctx->cryptlen > 0) {
+               cmh_dma_unmap_single(rctx->out_dma, rctx->cryptlen,
+                                    DMA_FROM_DEVICE);
+               cmh_dma_unmap_single(rctx->in_dma, rctx->cryptlen,
+                                    DMA_TO_DEVICE);
+       }
+       if (rctx->assoclen > 0)
+               cmh_dma_unmap_single(rctx->aad_dma, rctx->assoclen,
+                                    DMA_TO_DEVICE);
+}
+
+/*
+ * Release every bounce buffer owned by @rctx and clear the pointers.
+ * Payload buffers may hold plaintext and are therefore wiped on free.
+ */
+static void cmh_sm4_aead_free_bufs(struct cmh_sm4_aead_reqctx *rctx)
+{
+       /* Payload: zeroised before being returned to the allocator */
+       kfree_sensitive(rctx->in_buf);
+       kfree_sensitive(rctx->out_buf);
+       rctx->in_buf = NULL;
+       rctx->out_buf = NULL;
+
+       /* IV, tag and AAD */
+       kfree(rctx->iv_buf);
+       kfree(rctx->tag_buf);
+       kfree(rctx->aad_buf);
+       rctx->iv_buf = NULL;
+       rctx->tag_buf = NULL;
+       rctx->aad_buf = NULL;
+}
+
+/*
+ * Completion callback for every asynchronous SM4 AEAD submission.
+ *
+ * @data:  the aead_request passed at submission time
+ * @error: 0 on success, negative errno otherwise; -EINPROGRESS is
+ *         forwarded to the caller without releasing anything
+ *
+ * Unmaps the DMA buffers, translates hardware failures, copies the
+ * results back into req->dst on success, frees the bounce buffers and
+ * completes the request.
+ */
+static void cmh_sm4_aead_complete(void *data, int error)
+{
+       struct aead_request *req = data;
+       struct cmh_sm4_aead_reqctx *rctx = aead_request_ctx(req);
+
+       /* Not a final status: the request still owns its buffers */
+       if (error == -EINPROGRESS) {
+               cmh_complete(&req->base, error);
+               return;
+       }
+
+       cmh_sm4_aead_unmap_dma(rctx);
+
+       /*
+        * Map HW error on decrypt to -EBADMSG.  The eSW SM4 core uses a
+        * single error code (-EIO) for both authentication failures and
+        * other core errors (e.g. DMA timeout), so we cannot distinguish
+        * them from the MBX_STATUS alone.  The kernel crypto API requires
+        * -EBADMSG for AEAD authentication failures.
+        *
+        * The empty-input GCM fallback is excluded: there the hardware
+        * only runs a plain ECB encryption and the tag is compared in
+        * software below, so -EIO can never mean "tag mismatch" and must
+        * be reported as the hardware failure it is.
+        */
+       if (error == -EIO && !rctx->encrypting && !rctx->empty_gcm_fallback)
+               error = -EBADMSG;
+
+       if (error)
+               goto out;
+
+       /* Fallback decrypt: tag_buf holds E(K, J0), in_buf the received tag */
+       if (rctx->empty_gcm_fallback && !rctx->encrypting &&
+           crypto_memneq(rctx->tag_buf, rctx->in_buf, rctx->authsize)) {
+               error = -EBADMSG;
+               goto out;
+       }
+
+       /* Payload goes right after the associated data ... */
+       if (rctx->cryptlen > 0)
+               scatterwalk_map_and_copy(rctx->out_buf, req->dst,
+                                        req->assoclen, rctx->cryptlen, 1);
+
+       /* ... and on encrypt the tag follows the payload */
+       if (rctx->encrypting)
+               scatterwalk_map_and_copy(rctx->tag_buf, req->dst,
+                                        req->assoclen + rctx->cryptlen,
+                                        rctx->authsize, 1);
+
+out:
+       cmh_sm4_aead_free_bufs(rctx);
+       cmh_complete(&req->base, error);
+}
+
+/*
+ * GCM empty-input fallback (SM4).
+ *
+ * When both AAD and plaintext are empty, GCM reduces to:
+ *   tag = E(K, J0) where J0 = nonce || 0x00000001
+ *
+ * The eSW GCM engine rejects this degenerate case, so we compute it
+ * via a single ECB block encryption of J0.
+ *
+ * VCQ: [SYS_CMD_WRITE] + SM4_CMD_INIT(ECB) + SM4_CMD_FINAL + FLUSH
+ *
+ * On encrypt the completion handler copies E(K, J0) out as the tag; on
+ * decrypt it compares E(K, J0) with the tag read from req->src here.
+ *
+ * Return: -EINPROGRESS once submitted, -EBUSY as reported by the submit
+ * helper, or a negative errno after releasing everything acquired here.
+ */
+static int cmh_sm4_gcm_empty(struct aead_request *req, u32 sm4_op)
+{
+       struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+       struct cmh_sm4_aead_tfm_ctx *tctx = crypto_aead_ctx(tfm);
+       struct cmh_sm4_aead_reqctx *rctx = aead_request_ctx(req);
+       struct vcq_cmd cmds[CMH_SM4_AEAD_MAX_PAYLOAD];
+       u64 key_ref;
+       u32 keylen, authsize;
+       struct core_dispatch d;
+       s32 target_mbx;
+       u32 core_id;
+       u32 idx;
+       int ret;
+       gfp_t gfp;
+
+       authsize = tctx->authsize;
+
+       gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
+             GFP_KERNEL : GFP_ATOMIC;
+
+       /*
+        * cryptlen/assoclen stay zero so the completion path skips the
+        * payload and AAD unmap/copy; empty_gcm_fallback selects the
+        * 16-byte FROM_DEVICE tag unmapping and the software tag compare.
+        */
+       memset(rctx, 0, sizeof(*rctx));
+       rctx->cryptlen = 0;
+       rctx->assoclen = 0;
+       rctx->authsize = authsize;
+       rctx->encrypting = (sm4_op == SM4_OP_ENCRYPT);
+       rctx->empty_gcm_fallback = true;
+
+       /* Build J0 = nonce || 0x00000001 in iv_buf */
+       rctx->iv_buf = kzalloc(SM4_GCM_HW_IV_SIZE, gfp);
+       if (!rctx->iv_buf)
+               return -ENOMEM;
+       memcpy(rctx->iv_buf, req->iv, SM4_GCM_IV_SIZE);
+       rctx->iv_buf[15] = 0x01;
+       rctx->iv_map_len = SM4_GCM_HW_IV_SIZE;
+
+       rctx->iv_dma = cmh_dma_map_single(rctx->iv_buf, SM4_GCM_HW_IV_SIZE,
+                                         DMA_TO_DEVICE);
+       if (cmh_dma_map_error(rctx->iv_dma)) {
+               ret = -ENOMEM;
+               goto out_free_iv;
+       }
+
+       /* Tag buffer -- receives E(K, J0) output */
+       rctx->tag_buf = kzalloc(SM4_GCM_HW_IV_SIZE, gfp);
+       if (!rctx->tag_buf) {
+               ret = -ENOMEM;
+               goto out_unmap_iv;
+       }
+       rctx->tag_dma = cmh_dma_map_single(rctx->tag_buf, SM4_GCM_HW_IV_SIZE,
+                                          DMA_FROM_DEVICE);
+       if (cmh_dma_map_error(rctx->tag_dma)) {
+               ret = -ENOMEM;
+               goto out_free_tag;
+       }
+
+       /* For decrypt: read expected tag from request */
+       if (!rctx->encrypting) {
+               rctx->in_buf = kmalloc(authsize, gfp);
+               if (!rctx->in_buf) {
+                       ret = -ENOMEM;
+                       goto out_unmap_tag;
+               }
+               /* With no AAD and no payload the tag sits at offset 0 */
+               scatterwalk_map_and_copy(rctx->in_buf, req->src, 0,
+                                        authsize, 0);
+       }
+
+       /*
+        * Resolve key
+        *
+        * NOTE(review): key.raw is used without checking that key.mode is
+        * CMH_KEY_RAW; the caller only rejects CMH_KEY_NONE -- confirm no
+        * other mode is reachable for SM4.
+        */
+       idx = 0;
+       rctx->key_dma = tctx->key.raw.dma;
+       vcq_add_sys_write(&cmds[idx++], SYS_REF_TEMP,
+                         (u64)rctx->key_dma, SYS_REF_NONE,
+                         tctx->key.raw.len,
+                         tctx->key.raw.sys_type);
+       key_ref = SYS_REF_TEMP;
+       keylen = tctx->key.raw.len;
+       d = cmh_core_select_instance(CMH_CORE_SM4);
+       target_mbx = d.mbx_idx;
+       core_id = d.core_id;
+
+       /* ECB INIT: single block encryption of J0 */
+       vcq_add_sm4_aead_init(&cmds[idx++], core_id, key_ref,
+                             0, keylen, 0, SM4_MODE_ECB, SM4_OP_ENCRYPT,
+                             0, SM4_GCM_HW_IV_SIZE);
+
+       /* FINAL: J0 in, E(K,J0) out */
+       vcq_add_sm4_aead_final(&cmds[idx++], core_id,
+                              (u64)rctx->iv_dma, (u64)rctx->tag_dma,
+                              0, SM4_GCM_HW_IV_SIZE, 0);
+
+       vcq_add_flush(&cmds[idx++], core_id);
+
+       ret = cmh_vcq_pack_and_submit_async(cmds, idx, rctx->packed,
+                                           CMH_SM4_AEAD_MAX_PACKED,
+                                           target_mbx,
+                                           cmh_sm4_aead_complete, req,
+                                           !!(req->base.flags &
+                                              CRYPTO_TFM_REQ_MAY_BACKLOG),
+                                           cmh_tm_async_timeout_jiffies());
+       /*
+        * NOTE(review): -EBUSY returns without releasing anything, so it
+        * presumably means "backlogged, completion will follow" -- confirm
+        * the submit helper never returns -EBUSY for a rejected request.
+        */
+       if (ret == -EBUSY)
+               return -EBUSY;
+       if (ret)
+               goto out_free_in;
+
+       return -EINPROGRESS;
+
+       /* Unwind in reverse order of acquisition */
+out_free_in:
+       kfree_sensitive(rctx->in_buf);
+out_unmap_tag:
+       cmh_dma_unmap_single(rctx->tag_dma, SM4_GCM_HW_IV_SIZE,
+                            DMA_FROM_DEVICE);
+out_free_tag:
+       kfree(rctx->tag_buf);
+out_unmap_iv:
+       cmh_dma_unmap_single(rctx->iv_dma, SM4_GCM_HW_IV_SIZE, DMA_TO_DEVICE);
+out_free_iv:
+       kfree(rctx->iv_buf);
+       return ret;
+}
+
+/*
+ * CCM empty-input fallback (SM4).
+ *
+ * When both AAD and plaintext are empty, CCM reduces to:
+ *   T  = E(K, B0)    -- CBC-MAC of the single formatting block
+ *   S0 = E(K, A0)    -- CTR block zero
+ *   tag = (T XOR S0)[0..authsize-1]
+ *
+ * The eSW rejects this degenerate case, so the driver computes it
+ * synchronously via two crypto_cipher single-block encryptions.
+ *
+ * T, S0 and the computed tag are derived from the key, so they are wiped
+ * from the stack on every exit path once the cipher has been used.
+ *
+ * Return: 0 on success (tag written on encrypt, tag verified on decrypt),
+ * -EINVAL for a malformed IV, -EOPNOTSUPP when the key is not a raw key,
+ * -EBADMSG when the supplied tag does not match.
+ */
+static int cmh_sm4_ccm_empty(struct aead_request *req, u32 sm4_op)
+{
+       struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+       struct cmh_sm4_aead_tfm_ctx *tctx = crypto_aead_ctx(tfm);
+       u32 authsize = tctx->authsize;
+       u8 b0[CMH_SM4_BLOCK_SIZE], a0[CMH_SM4_BLOCK_SIZE];
+       u8 t[CMH_SM4_BLOCK_SIZE], s0[CMH_SM4_BLOCK_SIZE];
+       u8 tag[CMH_SM4_BLOCK_SIZE];
+       int ret = 0;
+       u8 L;
+       u32 i;
+
+       /* Defense-in-depth: iv[0] = L-1, valid L is 2..8 per RFC 3610 S2.1 */
+       if (WARN_ON_ONCE(req->iv[0] < 1 || req->iv[0] > 7))
+               return -EINVAL;
+
+       L = req->iv[0] + 1;
+
+       /* The software cipher only ever holds a raw key */
+       if (tctx->key.mode != CMH_KEY_RAW)
+               return -EOPNOTSUPP;
+
+       /* B0: flags || nonce || Q(=0).  Adata=0, t=authsize, q=L. */
+       memset(b0, 0, CMH_SM4_BLOCK_SIZE);
+       b0[0] = (u8)(8 * ((authsize - 2) / 2) + (L - 1));
+       memcpy(&b0[1], &req->iv[1], 15 - L);
+
+       /* A0: (L-1) || nonce || counter(=0) */
+       memset(a0, 0, CMH_SM4_BLOCK_SIZE);
+       a0[0] = (u8)(L - 1);
+       memcpy(&a0[1], &req->iv[1], 15 - L);
+
+       crypto_cipher_encrypt_one(tctx->sw_cipher, t, b0);
+       crypto_cipher_encrypt_one(tctx->sw_cipher, s0, a0);
+
+       for (i = 0; i < authsize; i++)
+               tag[i] = t[i] ^ s0[i];
+
+       if (sm4_op == SM4_OP_ENCRYPT) {
+               /* No AAD and no payload: the tag is the whole output */
+               scatterwalk_map_and_copy(tag, req->dst,
+                                        req->assoclen, authsize, 1);
+       } else {
+               u8 expected[CMH_SM4_BLOCK_SIZE];
+
+               scatterwalk_map_and_copy(expected, req->src,
+                                        req->assoclen, authsize, 0);
+               /* Constant-time compare against the received tag */
+               if (crypto_memneq(tag, expected, authsize))
+                       ret = -EBADMSG;
+       }
+
+       /*
+        * Do not leave key-derived material behind; in particular the
+        * correct tag must not linger after a failed verification.
+        */
+       memzero_explicit(t, sizeof(t));
+       memzero_explicit(s0, sizeof(s0));
+       memzero_explicit(tag, sizeof(tag));
+
+       return ret;
+}
+
+/*
+ * Common encrypt/decrypt path for gcm(sm4) and ccm(sm4).
+ *
+ * @req:    AEAD request; src/dst layout is AAD || payload [|| tag]
+ * @sm4_op: SM4_OP_ENCRYPT or SM4_OP_DECRYPT
+ *
+ * Validates the request, diverts the empty-input case to the fallbacks,
+ * linearises AAD/payload/tag/IV into DMA-mapped bounce buffers, builds the
+ * command sequence (key write, INIT, optional AAD_FINAL, FINAL, FLUSH) and
+ * submits it asynchronously.  Buffers are released by
+ * cmh_sm4_aead_complete(), or by the unwind labels below when submission
+ * fails.
+ *
+ * Return: -EINPROGRESS once submitted, -EBUSY as reported by the submit
+ * helper, 0 or an error from the synchronous CCM fallback, or a negative
+ * errno (-ENOKEY, -EINVAL, -ENOMEM, submit error).
+ */
+static int cmh_sm4_aead_crypt(struct aead_request *req, u32 sm4_op)
+{
+       struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+       struct cmh_sm4_aead_tfm_ctx *tctx = crypto_aead_ctx(tfm);
+       const struct cmh_sm4_aead_info *info = cmh_sm4_aead_get_info(tfm);
+       struct cmh_sm4_aead_reqctx *rctx = aead_request_ctx(req);
+       struct vcq_cmd cmds[CMH_SM4_AEAD_MAX_PAYLOAD];
+       u64 key_ref;
+       u32 keylen, authsize, cryptlen;
+       struct core_dispatch d;
+       s32 target_mbx;
+       u32 core_id;
+       u32 idx;
+       int ret;
+       gfp_t gfp;
+
+       if (tctx->key.mode == CMH_KEY_NONE)
+               return -ENOKEY;
+
+       authsize = tctx->authsize;
+
+       /* On decrypt req->cryptlen includes the tag; strip it */
+       if (sm4_op == SM4_OP_ENCRYPT) {
+               cryptlen = req->cryptlen;
+       } else {
+               if (req->cryptlen < authsize)
+                       return -EINVAL;
+               cryptlen = req->cryptlen - authsize;
+       }
+
+       /*
+        * Validate CCM IV format early -- the empty-input fallback and
+        * nonce extraction both depend on iv[0] being in range [1,7].
+        */
+       if (info->type == CMH_SM4_AEAD_CCM) {
+               if (req->iv[0] < 1 || req->iv[0] > 7)
+                       return -EINVAL;
+       }
+
+       /*
+        * The CMH eSW rejects SM4 GCM/CCM when both aadlen and iolen
+        * are zero.  For GCM, the tag is simply E(K, J0) -- use ECB
+        * fallback.  For CCM, compute tag = E(K,B0) XOR E(K,A0) in SW.
+        */
+       if (cryptlen == 0 && req->assoclen == 0) {
+               if (info->type == CMH_SM4_AEAD_GCM)
+                       return cmh_sm4_gcm_empty(req, sm4_op);
+               return cmh_sm4_ccm_empty(req, sm4_op);
+       }
+
+       /*
+        * HW uses a proprietary LLI scatter-gather format that is
+        * incompatible with struct scatterlist, so the payload is
+        * linearised into contiguous buffers for DMA.  Cap total
+        * size to prevent excessive memory consumption.
+        */
+       if ((u64)cryptlen + req->assoclen > SZ_1M)
+               return -EINVAL;
+
+       gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
+             GFP_KERNEL : GFP_ATOMIC;
+
+       /* Record what the completion handler needs to unmap and copy back */
+       memset(rctx, 0, sizeof(*rctx));
+       rctx->cryptlen = cryptlen;
+       rctx->assoclen = req->assoclen;
+       rctx->authsize = authsize;
+       rctx->encrypting = (sm4_op == SM4_OP_ENCRYPT);
+
+       /* Linearise AAD */
+       if (req->assoclen > 0) {
+               rctx->aad_buf = kmalloc(req->assoclen, gfp);
+               if (!rctx->aad_buf)
+                       return -ENOMEM;
+               scatterwalk_map_and_copy(rctx->aad_buf, req->src,
+                                        0, req->assoclen, 0);
+               rctx->aad_dma = cmh_dma_map_single(rctx->aad_buf,
+                                                  req->assoclen,
+                                                   DMA_TO_DEVICE);
+               if (cmh_dma_map_error(rctx->aad_dma)) {
+                       ret = -ENOMEM;
+                       goto out_free_aad;
+               }
+       }
+
+       /* Linearise input */
+       if (cryptlen > 0) {
+               rctx->in_buf = kmalloc(cryptlen, gfp);
+               if (!rctx->in_buf) {
+                       ret = -ENOMEM;
+                       goto out_unmap_aad;
+               }
+               scatterwalk_map_and_copy(rctx->in_buf, req->src,
+                                        req->assoclen, cryptlen, 0);
+               rctx->in_dma = cmh_dma_map_single(rctx->in_buf, cryptlen,
+                                                 DMA_TO_DEVICE);
+               if (cmh_dma_map_error(rctx->in_dma)) {
+                       ret = -ENOMEM;
+                       goto out_free_in;
+               }
+       }
+
+       /* Allocate output buffer */
+       if (cryptlen > 0) {
+               rctx->out_buf = kmalloc(cryptlen, gfp);
+               if (!rctx->out_buf) {
+                       ret = -ENOMEM;
+                       goto out_unmap_in;
+               }
+               rctx->out_dma = cmh_dma_map_single(rctx->out_buf, cryptlen,
+                                                  DMA_FROM_DEVICE);
+               if (cmh_dma_map_error(rctx->out_dma)) {
+                       ret = -ENOMEM;
+                       goto out_free_out;
+               }
+       }
+
+       /* Tag buffer */
+       rctx->tag_buf = kmalloc(authsize, gfp);
+       if (!rctx->tag_buf) {
+               ret = -ENOMEM;
+               goto out_unmap_out;
+       }
+
+       /* Decrypt: hand the received tag to HW; encrypt: HW writes the tag */
+       if (!rctx->encrypting) {
+               scatterwalk_map_and_copy(rctx->tag_buf, req->src,
+                                        req->assoclen + cryptlen,
+                                       authsize, 0);
+       } else {
+               memset(rctx->tag_buf, 0, authsize);
+       }
+
+       rctx->tag_dma = cmh_dma_map_single(rctx->tag_buf, authsize,
+                                          rctx->encrypting ?
+                                           DMA_FROM_DEVICE : DMA_TO_DEVICE);
+       if (cmh_dma_map_error(rctx->tag_dma)) {
+               ret = -ENOMEM;
+               goto out_free_tag;
+       }
+
+       /* Map IV/nonce */
+       if (info->type == CMH_SM4_AEAD_GCM) {
+               /* 12-byte nonce followed by four zero bytes (kzalloc) */
+               rctx->iv_buf = kzalloc(SM4_GCM_HW_IV_SIZE, gfp);
+               if (!rctx->iv_buf) {
+                       ret = -ENOMEM;
+                       goto out_unmap_tag;
+               }
+               memcpy(rctx->iv_buf, req->iv, SM4_GCM_IV_SIZE);
+               rctx->iv_map_len = SM4_GCM_HW_IV_SIZE;
+               rctx->iv_dma = cmh_dma_map_single(rctx->iv_buf,
+                                                 rctx->iv_map_len,
+                                                  DMA_TO_DEVICE);
+       } else {
+               u32 noncelen;
+
+               /*
+                * NOTE(review): this repeats the iv[0] range check done
+                * before any allocation above, so it looks unreachable.
+                */
+               if (req->iv[0] < 1 || req->iv[0] > 7) {
+                       ret = -EINVAL;
+                       goto out_unmap_tag;
+               }
+               noncelen = 14 - req->iv[0];
+
+               /* HW takes the bare nonce: skip the leading L-1 byte */
+               rctx->iv_buf = kmemdup(req->iv + 1, noncelen, gfp);
+               if (!rctx->iv_buf) {
+                       ret = -ENOMEM;
+                       goto out_unmap_tag;
+               }
+               rctx->iv_map_len = noncelen;
+               rctx->iv_dma = cmh_dma_map_single(rctx->iv_buf,
+                                                 rctx->iv_map_len,
+                                                  DMA_TO_DEVICE);
+       }
+       if (cmh_dma_map_error(rctx->iv_dma)) {
+               ret = -ENOMEM;
+               goto out_free_iv;
+       }
+
+       /*
+        * Resolve key reference
+        *
+        * NOTE(review): key.raw is used for every mode other than
+        * CMH_KEY_NONE -- confirm no non-raw mode is reachable for SM4.
+        */
+       idx = 0;
+
+       rctx->key_dma = tctx->key.raw.dma;
+       rctx->keylen = tctx->key.raw.len;
+       vcq_add_sys_write(&cmds[idx++], SYS_REF_TEMP,
+                         (u64)rctx->key_dma, SYS_REF_NONE,
+                         tctx->key.raw.len,
+                         tctx->key.raw.sys_type);
+       key_ref = SYS_REF_TEMP;
+       keylen = tctx->key.raw.len;
+       d = cmh_core_select_instance(CMH_CORE_SM4);
+       target_mbx = d.mbx_idx;
+       core_id = d.core_id;
+
+       /* Build INIT command */
+       if (info->type == CMH_SM4_AEAD_CCM) {
+               vcq_add_sm4_ccm_init(&cmds[idx++], core_id, key_ref,
+                                    (u64)rctx->iv_dma, keylen,
+                                    rctx->iv_map_len, sm4_op,
+                                    req->assoclen, cryptlen, authsize);
+       } else {
+               vcq_add_sm4_aead_init(&cmds[idx++], core_id, key_ref,
+                                     (u64)rctx->iv_dma, keylen,
+                                     SM4_GCM_HW_IV_SIZE, info->sm4_mode,
+                                     sm4_op, req->assoclen, cryptlen);
+       }
+
+       /* AAD_FINAL is emitted only when there is associated data */
+       if (req->assoclen > 0)
+               vcq_add_sm4_aad_final(&cmds[idx++], core_id,
+                                     (u64)rctx->aad_dma, req->assoclen);
+
+       vcq_add_sm4_aead_final(&cmds[idx++], core_id,
+                              cryptlen > 0 ? (u64)rctx->in_dma : 0,
+                              cryptlen > 0 ? (u64)rctx->out_dma : 0,
+                              (u64)rctx->tag_dma, cryptlen, authsize);
+
+       vcq_add_flush(&cmds[idx++], core_id);
+
+       ret = cmh_vcq_pack_and_submit_async(cmds, idx, rctx->packed,
+                                           CMH_SM4_AEAD_MAX_PACKED,
+                                           target_mbx,
+                                           cmh_sm4_aead_complete, req,
+                                           !!(req->base.flags &
+                                              CRYPTO_TFM_REQ_MAY_BACKLOG),
+                                           cmh_tm_async_timeout_jiffies());
+       /*
+        * NOTE(review): -EBUSY returns without releasing anything, so it
+        * presumably means "backlogged, completion will follow" -- confirm
+        * the submit helper never returns -EBUSY for a rejected request.
+        */
+       if (ret == -EBUSY)
+               return -EBUSY;
+       if (ret)
+               goto out_cleanup_all;
+
+       return -EINPROGRESS;
+
+       /* Unwind in reverse order of acquisition */
+out_cleanup_all:
+       cmh_dma_unmap_single(rctx->iv_dma, rctx->iv_map_len, DMA_TO_DEVICE);
+out_free_iv:
+       kfree(rctx->iv_buf);
+out_unmap_tag:
+       cmh_dma_unmap_single(rctx->tag_dma, authsize,
+                            rctx->encrypting ? DMA_FROM_DEVICE :
+                                              DMA_TO_DEVICE);
+out_free_tag:
+       kfree(rctx->tag_buf);
+out_unmap_out:
+       if (cryptlen > 0)
+               cmh_dma_unmap_single(rctx->out_dma, cryptlen, DMA_FROM_DEVICE);
+out_free_out:
+       kfree_sensitive(rctx->out_buf);
+out_unmap_in:
+       if (cryptlen > 0)
+               cmh_dma_unmap_single(rctx->in_dma, cryptlen, DMA_TO_DEVICE);
+out_free_in:
+       kfree_sensitive(rctx->in_buf);
+out_unmap_aad:
+       if (req->assoclen > 0)
+               cmh_dma_unmap_single(rctx->aad_dma, req->assoclen,
+                                    DMA_TO_DEVICE);
+out_free_aad:
+       kfree(rctx->aad_buf);
+       return ret;
+}
+
+/* AEAD .encrypt hook: run the shared crypt path in the encrypt direction. */
+static int cmh_sm4_aead_encrypt(struct aead_request *req)
+{
+       int rc = cmh_sm4_aead_crypt(req, SM4_OP_ENCRYPT);
+
+       return rc;
+}
+
+/* AEAD .decrypt hook: run the shared crypt path in the decrypt direction. */
+static int cmh_sm4_aead_decrypt(struct aead_request *req)
+{
+       int rc = cmh_sm4_aead_crypt(req, SM4_OP_DECRYPT);
+
+       return rc;
+}
+
+/* Registration */
+
+/*
+ * Backing storage for the registered aead_alg instances.  Slot i is filled
+ * from sm4_aead_algs[i] by cmh_sm4_aead_register().
+ */
+static struct cmh_sm4_aead_drv sm4_aead_drv_algs[ARRAY_SIZE(sm4_aead_algs)];
+
+/**
+ * cmh_sm4_aead_register() - Register SM4-GCM/CCM AEAD algorithms with the crypto framework
+ *
+ * Fills one aead_alg per sm4_aead_algs[] entry and registers it.  If a
+ * registration fails, the entries registered so far are unregistered again
+ * before the error is returned.
+ *
+ * Return: 0 on success, negative errno on failure.
+ */
+int cmh_sm4_aead_register(void)
+{
+       unsigned int i;
+       int ret;
+
+       for (i = 0; i < ARRAY_SIZE(sm4_aead_algs); i++) {
+               const struct cmh_sm4_aead_info *info = &sm4_aead_algs[i];
+               struct cmh_sm4_aead_drv *drv = &sm4_aead_drv_algs[i];
+               struct aead_alg *alg = &drv->alg;
+
+               drv->info = info;
+
+               memset(alg, 0, sizeof(*alg));
+
+               alg->setkey      = cmh_sm4_aead_setkey;
+               alg->setauthsize = cmh_sm4_aead_setauthsize;
+               alg->encrypt     = cmh_sm4_aead_encrypt;
+               alg->decrypt     = cmh_sm4_aead_decrypt;
+               alg->init        = cmh_sm4_aead_init_tfm;
+               alg->exit        = cmh_sm4_aead_exit_tfm;
+               alg->ivsize      = info->ivsize;
+               alg->maxauthsize = info->maxauthsize;
+
+               strscpy(alg->base.cra_name, info->alg_name,
+                       CRYPTO_MAX_ALG_NAME);
+               strscpy(alg->base.cra_driver_name, info->drv_name,
+                       CRYPTO_MAX_ALG_NAME);
+               alg->base.cra_priority  = 300;
+               /*
+                * The request path bounces IV/AAD/data/tag through per-request
+                * heap buffers, so advertise CRYPTO_ALG_ALLOCATES_MEMORY: users
+                * that cannot tolerate an allocation failure can then mask
+                * this implementation out.
+                */
+               alg->base.cra_flags     = CRYPTO_ALG_KERN_DRIVER_ONLY |
+                                         CRYPTO_ALG_ASYNC |
+                                         CRYPTO_ALG_ALLOCATES_MEMORY;
+               alg->base.cra_blocksize = 1;
+               alg->base.cra_ctxsize  = sizeof(struct cmh_sm4_aead_tfm_ctx);
+               alg->base.cra_module   = THIS_MODULE;
+
+               ret = crypto_register_aead(alg);
+               if (ret) {
+                       dev_err(cmh_dev(), "cmh_sm4_aead: failed to register %s (rc=%d)\n",
+                               info->alg_name, ret);
+                       goto err_unregister;
+               }
+
+               dev_dbg(cmh_dev(), "cmh_sm4_aead: registered %s\n", info->alg_name);
+       }
+
+       return 0;
+
+err_unregister:
+       /* i indexes the entry that failed; unwind everything before it */
+       while (i--)
+               crypto_unregister_aead(&sm4_aead_drv_algs[i].alg);
+       return ret;
+}
+
+/**
+ * cmh_sm4_aead_unregister() - Remove every SM4 AEAD algorithm from the crypto framework
+ *
+ * Walks the table in registration order and unregisters each entry.
+ */
+void cmh_sm4_aead_unregister(void)
+{
+       unsigned int idx = 0;
+
+       while (idx < ARRAY_SIZE(sm4_aead_algs)) {
+               struct cmh_sm4_aead_drv *drv = &sm4_aead_drv_algs[idx];
+
+               crypto_unregister_aead(&drv->alg);
+               dev_dbg(cmh_dev(), "cmh_sm4_aead: unregistered %s\n",
+                       sm4_aead_algs[idx].alg_name);
+               idx++;
+       }
+}
diff --git a/drivers/crypto/cmh/cmh_sm4_cmac.c b/drivers/crypto/cmh/cmh_sm4_cmac.c
new file mode 100644
index 000000000000..9304dede3f68
--- /dev/null
+++ b/drivers/crypto/cmh/cmh_sm4_cmac.c
@@ -0,0 +1,672 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2026 Cryptography Research, Inc. (CRI).
+ * CMH LKM -- Kernel Crypto API SM4-CMAC / SM4-XCBC (ahash) Driver
+ *
+ * Registers cmac(sm4) and xcbc(sm4) as ahash algorithms.
+ *
+ * Both produce a 16-byte tag (MAC) from a key and message.
+ * VCQ sequence: [SYS_CMD_WRITE] + SM4_CMD_INIT(CMAC/XCBC) +
+ *               SM4_CMD_AAD_FINAL + SM4_CMD_FINAL + FLUSH
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/crypto.h>
+#include <crypto/internal/cipher.h>
+#include <crypto/internal/hash.h>
+#include <crypto/scatterwalk.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+
+#include "cmh_sm4.h"
+#include "cmh_vcq.h"
+#include "cmh_sm4_abi.h"
+#include "cmh_sys_abi.h"
+#include "cmh_sys.h"
+#include "cmh_txn.h"
+#include "cmh_dma.h"
+#include "cmh_key.h"
+
+#define SM4_MAC_DIGEST_SIZE    16U
+#define SM4_MAC_BLOCK_SIZE     16U
+/*
+ * Maximum accumulated data for SM4 MAC -- driver-imposed, not HW.
+ *
+ * The SM4 core does not expose external save/restore VCQ commands,
+ * so the driver must accumulate all data in kernel memory via
+ * .update() and submit it atomically in .final().  This cap limits
+ * the per-request kernel allocation.
+ */
+#define SM4_MAC_MAX_DATA       (64 * 1024)
+
+/*
+ * Static description of one MAC algorithm exposed by this driver.
+ * Field order matters: sm4_mac_algs[] uses positional initialisers.
+ */
+struct cmh_sm4_mac_alg_info {
+       u32         sm4_mode;   /* SM4_MODE_CMAC or SM4_MODE_XCBC */
+       const char *alg_name;   /* copied into cra_name at registration */
+       const char *drv_name;   /* copied into cra_driver_name at registration */
+};
+
+/* MAC algorithms registered by cmh_sm4_cmac_register(), in registration order. */
+static const struct cmh_sm4_mac_alg_info sm4_mac_algs[] = {
+       {
+               .sm4_mode = SM4_MODE_CMAC,
+               .alg_name = "cmac(sm4)",
+               .drv_name = "cri-cmh-cmac-sm4",
+       },
+       {
+               .sm4_mode = SM4_MODE_XCBC,
+               .alg_name = "xcbc(sm4)",
+               .drv_name = "cri-cmh-xcbc-sm4",
+       },
+};
+
+/* Per-transform context: key material, MAC mode and chunk bookkeeping. */
+struct cmh_sm4_mac_tfm_ctx {
+       struct cmh_key_ctx key;                 /* key handed to the SM4 core */
+       u32 sm4_mode;                           /* SM4_MODE_CMAC or SM4_MODE_XCBC */
+       /*
+        * Software cipher, allocated for XCBC only (NULL for CMAC).  After a
+        * successful setkey() it stays keyed with K1, not with the user key.
+        */
+       struct crypto_cipher *sw_cipher;        /* XCBC empty-input fallback */
+       /* Cached XCBC subkeys (derived at setkey time for concurrency safety) */
+       u8 xcbc_k1[CMH_SM4_BLOCK_SIZE];         /* K1 = E(K, 0x01..01) */
+       u8 xcbc_k3[CMH_SM4_BLOCK_SIZE];         /* K3 = E(K, 0x03..03) */
+       bool xcbc_subkeys_valid;                /* set once setkey() fully succeeded */
+       spinlock_t         chunk_lock;  /* protects all_chunks */
+       /* every live chunk of every request on this tfm; drained in exit_tfm */
+       struct list_head   all_chunks;  /* orphan-safe chunk tracking */
+};
+
+/*
+ * Chunk node for O(1) update() appends.  Each update() call copies its
+ * input into one chunk; final() linearises the chunks for DMA.
+ */
+struct cmh_sm4_mac_chunk {
+       struct list_head list;     /* link in cmh_sm4_mac_reqctx::chunks */
+       struct list_head tfm_node; /* per-tfm orphan tracking */
+       u32 len;                   /* number of valid bytes in data[] */
+       u8  data[];
+};
+
+/* Per-request context (lives in ahash_request::__ctx) */
+
+/*
+ * final() builds at most five commands:
+ * SYS write + INIT + AAD_FINAL + FINAL + FLUSH.
+ * NOTE(review): the 2x factor for the packed form is presumably what the
+ * VCQ packer needs per command -- confirm against cmh_vcq.
+ */
+#define CMH_SM4_MAC_MAX_PAYLOAD                5
+#define CMH_SM4_MAC_MAX_PACKED         (CMH_SM4_MAC_MAX_PAYLOAD * 2)
+
+struct cmh_sm4_mac_reqctx {
+       struct list_head chunks;        /* data accumulated by update()/import() */
+       u32  total_len;                 /* sum of all chunk lengths */
+       u8  *buf;               /* linearised in final() */
+       /* DMA state for async final */
+       dma_addr_t key_dma;
+       dma_addr_t in_dma;              /* mapping of buf (only if total_len > 0) */
+       dma_addr_t tag_dma;             /* mapping of tag_buf */
+       u8 *tag_buf;                    /* device writes the MAC here */
+       u32 keylen;
+       struct vcq_cmd packed[CMH_SM4_MAC_MAX_PACKED]; /* packed VCQ handed to submit */
+};
+
+/*
+ * Flat state for export/import -- holds accumulated input data only.
+ * export() concatenates all chunks into data[]; import() rebuilds a single
+ * chunk from it.
+ */
+struct cmh_sm4_mac_export_state {
+       u32 total_len;  /* number of valid bytes in data[] */
+       u8  data[];
+};
+
+/*
+ * Flat state buffer for export/import.  The CMH SM4 core does not
+ * support save/restore of intermediate MAC state, so this driver
+ * accumulates input in SW and serialises the buffer on export.
+ *
+ * PAGE_SIZE (4096) caps the exportable accumulated-data window.
+ * Full-range export is not feasible because the crypto subsystem
+ * pre-allocates statesize bytes per request.  Export returns -ENOSPC
+ * if the caller has accumulated more than CMH_SM4_MAC_EXPORT_MAX;
+ * import rejects a state larger than that with -EINVAL.
+ */
+#define CMH_SM4_MAC_STATE_SIZE 4096
+#define CMH_SM4_MAC_EXPORT_MAX \
+       (CMH_SM4_MAC_STATE_SIZE - sizeof(struct cmh_sm4_mac_export_state))
+
+/*
+ * Registered algorithm plus its static description.  init_tfm() recovers
+ * this wrapper from the ahash_alg pointer via container_of().
+ */
+struct cmh_sm4_mac_drv {
+       struct ahash_alg                   alg;
+       const struct cmh_sm4_mac_alg_info *info;
+};
+
+/*
+ * cmh_sm4_mac_setkey() - ahash .setkey for cmac(sm4) / xcbc(sm4)
+ * @tfm:    transform being keyed
+ * @key:    raw SM4 key
+ * @keylen: must equal CMH_SM4_KEY_SIZE
+ *
+ * For XCBC (sw_cipher allocated) the subkeys K1 and K3 are derived in
+ * software and cached, and sw_cipher is left keyed with K1 for the
+ * empty-message fallback in final().  The raw key is then handed to
+ * cmh_key_setkey_raw() for the hardware path.
+ *
+ * Return: 0 on success, -EINVAL for a wrong key length, or the error from
+ * crypto_cipher_setkey() / cmh_key_setkey_raw().
+ */
+static int cmh_sm4_mac_setkey(struct crypto_ahash *tfm, const u8 *key,
+                             unsigned int keylen)
+{
+       struct cmh_sm4_mac_tfm_ctx *tctx = crypto_ahash_ctx(tfm);
+       int ret;
+
+       if (keylen != CMH_SM4_KEY_SIZE)
+               return -EINVAL;
+
+       if (tctx->sw_cipher) {
+               u8 const1[CMH_SM4_BLOCK_SIZE], const3[CMH_SM4_BLOCK_SIZE];
+
+               /*
+                * sw_cipher and the cached subkeys are about to be
+                * overwritten.  Drop the "valid" flag first so that a failure
+                * anywhere below cannot leave final() trusting a mix of old
+                * and new key state from a previous successful setkey().
+                */
+               tctx->xcbc_subkeys_valid = false;
+
+               ret = crypto_cipher_setkey(tctx->sw_cipher, key, keylen);
+               if (ret)
+                       return ret;
+
+               /* Pre-derive XCBC subkeys for concurrent-safe final() */
+               memset(const1, 0x01, CMH_SM4_BLOCK_SIZE);
+               memset(const3, 0x03, CMH_SM4_BLOCK_SIZE);
+               crypto_cipher_encrypt_one(tctx->sw_cipher, tctx->xcbc_k1,
+                                         const1);
+               crypto_cipher_encrypt_one(tctx->sw_cipher, tctx->xcbc_k3,
+                                         const3);
+
+               /*
+                * Leave sw_cipher keyed with K1 permanently.
+                * final() only needs E(K1, block) and never touches the
+                * original key again, so no re-keying in the hot path
+                * eliminates the per-tfm concurrency race entirely.
+                */
+               ret = crypto_cipher_setkey(tctx->sw_cipher, tctx->xcbc_k1,
+                                          CMH_SM4_BLOCK_SIZE);
+               if (ret)
+                       return ret;
+       }
+
+       ret = cmh_key_setkey_raw(&tctx->key, key, keylen, CORE_ID_SM4);
+       if (ret)
+               return ret;
+
+       /* Only now do the software subkeys and the hardware key agree */
+       if (tctx->sw_cipher)
+               tctx->xcbc_subkeys_valid = true;
+
+       return 0;
+}
+
+/*
+ * Release every chunk queued on @rctx.  Each chunk is also unlinked from the
+ * per-tfm tracking list, which is why the walk runs under chunk_lock.
+ */
+static void cmh_sm4_mac_free_chunks(struct cmh_sm4_mac_reqctx *rctx,
+                                   struct cmh_sm4_mac_tfm_ctx *tctx)
+{
+       struct cmh_sm4_mac_chunk *node;
+
+       spin_lock_bh(&tctx->chunk_lock);
+       while (!list_empty(&rctx->chunks)) {
+               node = list_first_entry(&rctx->chunks,
+                                       struct cmh_sm4_mac_chunk, list);
+               list_del(&node->list);
+               list_del(&node->tfm_node);
+               kfree_sensitive(node);
+       }
+       spin_unlock_bh(&tctx->chunk_lock);
+}
+
+/* ahash .init: start from an all-zero request context with an empty chunk list. */
+static int cmh_sm4_mac_init(struct ahash_request *req)
+{
+       struct cmh_sm4_mac_reqctx *state = ahash_request_ctx(req);
+
+       memset(state, 0, sizeof(struct cmh_sm4_mac_reqctx));
+       INIT_LIST_HEAD(&state->chunks);
+
+       return 0;
+}
+
+/*
+ * cmh_sm4_mac_update() - ahash .update: buffer the caller's data in software
+ *
+ * Copies req->nbytes bytes (from req->svirt or the req->src scatterlist)
+ * into a freshly allocated chunk and appends it to the request.  Nothing is
+ * sent to the hardware here; final() submits the accumulated data at once.
+ *
+ * Return: 0 on success, -EINVAL if the SM4_MAC_MAX_DATA cap would be
+ * exceeded, -ENOMEM on allocation failure.  On error all previously
+ * accumulated data is discarded and the request is reset to empty.
+ */
+static int cmh_sm4_mac_update(struct ahash_request *req)
+{
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+       struct cmh_sm4_mac_tfm_ctx *tctx = crypto_ahash_ctx(tfm);
+       struct cmh_sm4_mac_reqctx *rctx = ahash_request_ctx(req);
+       struct cmh_sm4_mac_chunk *chunk;
+       gfp_t gfp;
+       int ret;
+
+       if (!req->nbytes)
+               return 0;
+
+       /* total_len never exceeds the cap, so the subtraction cannot wrap */
+       if (req->nbytes > SM4_MAC_MAX_DATA - rctx->total_len) {
+               ret = -EINVAL;
+               goto err_free_chunks;
+       }
+
+       gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
+             GFP_KERNEL : GFP_ATOMIC;
+       chunk = kmalloc(sizeof(*chunk) + req->nbytes, gfp);
+       if (!chunk) {
+               ret = -ENOMEM;
+               goto err_free_chunks;
+       }
+
+       chunk->len = req->nbytes;
+       if (req->base.flags & CRYPTO_AHASH_REQ_VIRT)
+               memcpy(chunk->data, req->svirt, req->nbytes);
+       else
+               scatterwalk_map_and_copy(chunk->data, req->src,
+                                        0, req->nbytes, 0);
+       list_add_tail(&chunk->list, &rctx->chunks);
+       spin_lock_bh(&tctx->chunk_lock);
+       list_add_tail(&chunk->tfm_node, &tctx->all_chunks);
+       spin_unlock_bh(&tctx->chunk_lock);
+       rctx->total_len += req->nbytes;
+       return 0;
+
+err_free_chunks:
+       /*
+        * Terminal error -- free all previously accumulated chunks.
+        * callers may not call .final() on error, so they would leak.
+        *
+        * total_len must be reset together with the list: otherwise a later
+        * final() would allocate total_len bytes, copy nothing into them and
+        * MAC uninitialised memory, and export() would report a length with
+        * no data behind it.
+        */
+       cmh_sm4_mac_free_chunks(rctx, tctx);
+       rctx->total_len = 0;
+       return ret;
+}
+
+/*
+ * Completion callback for the asynchronous final() submission.
+ * @data:  the ahash_request passed to the submit helper
+ * @error: 0 on success, -EINPROGRESS for an intermediate notification,
+ *         otherwise a negative errno
+ *
+ * On a terminal status the DMA mappings are torn down, the tag is copied
+ * to req->result (success only) and all per-request buffers are released.
+ */
+static void cmh_sm4_mac_complete(void *data, int error)
+{
+       struct ahash_request *areq = data;
+       struct cmh_sm4_mac_reqctx *state = ahash_request_ctx(areq);
+       struct cmh_sm4_mac_tfm_ctx *ctx =
+               crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
+
+       /* Not a terminal status: forward it and keep all resources alive */
+       if (error == -EINPROGRESS)
+               goto notify;
+
+       if (state->total_len > 0)
+               cmh_dma_unmap_single(state->in_dma, state->total_len,
+                                    DMA_TO_DEVICE);
+       cmh_dma_unmap_single(state->tag_dma, SM4_MAC_DIGEST_SIZE,
+                            DMA_FROM_DEVICE);
+
+       /* The tag buffer is only read after it has been unmapped */
+       if (error == 0)
+               memcpy(areq->result, state->tag_buf, SM4_MAC_DIGEST_SIZE);
+
+       kfree(state->tag_buf);
+       state->tag_buf = NULL;
+
+       cmh_sm4_mac_free_chunks(state, ctx);
+
+       kfree_sensitive(state->buf);
+       state->buf = NULL;
+       state->total_len = 0;
+
+notify:
+       cmh_complete(&areq->base, error);
+}
+
+/*
+ * cmh_sm4_mac_final() - ahash .final: compute the MAC over all accumulated data
+ *
+ * Linearises the chunks queued by update() into one buffer, maps it and a
+ * tag buffer for DMA, and submits SYS write + INIT + [AAD_FINAL] + FINAL +
+ * FLUSH as one asynchronous VCQ.  cmh_sm4_mac_complete() later copies the
+ * tag to req->result and releases everything.
+ *
+ * A zero-length XCBC message is computed synchronously in software instead.
+ *
+ * Return: -EINPROGRESS once submitted, -EBUSY as returned by the submit
+ * helper, 0 for the synchronous XCBC path, or a negative errno (-ENOKEY,
+ * -EOPNOTSUPP, -ENOMEM or a submit error) after the request state was freed.
+ */
+static int cmh_sm4_mac_final(struct ahash_request *req)
+{
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+       struct cmh_sm4_mac_tfm_ctx *tctx = crypto_ahash_ctx(tfm);
+       struct cmh_sm4_mac_reqctx *rctx = ahash_request_ctx(req);
+       struct vcq_cmd cmds[CMH_SM4_MAC_MAX_PAYLOAD];
+       u64 key_ref;
+       u32 keylen;
+       struct core_dispatch d;
+       s32 target_mbx;
+       u32 core_id;
+       u32 idx;
+       int ret;
+       gfp_t gfp;
+
+       if (tctx->key.mode == CMH_KEY_NONE) {
+               ret = -ENOKEY;
+               goto out_free_chunks;
+       }
+
+       /*
+        * XCBC empty-input SW fallback (RFC 3566).
+        *
+        * For a zero-length message:
+        *   K1 = E(K, 0x01010101...)  -- encryption subkey
+        *   K3 = E(K, 0x03030303...)  -- incomplete-block subkey
+        *   pad = 0x80 00...00        -- single 1 bit + 127 zero bits
+        *   tag = E(K1, pad XOR K3)
+        *
+        * The eSW produces incorrect output for this case, so the driver
+        * computes it synchronously using crypto_cipher.
+        *
+        * For DS keys we cannot derive subkeys (no raw key material),
+        * and the HW also cannot handle empty XCBC correctly, so
+        * return -EOPNOTSUPP.
+        */
+       if (rctx->total_len == 0 && tctx->sm4_mode == SM4_MODE_XCBC) {
+               u8 block[CMH_SM4_BLOCK_SIZE];
+               u32 i;
+
+               if (tctx->key.mode != CMH_KEY_RAW ||
+                   !tctx->xcbc_subkeys_valid) {
+                       cmh_sm4_mac_free_chunks(rctx, tctx);
+                       return -EOPNOTSUPP;
+               }
+
+               /* block = pad XOR K3 */
+               memset(block, 0, CMH_SM4_BLOCK_SIZE);
+               block[0] = 0x80;
+               for (i = 0; i < CMH_SM4_BLOCK_SIZE; i++)
+                       block[i] ^= tctx->xcbc_k3[i];
+
+               /*
+                * tag = E(K1, block)
+                *
+                * sw_cipher is permanently keyed with K1 (set at setkey
+                * time), so this is safe for concurrent requests sharing
+                * the same tfm -- no re-keying, no race.
+                */
+               crypto_cipher_encrypt_one(tctx->sw_cipher, req->result,
+                                         block);
+
+               /* block is K3 with one bit flipped: scrub the subkey copy */
+               memzero_explicit(block, sizeof(block));
+
+               cmh_sm4_mac_free_chunks(rctx, tctx);
+               return 0;
+       }
+
+       gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
+             GFP_KERNEL : GFP_ATOMIC;
+
+       /* Linearise chunks into a single contiguous buffer for DMA */
+       if (rctx->total_len > 0) {
+               struct cmh_sm4_mac_chunk *c;
+               u32 off = 0;
+
+               rctx->buf = kmalloc(rctx->total_len, gfp);
+               if (!rctx->buf) {
+                       ret = -ENOMEM;
+                       goto out_free_chunks;
+               }
+               list_for_each_entry(c, &rctx->chunks, list) {
+                       memcpy(rctx->buf + off, c->data, c->len);
+                       off += c->len;
+               }
+       }
+
+       rctx->tag_buf = kzalloc(SM4_MAC_DIGEST_SIZE, gfp);
+       if (!rctx->tag_buf) {
+               ret = -ENOMEM;
+               goto out_free_buf;
+       }
+
+       rctx->tag_dma = cmh_dma_map_single(rctx->tag_buf,
+                                          SM4_MAC_DIGEST_SIZE,
+                                          DMA_FROM_DEVICE);
+       if (cmh_dma_map_error(rctx->tag_dma)) {
+               ret = -ENOMEM;
+               goto out_free_tag;
+       }
+
+       if (rctx->total_len > 0) {
+               rctx->in_dma = cmh_dma_map_single(rctx->buf, rctx->total_len,
+                                                 DMA_TO_DEVICE);
+               if (cmh_dma_map_error(rctx->in_dma)) {
+                       ret = -ENOMEM;
+                       goto out_unmap_tag;
+               }
+       }
+
+       idx = 0;
+
+       /* Load the raw key into the temporary key slot within the same VCQ */
+       rctx->key_dma = tctx->key.raw.dma;
+       rctx->keylen = tctx->key.raw.len;
+       vcq_add_sys_write(&cmds[idx++], SYS_REF_TEMP,
+                         (u64)rctx->key_dma, SYS_REF_NONE,
+                         tctx->key.raw.len,
+                         tctx->key.raw.sys_type);
+       key_ref = SYS_REF_TEMP;
+       keylen = tctx->key.raw.len;
+       d = cmh_core_select_instance(CMH_CORE_SM4);
+       target_mbx = d.mbx_idx;
+       core_id = d.core_id;
+
+       /*
+        * INIT: mode=CMAC or XCBC
+        * CMAC/XCBC data goes through the AAD path:
+        *   aadlen = total data length, iolen = 0
+        */
+       {
+               struct vcq_cmd *slot = &cmds[idx++];
+
+               memset(slot, 0, sizeof(*slot));
+               slot->magic = VCQ_CMD_MAGIC;
+               slot->id = VCQ_CMD_ID(core_id, 0, 1, SM4_CMD_INIT);
+               slot->hwc.sm4.cmd_init.key = key_ref;
+               slot->hwc.sm4.cmd_init.iv = 0;
+               slot->hwc.sm4.cmd_init.keylen = keylen;
+               slot->hwc.sm4.cmd_init.ivlen = 0;
+               slot->hwc.sm4.cmd_init.mode = tctx->sm4_mode;
+               slot->hwc.sm4.cmd_init.op = SM4_OP_ENCRYPT;
+               slot->hwc.sm4.cmd_init.aadlen = rctx->total_len;
+               slot->hwc.sm4.cmd_init.iolen = 0;
+       }
+
+       /* AAD_FINAL: send data through the AAD path */
+       if (rctx->total_len > 0) {
+               struct vcq_cmd *slot = &cmds[idx++];
+
+               memset(slot, 0, sizeof(*slot));
+               slot->magic = VCQ_CMD_MAGIC;
+               slot->id = VCQ_CMD_ID(core_id, 0, 1, SM4_CMD_AAD_FINAL);
+               slot->hwc.sm4.cmd_aad_final.data = (u64)rctx->in_dma;
+               slot->hwc.sm4.cmd_aad_final.datalen = rctx->total_len;
+       }
+
+       /* FINAL: tag extraction only (no data) */
+       {
+               struct vcq_cmd *slot = &cmds[idx++];
+
+               memset(slot, 0, sizeof(*slot));
+               slot->magic = VCQ_CMD_MAGIC;
+               slot->id = VCQ_CMD_ID(core_id, 0, 1, SM4_CMD_FINAL);
+               slot->hwc.sm4.cmd_final.input = 0;
+               slot->hwc.sm4.cmd_final.output = 0;
+               slot->hwc.sm4.cmd_final.tag = (u64)rctx->tag_dma;
+               slot->hwc.sm4.cmd_final.iolen = 0;
+               slot->hwc.sm4.cmd_final.taglen = SM4_MAC_DIGEST_SIZE;
+       }
+
+       vcq_add_flush(&cmds[idx++], core_id);
+
+       ret = cmh_vcq_pack_and_submit_async(cmds, idx, rctx->packed,
+                                           CMH_SM4_MAC_MAX_PACKED,
+                                           target_mbx,
+                                           cmh_sm4_mac_complete, req,
+                                           !!(req->base.flags &
+                                              CRYPTO_TFM_REQ_MAY_BACKLOG),
+                                           cmh_tm_async_timeout_jiffies());
+       /*
+        * -EBUSY is passed through without cleanup.
+        * NOTE(review): this assumes the submit helper only returns -EBUSY
+        * when the request was accepted (backlogged) and the completion
+        * callback will still run -- confirm, otherwise the buffers leak.
+        */
+       if (ret == -EBUSY)
+               return -EBUSY;
+       if (ret)
+               goto out_cleanup_all;
+
+       return -EINPROGRESS;
+
+out_cleanup_all:
+       if (rctx->total_len > 0 && !cmh_dma_map_error(rctx->in_dma))
+               cmh_dma_unmap_single(rctx->in_dma, rctx->total_len,
+                                    DMA_TO_DEVICE);
+out_unmap_tag:
+       cmh_dma_unmap_single(rctx->tag_dma, SM4_MAC_DIGEST_SIZE,
+                            DMA_FROM_DEVICE);
+out_free_tag:
+       kfree(rctx->tag_buf);
+       /* Do not leave a dangling pointer in the reusable request context */
+       rctx->tag_buf = NULL;
+out_free_buf:
+       kfree_sensitive(rctx->buf);
+       rctx->buf = NULL;
+out_free_chunks:
+       cmh_sm4_mac_free_chunks(rctx, tctx);
+       rctx->total_len = 0;
+       return ret;
+}
+
+/*
+ * ahash .export()/.import(): serialize/deserialize the software
+ * accumulation buffer.  No HW state is involved.
+ */
+
+/*
+ * ahash .export: flatten the accumulated chunks into @out.
+ *
+ * Return: 0 on success, -ENOSPC if more than CMH_SM4_MAC_EXPORT_MAX bytes
+ * have been accumulated and therefore do not fit the state buffer.
+ */
+static int cmh_sm4_mac_export(struct ahash_request *req, void *out)
+{
+       struct cmh_sm4_mac_reqctx *rctx = ahash_request_ctx(req);
+       struct cmh_sm4_mac_export_state *blob = out;
+       struct cmh_sm4_mac_chunk *c;
+       u8 *dst = blob->data;
+
+       if (rctx->total_len > CMH_SM4_MAC_EXPORT_MAX)
+               return -ENOSPC;
+
+       blob->total_len = rctx->total_len;
+       list_for_each_entry(c, &rctx->chunks, list) {
+               memcpy(dst, c->data, c->len);
+               dst += c->len;
+       }
+
+       return 0;
+}
+
+/*
+ * ahash .import: rebuild the request context from a state produced by
+ * cmh_sm4_mac_export().  All imported data ends up in a single chunk.
+ *
+ * Return: 0 on success, -EINVAL if the state claims more than
+ * CMH_SM4_MAC_EXPORT_MAX bytes, -ENOMEM on allocation failure.
+ */
+static int cmh_sm4_mac_import(struct ahash_request *req, const void *in)
+{
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+       struct cmh_sm4_mac_tfm_ctx *tctx = crypto_ahash_ctx(tfm);
+       struct cmh_sm4_mac_reqctx *rctx = ahash_request_ctx(req);
+       const struct cmh_sm4_mac_export_state *state = in;
+       struct cmh_sm4_mac_chunk *chunk;
+       gfp_t gfp;
+
+       /*
+        * Do NOT call free_chunks() here: the crypto API does not
+        * guarantee the request context is in a valid state before
+        * import(), so the list pointers may be stale or invalid.
+        * Re-initialize from scratch instead.  Any pre-existing chunks
+        * are tracked on tctx->all_chunks and freed in exit_tfm.
+        */
+       memset(rctx, 0, sizeof(*rctx));
+       INIT_LIST_HEAD(&rctx->chunks);
+
+       if (state->total_len > CMH_SM4_MAC_EXPORT_MAX)
+               return -EINVAL;
+
+       if (state->total_len) {
+               /*
+                * Honour the caller's sleep permission exactly like update()
+                * and final() do; import() may run where sleeping is not
+                * allowed.  Only the request *context* is untrusted here,
+                * req->base.flags is set by the caller.
+                */
+               gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
+                     GFP_KERNEL : GFP_ATOMIC;
+               chunk = kmalloc(sizeof(*chunk) + state->total_len, gfp);
+               if (!chunk)
+                       return -ENOMEM;
+               chunk->len = state->total_len;
+               memcpy(chunk->data, state->data, state->total_len);
+               list_add_tail(&chunk->list, &rctx->chunks);
+               spin_lock_bh(&tctx->chunk_lock);
+               list_add_tail(&chunk->tfm_node, &tctx->all_chunks);
+               spin_unlock_bh(&tctx->chunk_lock);
+               rctx->total_len = state->total_len;
+       }
+       return 0;
+}
+
+/* ahash .finup: buffer the last piece of data, then finalise. */
+static int cmh_sm4_mac_finup(struct ahash_request *req)
+{
+       int rc = cmh_sm4_mac_update(req);
+
+       return rc ? rc : cmh_sm4_mac_final(req);
+}
+
+/* ahash .digest: one-shot init + update + final. */
+static int cmh_sm4_mac_digest(struct ahash_request *req)
+{
+       int rc = cmh_sm4_mac_init(req);
+
+       return rc ? rc : cmh_sm4_mac_finup(req);
+}
+
+/* Registration */
+
+/*
+ * Backing storage for the registered ahash_alg instances.  Slot i is filled
+ * from sm4_mac_algs[i] by cmh_sm4_cmac_register().
+ */
+static struct cmh_sm4_mac_drv sm4_mac_drv_algs[ARRAY_SIZE(sm4_mac_algs)];
+
+/*
+ * ahash .init_tfm: zero the transform context, record the MAC mode of the
+ * algorithm being instantiated and, for XCBC only, allocate the software
+ * "sm4" cipher used by the empty-message fallback.
+ *
+ * Return: 0 on success or the error from crypto_alloc_cipher().
+ */
+static int cmh_sm4_mac_init_tfm(struct crypto_ahash *tfm)
+{
+       struct cmh_sm4_mac_tfm_ctx *ctx = crypto_ahash_ctx(tfm);
+       struct cmh_sm4_mac_drv *drv = container_of(crypto_ahash_alg(tfm),
+                                                  struct cmh_sm4_mac_drv,
+                                                  alg);
+
+       memset(ctx, 0, sizeof(*ctx));
+       INIT_LIST_HEAD(&ctx->all_chunks);
+       spin_lock_init(&ctx->chunk_lock);
+       ctx->sm4_mode = drv->info->sm4_mode;
+
+       /* Allocate SW cipher for XCBC empty-input fallback */
+       if (ctx->sm4_mode == SM4_MODE_XCBC) {
+               struct crypto_cipher *fallback;
+
+               fallback = crypto_alloc_cipher("sm4", 0, 0);
+               if (IS_ERR(fallback))
+                       return PTR_ERR(fallback);
+               ctx->sw_cipher = fallback;
+       }
+
+       crypto_ahash_set_reqsize(tfm, sizeof(struct cmh_sm4_mac_reqctx));
+
+       return 0;
+}
+
+/*
+ * ahash .exit_tfm: release everything owned by the transform -- leftover
+ * chunks, the software cipher, the cached XCBC subkeys and the key context.
+ */
+static void cmh_sm4_mac_exit_tfm(struct crypto_ahash *tfm)
+{
+       struct cmh_sm4_mac_tfm_ctx *ctx = crypto_ahash_ctx(tfm);
+       struct cmh_sm4_mac_chunk *node;
+
+       /* Free any orphaned chunks (e.g. testmgr export/reimport poison) */
+       spin_lock_bh(&ctx->chunk_lock);
+       while (!list_empty(&ctx->all_chunks)) {
+               node = list_first_entry(&ctx->all_chunks,
+                                       struct cmh_sm4_mac_chunk, tfm_node);
+               list_del(&node->tfm_node);
+               kfree_sensitive(node);
+       }
+       spin_unlock_bh(&ctx->chunk_lock);
+
+       if (ctx->sw_cipher)
+               crypto_free_cipher(ctx->sw_cipher);
+
+       memzero_explicit(ctx->xcbc_k1, sizeof(ctx->xcbc_k1));
+       memzero_explicit(ctx->xcbc_k3, sizeof(ctx->xcbc_k3));
+       cmh_key_destroy(&ctx->key);
+}
+
+/**
+ * cmh_sm4_cmac_register() - Register SM4-CMAC/XCBC hash algorithms with the crypto framework
+ *
+ * Populates one ahash_alg per sm4_mac_algs[] entry and registers it.  On a
+ * failure the entries registered so far are unregistered in reverse order.
+ *
+ * Return: 0 on success, negative errno on failure.
+ */
+int cmh_sm4_cmac_register(void)
+{
+       unsigned int n;
+       int rc;
+
+       for (n = 0; n < ARRAY_SIZE(sm4_mac_algs); n++) {
+               const struct cmh_sm4_mac_alg_info *desc = &sm4_mac_algs[n];
+               struct cmh_sm4_mac_drv *slot = &sm4_mac_drv_algs[n];
+               struct ahash_alg *alg = &slot->alg;
+               struct crypto_alg *base = &alg->halg.base;
+
+               memset(alg, 0, sizeof(*alg));
+               slot->info = desc;
+
+               /* Generic crypto_alg identity and properties */
+               strscpy(base->cra_name, desc->alg_name, CRYPTO_MAX_ALG_NAME);
+               strscpy(base->cra_driver_name, desc->drv_name,
+                       CRYPTO_MAX_ALG_NAME);
+               base->cra_module    = THIS_MODULE;
+               base->cra_priority  = 300;
+               base->cra_blocksize = SM4_MAC_BLOCK_SIZE;
+               base->cra_ctxsize   = sizeof(struct cmh_sm4_mac_tfm_ctx);
+               base->cra_flags     = CRYPTO_ALG_KERN_DRIVER_ONLY |
+                                     CRYPTO_ALG_NO_FALLBACK |
+                                     CRYPTO_ALG_ASYNC |
+                                     CRYPTO_ALG_REQ_VIRT;
+
+               /* Hash geometry */
+               alg->halg.digestsize = SM4_MAC_DIGEST_SIZE;
+               alg->halg.statesize = CMH_SM4_MAC_STATE_SIZE;
+
+               /* ahash operations */
+               alg->init_tfm   = cmh_sm4_mac_init_tfm;
+               alg->exit_tfm   = cmh_sm4_mac_exit_tfm;
+               alg->setkey     = cmh_sm4_mac_setkey;
+               alg->init       = cmh_sm4_mac_init;
+               alg->update     = cmh_sm4_mac_update;
+               alg->final      = cmh_sm4_mac_final;
+               alg->finup      = cmh_sm4_mac_finup;
+               alg->digest     = cmh_sm4_mac_digest;
+               alg->export     = cmh_sm4_mac_export;
+               alg->import     = cmh_sm4_mac_import;
+
+               rc = crypto_register_ahash(alg);
+               if (!rc) {
+                       dev_dbg(cmh_dev(), "cmh_sm4_mac: registered %s\n",
+                               desc->alg_name);
+                       continue;
+               }
+
+               dev_err(cmh_dev(), "cmh_sm4_mac: failed to register %s (rc=%d)\n",
+                       desc->alg_name, rc);
+
+               /* n indexes the entry that failed; unwind everything before it */
+               while (n--)
+                       crypto_unregister_ahash(&sm4_mac_drv_algs[n].alg);
+               return rc;
+       }
+
+       return 0;
+}
+
+/**
+ * cmh_sm4_cmac_unregister() - Remove every SM4 MAC hash algorithm from the crypto framework
+ *
+ * Walks the table in registration order and unregisters each entry.
+ */
+void cmh_sm4_cmac_unregister(void)
+{
+       unsigned int idx = 0;
+
+       while (idx < ARRAY_SIZE(sm4_mac_algs)) {
+               struct cmh_sm4_mac_drv *drv = &sm4_mac_drv_algs[idx];
+
+               crypto_unregister_ahash(&drv->alg);
+               dev_dbg(cmh_dev(), "cmh_sm4_mac: unregistered %s\n",
+                       sm4_mac_algs[idx].alg_name);
+               idx++;
+       }
+}
diff --git a/drivers/crypto/cmh/cmh_sm4_skcipher.c b/drivers/crypto/cmh/cmh_sm4_skcipher.c
new file mode 100644
index 000000000000..8cd76cba9235
--- /dev/null
+++ b/drivers/crypto/cmh/cmh_sm4_skcipher.c
@@ -0,0 +1,690 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2026 Cryptography Research, Inc. (CRI).
+ * CMH LKM -- Kernel Crypto API SM4 (skcipher) Driver
+ *
+ * Registers skcipher algorithms with the Linux crypto subsystem:
+ *   ecb(sm4), cbc(sm4), ctr(sm4), cfb(sm4), xts(sm4)
+ *
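+ * Uses the CMH SM4 Core via VCQ commands:
+ *   [SYS_CMD_WRITE] + SM4_CMD_INIT + [SM4_CMD_UPDATE] + SM4_CMD_FINAL
+ *   + VCQ_CMD_FLUSH
+ * (SM4_CMD_UPDATE is only emitted for XTS requests longer than two blocks.)
+ *
+ * The SM4 core requires bidirectional DMA -- both input and output
+ * buffers are mapped, and each SM4_CMD_UPDATE/SM4_CMD_FINAL command
+ * carries an input and an output DMA address.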
+ *
+ * Raw-key atomicity: SYS_CMD_WRITE to SYS_REF_TEMP is packed into
+ * the same VCQ as SM4 commands (see cmh_key.h for details).
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/crypto.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/algapi.h>
+#include <crypto/xts.h>
+#include <crypto/scatterwalk.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/unaligned.h>
+
+#include "cmh_sm4.h"
+#include "cmh_vcq.h"
+#include "cmh_sm4_abi.h"
+#include "cmh_sys_abi.h"
+#include "cmh_sys.h"
+#include "cmh_txn.h"
+#include "cmh_dma.h"
+#include "cmh_key.h"
+
+/* Algorithm Table */
+
+/*
+ * Static description of one SM4 skcipher mode.  cmh_sm4_register() copies
+ * these values into the struct skcipher_alg it registers.
+ *
+ * The sm4_algs[] table initialises this struct positionally, so the field
+ * order must not change without updating the table.
+ */
+struct cmh_sm4_alg_info {
+       u32         sm4_mode;   /* SM4_MODE_* */
+       u32         ivsize;     /* bytes (0 for ECB) */
+       u32         min_keysize; /* bytes; copied to skcipher_alg::min_keysize */
+       u32         max_keysize; /* bytes; copied to skcipher_alg::max_keysize */
+       const char *alg_name;   /* Linux crypto name: "ecb(sm4)" */
+       const char *drv_name;   /* driver name: "cri-cmh-ecb-sm4" */
+};
+
+/*
+ * One entry per registered mode.  Every mode takes a single
+ * CMH_SM4_KEY_SIZE key except XTS, which takes two concatenated keys.
+ */
+static const struct cmh_sm4_alg_info sm4_algs[] = {
+       {
+               .alg_name    = "ecb(sm4)",
+               .drv_name    = "cri-cmh-ecb-sm4",
+               .sm4_mode    = SM4_MODE_ECB,
+               .ivsize      = 0,
+               .min_keysize = CMH_SM4_KEY_SIZE,
+               .max_keysize = CMH_SM4_KEY_SIZE,
+       },
+       {
+               .alg_name    = "cbc(sm4)",
+               .drv_name    = "cri-cmh-cbc-sm4",
+               .sm4_mode    = SM4_MODE_CBC,
+               .ivsize      = CMH_SM4_IV_SIZE,
+               .min_keysize = CMH_SM4_KEY_SIZE,
+               .max_keysize = CMH_SM4_KEY_SIZE,
+       },
+       {
+               .alg_name    = "ctr(sm4)",
+               .drv_name    = "cri-cmh-ctr-sm4",
+               .sm4_mode    = SM4_MODE_CTR,
+               .ivsize      = CMH_SM4_IV_SIZE,
+               .min_keysize = CMH_SM4_KEY_SIZE,
+               .max_keysize = CMH_SM4_KEY_SIZE,
+       },
+       {
+               .alg_name    = "cfb(sm4)",
+               .drv_name    = "cri-cmh-cfb-sm4",
+               .sm4_mode    = SM4_MODE_CFB,
+               .ivsize      = CMH_SM4_IV_SIZE,
+               .min_keysize = CMH_SM4_KEY_SIZE,
+               .max_keysize = CMH_SM4_KEY_SIZE,
+       },
+       {
+               .alg_name    = "xts(sm4)",
+               .drv_name    = "cri-cmh-xts-sm4",
+               .sm4_mode    = SM4_MODE_XTS,
+               .ivsize      = CMH_SM4_IV_SIZE,
+               .min_keysize = CMH_SM4_KEY_SIZE * 2,
+               .max_keysize = CMH_SM4_KEY_SIZE * 2,
+       },
+};
+
+/* Per-transform context (allocated by crypto framework) */
+
+struct cmh_sm4_tfm_ctx {
+       /*
+        * Key state: zeroed in cmh_sm4_init_tfm(), loaded by
+        * cmh_sm4_setkey() and released in cmh_sm4_exit_tfm().
+        */
+       struct cmh_key_ctx key;
+};
+
+/* Per-request context (lives in skcipher_request::__ctx) */
+
+/*
+ * Maximum payload commands:
+ *   [SYS_CMD_WRITE] + SM4_CMD_INIT + [SM4_CMD_UPDATE] + SM4_CMD_FINAL
+ *   + VCQ_CMD_FLUSH = 5
+ * UPDATE is used for XTS data > 2 blocks (see cmh_sm4_crypt).
+ *
+ * CMH_SM4_MAX_PACKED sizes the per-request packed[] array handed to
+ * cmh_vcq_pack_and_submit_async().
+ * NOTE(review): the factor of two is presumably headroom required by the
+ * packer -- confirm against cmh_vcq.h.
+ */
+#define CMH_SM4_MAX_PAYLOAD    5
+#define CMH_SM4_MAX_PACKED     (CMH_SM4_MAX_PAYLOAD * 2)
+
+/*
+ * Per-request state.  cmh_sm4_crypt() zeroes the whole struct before
+ * filling it in, so any field it does not set reads as 0/NULL.
+ */
+struct cmh_sm4_reqctx {
+       dma_addr_t in_dma;      /* DMA_TO_DEVICE mapping of in_buf */
+       dma_addr_t out_dma;     /* DMA_FROM_DEVICE mapping of out_buf */
+       dma_addr_t iv_dma;      /* mapping of iv_buf; only mapped if ivsize > 0 */
+       dma_addr_t iv2_dma;     /* mapping of iv2_buf; only for a CTR split */
+       dma_addr_t key_dma;     /* copy of tctx->key.raw.dma taken at submit */
+       u8 *in_buf;             /* linear copy of req->src (cryptlen bytes) */
+       u8 *out_buf;            /* device output; copied to req->dst on success */
+       u8 *iv_buf;             /* copy of req->iv (NULL when ivsize == 0) */
+       u8 *iv2_buf;            /* IV for the second CTR chunk, else NULL */
+       u32 cryptlen;           /* copy of req->cryptlen */
+       u32 ivsize;             /* copy of the algorithm's ivsize */
+       u32 keylen;             /* copy of tctx->key.raw.len taken at submit */
+       u32 sm4_mode;           /* SM4_MODE_* of the algorithm */
+       u32 sm4_op;             /* SM4_OP_ENCRYPT or SM4_OP_DECRYPT */
+       /*
+        * CTR counter-wrap split state.  ctr_chunk1_len is the byte length
+        * of the first chunk; non-zero means a second chunk is still to be
+        * submitted from the completion callback.  core_id and target_mbx
+        * are only stored when a split is set up.
+        */
+       u32 ctr_chunk1_len;
+       u32 core_id;
+       s32 target_mbx;
+       /*
+        * NOTE(review): key_ref is stored on the split path but
+        * cmh_sm4_ctr_submit_chunk2() does not read it in this file.
+        */
+       u64 key_ref;
+       /* Output buffer for cmh_vcq_pack_and_submit_async() */
+       struct vcq_cmd packed[CMH_SM4_MAX_PACKED];
+};
+
+/* VCQ Builders -- SM4-specific */
+
+/*
+ * Build an SM4_CMD_INIT command in @slot.
+ *
+ * @slot:    command slot to fill; fully zeroed first
+ * @core_id: core instance encoded into the command id
+ * @key_ref: key reference written to cmd_init.key
+ * @iv_dma:  DMA address of the IV written to cmd_init.iv
+ * @keylen:  key length in bytes
+ * @ivlen:   IV length in bytes
+ * @mode:    SM4_MODE_* value
+ * @op:      SM4_OP_* value
+ * @iolen:   total data length announced to the core
+ *
+ * aadlen is always written as 0 by this builder.
+ */
+static void vcq_add_sm4_init(struct vcq_cmd *slot, u32 core_id, u64 key_ref, u64 iv_dma,
+                            u32 keylen, u32 ivlen, u32 mode, u32 op,
+                            u32 iolen)
+{
+       memset(slot, 0, sizeof(*slot));
+
+       /* Command header */
+       slot->id = VCQ_CMD_ID(core_id, 0, 1, SM4_CMD_INIT);
+       slot->magic = VCQ_CMD_MAGIC;
+
+       /* Cipher selection */
+       slot->hwc.sm4.cmd_init.mode = mode;
+       slot->hwc.sm4.cmd_init.op = op;
+
+       /* Key and IV */
+       slot->hwc.sm4.cmd_init.key = key_ref;
+       slot->hwc.sm4.cmd_init.keylen = keylen;
+       slot->hwc.sm4.cmd_init.iv = iv_dma;
+       slot->hwc.sm4.cmd_init.ivlen = ivlen;
+
+       /* Lengths */
+       slot->hwc.sm4.cmd_init.iolen = iolen;
+       slot->hwc.sm4.cmd_init.aadlen = 0;
+}
+
+/*
+ * Build an SM4_CMD_UPDATE command in @slot.
+ *
+ * @slot:       command slot to fill; fully zeroed first
+ * @core_id:    core instance encoded into the command id
+ * @input_dma:  DMA address of the input data
+ * @output_dma: DMA address of the output data
+ * @iolen:      number of bytes covered by this command
+ */
+static void vcq_add_sm4_update(struct vcq_cmd *slot, u32 core_id, u64 input_dma,
+                              u64 output_dma, u32 iolen)
+{
+       memset(slot, 0, sizeof(*slot));
+
+       /* Command header */
+       slot->id = VCQ_CMD_ID(core_id, 0, 1, SM4_CMD_UPDATE);
+       slot->magic = VCQ_CMD_MAGIC;
+
+       /* Data window */
+       slot->hwc.sm4.cmd_update.iolen = iolen;
+       slot->hwc.sm4.cmd_update.output = output_dma;
+       slot->hwc.sm4.cmd_update.input = input_dma;
+}
+
+/*
+ * Build an SM4_CMD_FINAL command in @slot.
+ *
+ * @slot:       command slot to fill; fully zeroed first
+ * @core_id:    core instance encoded into the command id
+ * @input_dma:  DMA address of the input data
+ * @output_dma: DMA address of the output data
+ * @iolen:      number of bytes covered by this command
+ *
+ * The tag and taglen fields are always written as 0 by this builder.
+ */
+static void vcq_add_sm4_final(struct vcq_cmd *slot, u32 core_id, u64 input_dma,
+                             u64 output_dma, u32 iolen)
+{
+       memset(slot, 0, sizeof(*slot));
+
+       /* Command header */
+       slot->id = VCQ_CMD_ID(core_id, 0, 1, SM4_CMD_FINAL);
+       slot->magic = VCQ_CMD_MAGIC;
+
+       /* Data window */
+       slot->hwc.sm4.cmd_final.iolen = iolen;
+       slot->hwc.sm4.cmd_final.output = output_dma;
+       slot->hwc.sm4.cmd_final.input = input_dma;
+
+       /* No tag is passed from this file */
+       slot->hwc.sm4.cmd_final.taglen = 0;
+       slot->hwc.sm4.cmd_final.tag = 0;
+}
+
+/*
+ * We wrap each skcipher_alg with its info pointer in a compound struct,
+ * then use container_of() in cmh_sm4_get_info() to recover it.
+ */
+struct cmh_sm4_alg_drv {
+       struct skcipher_alg              alg;   /* registered with the crypto API */
+       const struct cmh_sm4_alg_info   *info;  /* matching sm4_algs[] entry */
+};
+
+/*
+ * Report whether @mode is one of the modes this driver treats as a
+ * stream cipher (CTR or CFB); cmh_sm4_register() advertises a block
+ * size of 1 for those.
+ */
+static bool sm4_is_stream_mode(u32 mode)
+{
+       switch (mode) {
+       case SM4_MODE_CTR:
+       case SM4_MODE_CFB:
+               return true;
+       default:
+               return false;
+       }
+}
+
+/*
+ * Write the chaining value for the next request into req->iv after a
+ * successful operation.
+ *
+ * @req:     request whose iv and cryptlen are used
+ * @mode:    SM4_MODE_* of the request
+ * @op:      SM4_OP_ENCRYPT or SM4_OP_DECRYPT
+ * @in_buf:  linear input data (cryptlen bytes)
+ * @out_buf: linear output data (cryptlen bytes)
+ *
+ * CBC: req->iv becomes the last ciphertext block.
+ * CFB: same as CBC, but only when at least one full block was processed.
+ *      For sub-block requests the IV is left unchanged; computing
+ *      "cryptlen - block size" would otherwise underflow and read before
+ *      the buffer.
+ * CTR: req->iv is treated as a 128-bit big-endian counter and advanced by
+ *      the number of blocks processed (a partial block counts as one).
+ * Any other mode leaves req->iv untouched.
+ */
+static void cmh_sm4_update_iv(struct skcipher_request *req, u32 mode,
+                             u32 op, const u8 *in_buf, const u8 *out_buf)
+{
+       const u32 bs = CMH_SM4_BLOCK_SIZE;
+       /* Ciphertext is the output on encrypt and the input on decrypt */
+       const u8 *ct = (op == SM4_OP_ENCRYPT) ? out_buf : in_buf;
+       u32 carry;
+       u8 *ctr;
+       int pos;
+
+       if (mode == SM4_MODE_CBC) {
+               memcpy(req->iv, ct + req->cryptlen - bs, bs);
+               return;
+       }
+
+       if (mode == SM4_MODE_CFB) {
+               if (req->cryptlen < bs)
+                       return;
+               memcpy(req->iv, ct + req->cryptlen - bs, bs);
+               return;
+       }
+
+       if (mode != SM4_MODE_CTR)
+               return;
+
+       /*
+        * Add the block count to the counter one byte at a time, starting
+        * at the least significant (last) byte.  "carry" holds what still
+        * has to be added to the next more significant byte.
+        */
+       carry = DIV_ROUND_UP(req->cryptlen, bs);
+       ctr = req->iv;
+       for (pos = bs - 1; pos >= 0 && carry; pos--) {
+               u32 sum = (u32)ctr[pos] + (carry & 0xff);
+
+               ctr[pos] = (u8)sum;
+               carry = (carry >> 8) + (sum >> 8);
+       }
+}
+
+/* skcipher Operations */
+
+/*
+ * Return the sm4_algs[] descriptor belonging to @tfm's algorithm by
+ * stepping from the embedded skcipher_alg back to its cmh_sm4_alg_drv.
+ */
+static const struct cmh_sm4_alg_info *
+cmh_sm4_get_info(struct crypto_skcipher *tfm)
+{
+       const struct cmh_sm4_alg_drv *drv;
+
+       drv = container_of(crypto_skcipher_alg(tfm), struct cmh_sm4_alg_drv,
+                          alg);
+       return drv->info;
+}
+
+/*
+ * skcipher .setkey hook.
+ *
+ * @tfm:    transform being keyed
+ * @key:    raw key bytes
+ * @keylen: length of @key in bytes
+ *
+ * Accepts exactly CMH_SM4_KEY_SIZE bytes, or exactly twice that for XTS.
+ * XTS keys are additionally checked with xts_verify_key().
+ *
+ * Return: -EINVAL on a wrong key length, the xts_verify_key() error for a
+ * rejected XTS key, otherwise the result of cmh_key_setkey_raw().
+ */
+static int cmh_sm4_setkey(struct crypto_skcipher *tfm, const u8 *key,
+                         unsigned int keylen)
+{
+       const bool is_xts = cmh_sm4_get_info(tfm)->sm4_mode == SM4_MODE_XTS;
+       const unsigned int want = is_xts ? CMH_SM4_KEY_SIZE * 2 :
+                                          CMH_SM4_KEY_SIZE;
+       struct cmh_sm4_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
+       int err;
+
+       if (keylen != want)
+               return -EINVAL;
+
+       if (is_xts) {
+               err = xts_verify_key(tfm, key, keylen);
+               if (err)
+                       return err;
+       }
+
+       return cmh_key_setkey_raw(&ctx->key, key, keylen, CORE_ID_SM4);
+}
+
+/*
+ * skcipher .init hook: announce the per-request context size and start
+ * with an all-zero transform context.
+ *
+ * Return: always 0.
+ */
+static int cmh_sm4_init_tfm(struct crypto_skcipher *tfm)
+{
+       struct cmh_sm4_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+       crypto_skcipher_set_reqsize(tfm, sizeof(struct cmh_sm4_reqctx));
+       memset(ctx, 0, sizeof(struct cmh_sm4_tfm_ctx));
+
+       return 0;
+}
+
+/* skcipher .exit hook: release the key state held in the transform context. */
+static void cmh_sm4_exit_tfm(struct crypto_skcipher *tfm)
+{
+       struct cmh_sm4_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
+
+       cmh_key_destroy(&tctx->key);
+}
+
+/* Largest req->cryptlen cmh_sm4_crypt() accepts; longer requests get -EINVAL */
+#define CMH_SM4_MAX_CRYPTLEN   SZ_32M
+
+/*
+ * DMA unmap helper
+ *
+ * Releases every mapping of a fully set-up request, in reverse order of
+ * creation.  The second IV is unmapped only if iv2_buf was allocated
+ * (CTR split), the first IV only if the mode has one.
+ * The buffers themselves are freed separately by cmh_sm4_free_bufs().
+ */
+static void cmh_sm4_unmap_dma(struct cmh_sm4_reqctx *rctx)
+{
+       if (rctx->iv2_buf)
+               cmh_dma_unmap_single(rctx->iv2_dma, rctx->ivsize,
+                                    DMA_TO_DEVICE);
+       if (rctx->ivsize > 0)
+               cmh_dma_unmap_single(rctx->iv_dma, rctx->ivsize,
+                                    DMA_TO_DEVICE);
+       cmh_dma_unmap_single(rctx->out_dma, rctx->cryptlen, DMA_FROM_DEVICE);
+       cmh_dma_unmap_single(rctx->in_dma, rctx->cryptlen, DMA_TO_DEVICE);
+}
+
+/*
+ * Free all per-request buffers and clear the pointers.  The data buffers
+ * go through kfree_sensitive(); the IV buffers use plain kfree().
+ */
+static void cmh_sm4_free_bufs(struct cmh_sm4_reqctx *rctx)
+{
+       kfree(rctx->iv2_buf);
+       rctx->iv2_buf = NULL;
+       kfree(rctx->iv_buf);
+       rctx->iv_buf = NULL;
+       kfree_sensitive(rctx->out_buf);
+       rctx->out_buf = NULL;
+       kfree_sensitive(rctx->in_buf);
+       rctx->in_buf = NULL;
+}
+
+/*
+ * Submit the second CTR chunk after the first completes.
+ * Called from cmh_sm4_complete when ctr_chunk1_len > 0.
+ */
+static int cmh_sm4_ctr_submit_chunk2(struct skcipher_request *req);
+
+/*
+ * Completion callback passed to cmh_vcq_pack_and_submit_async().
+ *
+ * @data:  the struct skcipher_request that was submitted
+ * @error: completion status
+ *
+ * -EINPROGRESS is forwarded to cmh_complete() without touching any
+ * request resources.  For every other status the DMA mappings are
+ * released, on success the output is copied to req->dst and req->iv is
+ * updated, the bounce buffers are freed and the request is completed.
+ * A pending CTR second chunk is submitted first; see below.
+ */
+static void cmh_sm4_complete(void *data, int error)
+{
+       struct skcipher_request *req = data;
+       struct cmh_sm4_reqctx *rctx = skcipher_request_ctx(req);
+
+       if (error == -EINPROGRESS) {
+               cmh_complete(&req->base, error);
+               return;
+       }
+
+       /*
+        * CTR counter-wrap: first chunk completed, submit second.
+        * DMA mappings remain valid (they cover the full buffer).
+        *
+        * Recursion depth bounded: chunk2 clears ctr_chunk1_len before
+        * submission, so the second cmh_sm4_complete invocation sees 0
+        * and finalizes (max depth = 2).
+        */
+       if (rctx->ctr_chunk1_len && !error) {
+               int ret = cmh_sm4_ctr_submit_chunk2(req);
+
+               /* 0 and -EBUSY both leave the request in flight */
+               if (!ret || ret == -EBUSY)
+                       return;
+               /* Submission failed; clean up below */
+               error = ret;
+       }
+
+       /* Unmap before the CPU reads out_buf */
+       cmh_sm4_unmap_dma(rctx);
+
+       if (!error) {
+               scatterwalk_map_and_copy(rctx->out_buf, req->dst,
+                                        0, rctx->cryptlen, 1);
+               cmh_sm4_update_iv(req, rctx->sm4_mode, rctx->sm4_op,
+                                 rctx->in_buf, rctx->out_buf);
+       }
+
+       cmh_sm4_free_bufs(rctx);
+       cmh_complete(&req->base, error);
+}
+
+/*
+ * Submit the remainder of a CTR request that was split at the counter
+ * wrap.  The second chunk starts ctr_chunk1_len bytes into the already
+ * mapped input/output buffers and uses the IV prepared in iv2_buf.
+ *
+ * ctr_chunk1_len is cleared before submission so that the next
+ * completion finalises the request.
+ *
+ * NOTE(review): the key length and type are re-read from the transform
+ * context here while the key DMA address comes from the request context.
+ *
+ * Return: the result of cmh_vcq_pack_and_submit_async().
+ */
+static int cmh_sm4_ctr_submit_chunk2(struct skcipher_request *req)
+{
+       struct cmh_sm4_reqctx *rctx = skcipher_request_ctx(req);
+       struct cmh_sm4_tfm_ctx *tctx =
+               crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
+       const u32 done = rctx->ctr_chunk1_len;
+       const u32 remaining = rctx->cryptlen - done;
+       const u64 key_ref = SYS_REF_TEMP;
+       const u32 keylen = tctx->key.raw.len;
+       struct vcq_cmd cmds[CMH_SM4_MAX_PAYLOAD];
+       struct vcq_cmd *cur = cmds;
+
+       /* Clear split flag so next completion is final */
+       rctx->ctr_chunk1_len = 0;
+
+       /* Reload the raw key into the temporary reference */
+       vcq_add_sys_write(cur++, SYS_REF_TEMP,
+                         (u64)rctx->key_dma, SYS_REF_NONE,
+                         tctx->key.raw.len,
+                         tctx->key.raw.sys_type);
+
+       /* Fresh INIT with the second IV, then the remaining data */
+       vcq_add_sm4_init(cur++, rctx->core_id, key_ref,
+                        (u64)rctx->iv2_dma, keylen, rctx->ivsize,
+                        rctx->sm4_mode, rctx->sm4_op, remaining);
+       vcq_add_sm4_final(cur++, rctx->core_id,
+                         (u64)(rctx->in_dma + done),
+                         (u64)(rctx->out_dma + done), remaining);
+       vcq_add_flush(cur++, rctx->core_id);
+
+       return cmh_vcq_pack_and_submit_async(cmds, (u32)(cur - cmds),
+                                            rctx->packed,
+                                            CMH_SM4_MAX_PACKED,
+                                            rctx->target_mbx,
+                                            cmh_sm4_complete, req,
+                                            !!(req->base.flags &
+                                               CRYPTO_TFM_REQ_MAY_BACKLOG),
+                                            cmh_tm_async_timeout_jiffies());
+}
+
+/*
+ * Common encrypt/decrypt path for all SM4 skcipher modes.
+ *
+ * @req:    skcipher request (src, dst, iv, cryptlen)
+ * @sm4_op: SM4_OP_ENCRYPT or SM4_OP_DECRYPT
+ *
+ * Copies req->src into a linear bounce buffer, maps input, output and IV
+ * for DMA, builds the VCQ command list and submits it asynchronously.
+ * The result is copied to req->dst by cmh_sm4_complete().
+ *
+ * Return:
+ *   -ENOKEY      no key has been set
+ *   0            req->cryptlen is 0 (nothing to do)
+ *   -EINVAL      cryptlen exceeds CMH_SM4_MAX_CRYPTLEN, is shorter than
+ *                one block for XTS, or is not block-aligned for ECB/CBC
+ *   -ENOMEM      a buffer allocation or DMA mapping failed
+ *   -EINPROGRESS the request was submitted
+ *   -EBUSY       passed through from the submit call; request resources
+ *                are NOT released on this path
+ *   other        submit error; all request resources have been released
+ */
+static int cmh_sm4_crypt(struct skcipher_request *req, u32 sm4_op)
+{
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct cmh_sm4_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
+       const struct cmh_sm4_alg_info *info = cmh_sm4_get_info(tfm);
+       struct cmh_sm4_reqctx *rctx = skcipher_request_ctx(req);
+       struct vcq_cmd cmds[CMH_SM4_MAX_PAYLOAD];
+       u64 key_ref;
+       u32 keylen;
+       struct core_dispatch d;
+       s32 target_mbx;
+       u32 core_id;
+       u32 idx;
+       int ret;
+       gfp_t gfp;
+
+       if (tctx->key.mode == CMH_KEY_NONE)
+               return -ENOKEY;
+
+       if (!req->cryptlen)
+               return 0;
+
+       if (req->cryptlen > CMH_SM4_MAX_CRYPTLEN)
+               return -EINVAL;
+
+       /*
+        * Length rules per mode: CTR/CFB take any length, XTS needs at
+        * least one block, everything else must be block-aligned.
+        */
+       switch (info->sm4_mode) {
+       case SM4_MODE_CTR:
+       case SM4_MODE_CFB:
+               break;
+       case SM4_MODE_XTS:
+               if (req->cryptlen < CMH_SM4_BLOCK_SIZE)
+                       return -EINVAL;
+               break;
+       default:
+               if (req->cryptlen & (CMH_SM4_BLOCK_SIZE - 1))
+                       return -EINVAL;
+               break;
+       }
+
+       gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
+             GFP_KERNEL : GFP_ATOMIC;
+
+       memset(rctx, 0, sizeof(*rctx));
+       rctx->cryptlen = req->cryptlen;
+       rctx->ivsize = info->ivsize;
+       rctx->sm4_mode = info->sm4_mode;
+       rctx->sm4_op = sm4_op;
+       rctx->iv2_buf = NULL;
+
+       /*
+        * Linear bounce buffer for the input.
+        * NOTE(review): a single kmalloc() of up to CMH_SM4_MAX_CRYPTLEN
+        * (32 MiB) may exceed KMALLOC_MAX_SIZE on common configurations --
+        * confirm the intended upper bound.
+        */
+       rctx->in_buf = kmalloc(req->cryptlen, gfp);
+       if (!rctx->in_buf)
+               return -ENOMEM;
+
+       scatterwalk_map_and_copy(rctx->in_buf, req->src, 0, req->cryptlen, 0);
+
+       rctx->in_dma = cmh_dma_map_single(rctx->in_buf, req->cryptlen,
+                                         DMA_TO_DEVICE);
+       if (cmh_dma_map_error(rctx->in_dma)) {
+               ret = -ENOMEM;
+               goto out_free_in;
+       }
+
+       /* Separate bounce buffer for the output */
+       rctx->out_buf = kmalloc(req->cryptlen, gfp);
+       if (!rctx->out_buf) {
+               ret = -ENOMEM;
+               goto out_unmap_in;
+       }
+
+       rctx->out_dma = cmh_dma_map_single(rctx->out_buf, req->cryptlen,
+                                          DMA_FROM_DEVICE);
+       if (cmh_dma_map_error(rctx->out_dma)) {
+               ret = -ENOMEM;
+               goto out_free_out;
+       }
+
+       /* Private copy of the IV for the device (skipped for ECB) */
+       if (info->ivsize > 0) {
+               rctx->iv_buf = kmemdup(req->iv, info->ivsize, gfp);
+               if (!rctx->iv_buf) {
+                       ret = -ENOMEM;
+                       goto out_unmap_out;
+               }
+               rctx->iv_dma = cmh_dma_map_single(rctx->iv_buf, info->ivsize,
+                                                 DMA_TO_DEVICE);
+               if (cmh_dma_map_error(rctx->iv_dma)) {
+                       ret = -ENOMEM;
+                       goto out_free_iv;
+               }
+       }
+
+       idx = 0;
+
+       /* First command: load the raw key into the temporary reference */
+       rctx->key_dma = tctx->key.raw.dma;
+       rctx->keylen = tctx->key.raw.len;
+       vcq_add_sys_write(&cmds[idx++], SYS_REF_TEMP,
+                         (u64)rctx->key_dma, SYS_REF_NONE,
+                         tctx->key.raw.len,
+                         tctx->key.raw.sys_type);
+       key_ref = SYS_REF_TEMP;
+       keylen = tctx->key.raw.len;
+       d = cmh_core_select_instance(CMH_CORE_SM4);
+       target_mbx = d.mbx_idx;
+       core_id = d.core_id;
+
+       /*
+        * iolen in INIT: passed for all modes.  The EIP-40 eSW ignores
+        * it for CTR (stream cipher), but uses it for XTS/CBC/ECB to
+        * know the total data length.  Pass cryptlen unconditionally.
+        */
+       vcq_add_sm4_init(&cmds[idx++], core_id, key_ref, (u64)rctx->iv_dma,
+                        keylen, info->ivsize, info->sm4_mode, sm4_op,
+                        req->cryptlen);
+
+       if (info->sm4_mode == SM4_MODE_XTS &&
+           req->cryptlen > 2 * CMH_SM4_BLOCK_SIZE) {
+               u32 final_len, update_len;
+
+               /*
+                * XTS longer than two blocks: FINAL gets the last full
+                * block plus the partial tail, or the last two blocks
+                * when the length is block-aligned.  UPDATE gets the rest.
+                */
+               if (req->cryptlen & (CMH_SM4_BLOCK_SIZE - 1))
+                       final_len = CMH_SM4_BLOCK_SIZE +
+                                   (req->cryptlen & (CMH_SM4_BLOCK_SIZE - 1));
+               else
+                       final_len = 2 * CMH_SM4_BLOCK_SIZE;
+
+               update_len = req->cryptlen - final_len;
+
+               vcq_add_sm4_update(&cmds[idx++], core_id,
+                                  (u64)rctx->in_dma,
+                                  (u64)rctx->out_dma, update_len);
+               vcq_add_sm4_final(&cmds[idx++], core_id,
+                                 (u64)(rctx->in_dma + update_len),
+                                 (u64)(rctx->out_dma + update_len),
+                                 final_len);
+       } else if (info->sm4_mode == SM4_MODE_CTR) {
+               /*
+                * CTR counter-wrap: split at the 64-bit boundary,
+                * consistent with the AES-SCA driver.  The completion
+                * callback submits chunk2 with IV = {upper64+1, 0}.
+                */
+               u64 lower64 = get_unaligned_be64(rctx->iv_buf + 8);
+               u32 nblocks = DIV_ROUND_UP(req->cryptlen,
+                                         CMH_SM4_BLOCK_SIZE);
+               /*
+                * Blocks that can be processed before the low 64 bits
+                * wrap to zero (2^64 - lower64, capped at U64_MAX when
+                * lower64 is 0).
+                */
+               u64 bwrap = lower64 ? (~lower64 + 1ULL) : U64_MAX;
+
+               if (nblocks > bwrap) {
+                       /* bwrap < nblocks here, so it fits in a u32 */
+                       u32 chunk1 = (u32)bwrap * CMH_SM4_BLOCK_SIZE;
+                       u64 upper64;
+
+                       /* Prepare second IV for chained submission */
+                       rctx->iv2_buf = kmalloc(info->ivsize, gfp);
+                       if (!rctx->iv2_buf) {
+                               ret = -ENOMEM;
+                               goto out_unmap_iv;
+                       }
+                       upper64 = get_unaligned_be64(rctx->iv_buf);
+                       put_unaligned_be64(upper64 + 1, rctx->iv2_buf);
+                       put_unaligned_be64(0, rctx->iv2_buf + 8);
+
+                       rctx->iv2_dma =
+                               cmh_dma_map_single(rctx->iv2_buf,
+                                                  info->ivsize,
+                                                  DMA_TO_DEVICE);
+                       if (cmh_dma_map_error(rctx->iv2_dma)) {
+                               ret = -ENOMEM;
+                               goto out_free_iv2;
+                       }
+
+                       /* Store state for the chained second submission */
+                       rctx->ctr_chunk1_len = chunk1;
+                       rctx->core_id = core_id;
+                       rctx->target_mbx = target_mbx;
+                       rctx->key_ref = key_ref;
+
+                       /* First transaction: only chunk1 */
+                       vcq_add_sm4_final(&cmds[idx++], core_id,
+                                         (u64)rctx->in_dma,
+                                         (u64)rctx->out_dma, chunk1);
+               } else {
+                       /* No wrap: single FINAL with all data */
+                       vcq_add_sm4_final(&cmds[idx++], core_id,
+                                         (u64)rctx->in_dma,
+                                         (u64)rctx->out_dma,
+                                         req->cryptlen);
+               }
+       } else {
+               /* ECB, CBC, CFB and short XTS: one FINAL with all data */
+               vcq_add_sm4_final(&cmds[idx++], core_id,
+                                 (u64)rctx->in_dma,
+                                 (u64)rctx->out_dma, req->cryptlen);
+       }
+
+       vcq_add_flush(&cmds[idx++], core_id);
+
+       ret = cmh_vcq_pack_and_submit_async(cmds, idx, rctx->packed,
+                                           CMH_SM4_MAX_PACKED, target_mbx,
+                                           cmh_sm4_complete, req,
+                                           !!(req->base.flags &
+                                              CRYPTO_TFM_REQ_MAY_BACKLOG),
+                                           cmh_tm_async_timeout_jiffies());
+       /*
+        * NOTE(review): -EBUSY is returned without cleanup, presumably
+        * because the request was backlogged and cmh_sm4_complete() will
+        * still run -- confirm against cmh_vcq_pack_and_submit_async().
+        */
+       if (ret == -EBUSY)
+               return -EBUSY;
+       if (ret)
+               goto out_cleanup_all;
+
+       return -EINPROGRESS;
+
+       /* Error unwinding: each label undoes one setup step, newest first */
+out_cleanup_all:
+       if (rctx->iv2_buf) {
+               cmh_dma_unmap_single(rctx->iv2_dma, info->ivsize,
+                                    DMA_TO_DEVICE);
+       }
+out_free_iv2:
+       kfree(rctx->iv2_buf);
+out_unmap_iv:
+       if (info->ivsize > 0)
+               cmh_dma_unmap_single(rctx->iv_dma, info->ivsize,
+                                    DMA_TO_DEVICE);
+out_free_iv:
+       kfree(rctx->iv_buf);
+out_unmap_out:
+       cmh_dma_unmap_single(rctx->out_dma, req->cryptlen, DMA_FROM_DEVICE);
+out_free_out:
+       kfree_sensitive(rctx->out_buf);
+out_unmap_in:
+       cmh_dma_unmap_single(rctx->in_dma, req->cryptlen, DMA_TO_DEVICE);
+out_free_in:
+       kfree_sensitive(rctx->in_buf);
+       return ret;
+}
+
+/* skcipher .encrypt hook: run the common path with SM4_OP_ENCRYPT. */
+static int cmh_sm4_encrypt(struct skcipher_request *req)
+{
+       return cmh_sm4_crypt(req, SM4_OP_ENCRYPT);
+}
+
+/* skcipher .decrypt hook: run the common path with SM4_OP_DECRYPT. */
+static int cmh_sm4_decrypt(struct skcipher_request *req)
+{
+       return cmh_sm4_crypt(req, SM4_OP_DECRYPT);
+}
+
+/* Registration */
+
+/* One skcipher_alg wrapper per sm4_algs[] entry, filled in at registration */
+static struct cmh_sm4_alg_drv sm4_drv_algs[ARRAY_SIZE(sm4_algs)];
+
+/**
+ * cmh_sm4_register() - Register the SM4 ECB/CBC/CTR/CFB/XTS skcipher algorithms
+ *
+ * Builds one struct skcipher_alg per sm4_algs[] entry and registers it.
+ * If any registration fails, the algorithms registered so far are
+ * unregistered again before returning.
+ *
+ * Return: 0 on success, negative errno on failure.
+ */
+int cmh_sm4_register(void)
+{
+       unsigned int i;
+       int ret;
+
+       for (i = 0; i < ARRAY_SIZE(sm4_algs); i++) {
+               const struct cmh_sm4_alg_info *info = &sm4_algs[i];
+               struct cmh_sm4_alg_drv *drv = &sm4_drv_algs[i];
+               struct skcipher_alg *alg = &drv->alg;
+
+               drv->info = info;
+
+               memset(alg, 0, sizeof(*alg));
+
+               alg->setkey      = cmh_sm4_setkey;
+               alg->encrypt     = cmh_sm4_encrypt;
+               alg->decrypt     = cmh_sm4_decrypt;
+               alg->init        = cmh_sm4_init_tfm;
+               alg->exit        = cmh_sm4_exit_tfm;
+               alg->min_keysize = info->min_keysize;
+               alg->max_keysize = info->max_keysize;
+               alg->ivsize      = info->ivsize;
+
+               strscpy(alg->base.cra_name, info->alg_name,
+                       CRYPTO_MAX_ALG_NAME);
+               strscpy(alg->base.cra_driver_name, info->drv_name,
+                       CRYPTO_MAX_ALG_NAME);
+               alg->base.cra_priority  = 300;
+               /*
+                * The request path allocates bounce and IV buffers for
+                * every request, so advertise CRYPTO_ALG_ALLOCATES_MEMORY
+                * to let users that cannot tolerate allocations in the
+                * I/O path skip this implementation.
+                */
+               alg->base.cra_flags     = CRYPTO_ALG_KERN_DRIVER_ONLY |
+                                         CRYPTO_ALG_ASYNC |
+                                         CRYPTO_ALG_ALLOCATES_MEMORY;
+               /* CTR and CFB are exposed as stream ciphers */
+               alg->base.cra_blocksize = sm4_is_stream_mode(info->sm4_mode)
+                                         ? 1 : CMH_SM4_BLOCK_SIZE;
+               alg->base.cra_ctxsize  = sizeof(struct cmh_sm4_tfm_ctx);
+               alg->base.cra_module   = THIS_MODULE;
+
+               ret = crypto_register_skcipher(alg);
+               if (ret) {
+                       dev_err(cmh_dev(), "cmh_sm4: failed to register %s (rc=%d)\n",
+                               info->alg_name, ret);
+                       goto err_unregister;
+               }
+
+               dev_dbg(cmh_dev(), "cmh_sm4: registered %s\n", info->alg_name);
+       }
+
+       return 0;
+
+err_unregister:
+       /* Entry i failed; roll back entries i-1 .. 0 */
+       while (i--)
+               crypto_unregister_skcipher(&sm4_drv_algs[i].alg);
+       return ret;
+}
+
+/**
+ * cmh_sm4_unregister() - Remove the SM4 skcipher algorithms
+ *
+ * Visits every sm4_algs entry in ascending index order, unregisters the
+ * matching sm4_drv_algs skcipher and emits one debug line for it.
+ */
+void cmh_sm4_unregister(void)
+{
+       unsigned int n = 0;
+
+       while (n < ARRAY_SIZE(sm4_algs)) {
+               crypto_unregister_skcipher(&sm4_drv_algs[n].alg);
+               dev_dbg(cmh_dev(), "cmh_sm4: unregistered %s\n", sm4_algs[n].alg_name);
+               n++;
+       }
+}
diff --git a/drivers/crypto/cmh/include/cmh_sm4.h b/drivers/crypto/cmh/include/cmh_sm4.h
new file mode 100644
index 000000000000..9f4b0fb918db
--- /dev/null
+++ b/drivers/crypto/cmh/include/cmh_sm4.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2026 Cryptography Research, Inc. (CRI).
+ * CMH LKM -- SM4 Crypto API Drivers
+ *
+ * Registers SM4 algorithms with the Linux crypto subsystem:
+ *   skcipher: ecb/cbc/ctr/cfb/xts(sm4)
+ *   aead:     gcm/ccm(sm4)
+ *   ahash:    cmac/xcbc(sm4)
+ */
+
+#ifndef CMH_SM4_H
+#define CMH_SM4_H
+
+int  cmh_sm4_register(void);
+void cmh_sm4_unregister(void);
+
+int  cmh_sm4_aead_register(void);
+void cmh_sm4_aead_unregister(void);
+
+int  cmh_sm4_cmac_register(void);
+void cmh_sm4_cmac_unregister(void);
+
+#endif /* CMH_SM4_H */
--
2.43.7


** This message and any attachments are for the sole use of the intended recipient(s). It may contain information that is confidential and privileged. If you are not the intended recipient of this message, you are prohibited from printing, copying, forwarding or saving it. Please delete the message and attachments and notify the sender immediately. **

Rambus Inc.<http://www.rambus.com>

^ permalink raw reply related

* [PATCH 06/19] crypto: cmh - add CSHAKE/KMAC ahash
From: Saravanakrishnan Krishnamoorthy @ 2026-06-25 17:33 UTC (permalink / raw)
  To: Albert Ou, Alex Ousherovitch, Conor Dooley, David S. Miller,
	Herbert Xu, Jonathan Corbet, Krzysztof Kozlowski, Palmer Dabbelt,
	Paul Walmsley, Rob Herring, Saravanakrishnan Krishnamoorthy,
	Shuah Khan
  Cc: Alexandre Ghiti, devicetree, Joel Wittenauer, linux-api,
	linux-crypto, linux-doc, linux-kernel, linux-kselftest,
	linux-riscv, Shuah Khan, sipsupport, Thi Nguyen
In-Reply-To: <20260625173328.1140487-1-skrishnamoorthy@rambus.com>

From: Alex Ousherovitch <aousherovitch@rambus.com>

Register ahash algorithms for cSHAKE128, cSHAKE256, KMAC128, and
KMAC256 using the CMH hash core.  cSHAKE supports incremental
update and export/import.  KMAC has a 64KB data cap imposed by the
hardware.

Co-developed-by: Saravanakrishnan Krishnamoorthy <skrishnamoorthy@rambus.com>
Signed-off-by: Saravanakrishnan Krishnamoorthy <skrishnamoorthy@rambus.com>
Signed-off-by: Alex Ousherovitch <aousherovitch@rambus.com>
Reviewed-by: Joel Wittenauer <Joel.Wittenauer@cryptography.com>
Reviewed-by: Thi Nguyen <thin@rambus.com>
---
 drivers/crypto/cmh/Makefile             |   4 +-
 drivers/crypto/cmh/cmh_cshake.c         | 808 ++++++++++++++++++++++++
 drivers/crypto/cmh/cmh_kmac.c           | 630 ++++++++++++++++++
 drivers/crypto/cmh/cmh_main.c           |  18 +
 drivers/crypto/cmh/include/cmh_cshake.h |  16 +
 drivers/crypto/cmh/include/cmh_kmac.h   |  16 +
 6 files changed, 1491 insertions(+), 1 deletion(-)
 create mode 100644 drivers/crypto/cmh/cmh_cshake.c
 create mode 100644 drivers/crypto/cmh/cmh_kmac.c
 create mode 100644 drivers/crypto/cmh/include/cmh_cshake.h
 create mode 100644 drivers/crypto/cmh/include/cmh_kmac.h

diff --git a/drivers/crypto/cmh/Makefile b/drivers/crypto/cmh/Makefile
index 1f760c0214ef..2bb240b97f31 100644
--- a/drivers/crypto/cmh/Makefile
+++ b/drivers/crypto/cmh/Makefile
@@ -16,7 +16,9 @@ cmh-y := \
        cmh_key.o \
        cmh_sys.o \
        cmh_hash.o \
-       cmh_hmac.o
+       cmh_hmac.o \
+       cmh_cshake.o \
+       cmh_kmac.o

 # Management ioctl device (/dev/cmh_mgmt): key lifecycle, PKE, PQC ioctls.
 cmh-$(CONFIG_CRYPTO_DEV_CMH_MGMT) += \
diff --git a/drivers/crypto/cmh/cmh_cshake.c b/drivers/crypto/cmh/cmh_cshake.c
new file mode 100644
index 000000000000..02f9b853dd33
--- /dev/null
+++ b/drivers/crypto/cmh/cmh_cshake.c
@@ -0,0 +1,808 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2026 Cryptography Research, Inc. (CRI).
+ * CMH LKM -- Kernel Crypto API CSHAKE Driver
+ *
+ * Registers cSHAKE-128 and cSHAKE-256 as ahash algorithms using the
+ * CMH Hash Core (HC) via HC_CMD_CSHAKE.
+ *
+ * CSHAKE (NIST SP 800-185) extends SHAKE with two domain separation
+ * parameters: function name N and customization string S.  When both
+ * are empty, cSHAKE reduces to plain SHAKE -- the driver falls back to
+ * HC_CMD_INIT in that case (per SP 800-185 S6.2).
+ *
+ * N and S are set via .setkey() using a self-describing binary header
+ * (matching the upstream authenc precedent):
+ *
+ *   struct cshake_cfg { __be32 n_len; __be32 s_len; };
+ *   setkey blob: cshake_cfg || N[n_len] || S[s_len]
+ *
+ * If .setkey() is never called, the driver defaults to plain SHAKE
+ * (N="" S="").  .setkey() is per-tfm, not per-request.
+ *
+ * N is embedded inline in the HC_CMD_CSHAKE struct (max 36 bytes).
+ * S is passed as VCQ inline data following the command slot (multi-span).
+ *
+ * Uses the same self-contained transaction model as cmh_hash.c:
+ *   .init()   -> software-only
+ *   .update() -> software-only (accumulate chunks)
+ *   .final()  -> (CSHAKE [+ inline S] | INIT [+ RESTORE]) [+ GATHER]
+ *                + FINAL + FLUSH
+ *   .export() -> (CSHAKE [+ inline S] | INIT [+ RESTORE]) [+ GATHER]
+ *                + SAVE + FLUSH
+ *   .import() -> restore HC context checkpoint (software-only)
+ *
+ * The HC core supports HC_CMD_SAVE / HC_CMD_RESTORE for cSHAKE mode.
+ * The cSHAKE domain-separation prefix (function name N, customization
+ * string S) is absorbed into the Keccak sponge state by HC_CMD_CSHAKE
+ * on the first submission, and preserved through save/restore.
+ * Export/import enables crypto API transform cloning.
+ *
+ * .setkey() here configures public domain-separation parameters (N, S),
+ * not a secret key.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/crypto.h>
+#include <crypto/internal/hash.h>
+#include <linux/scatterlist.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <asm/byteorder.h>
+
+#include "cmh_cshake.h"
+#include "cmh_vcq.h"
+#include "cmh_hc_abi.h"
+#include "cmh_txn.h"
+#include "cmh_dma.h"
+
+/* Algorithm Table */
+
+/* Static description of one cSHAKE variant registered as an ahash. */
+struct cmh_cshake_alg_info {
+       u32         hc_algo;     /* algorithm selector placed in HC commands */
+       u32         digest_size; /* output bytes; registered as digestsize */
+       const char *alg_name;    /* copied into cra_name at registration */
+       const char *drv_name;    /* copied into cra_driver_name at registration */
+};
+
+/*
+ * Table of supported variants.  cmh_cshake_register() walks this table
+ * and registers one ahash per entry; the index of an entry matches the
+ * index of its driver slot in cmh_cshake_drvs[].
+ */
+static const struct cmh_cshake_alg_info cmh_cshake_algs_info[] = {
+       {
+               .hc_algo     = HC_ALGO_SHAKE128,
+               .digest_size = CMH_SHAKE128_DIGEST_SIZE,
+               .alg_name    = "cshake128",
+               .drv_name    = "cri-cmh-cshake128",
+       },
+       {
+               .hc_algo     = HC_ALGO_SHAKE256,
+               .digest_size = CMH_SHAKE256_DIGEST_SIZE,
+               .alg_name    = "cshake256",
+               .drv_name    = "cri-cmh-cshake256",
+       },
+};
+
+/* Number of entries in cmh_cshake_algs_info[] (a size_t). */
+#define CMH_CSHAKE_ALG_COUNT  ARRAY_SIZE(cmh_cshake_algs_info)
+
+/* Per-Request State */
+
+/*
+ * One buffered .update() payload.  Each chunk is linked twice: on the
+ * owning request's list (in arrival order) and on the transform-wide
+ * list that cra_exit drains to free chunks a request left behind.
+ */
+struct cmh_cshake_chunk {
+       struct list_head  list;     /* entry in cmh_cshake_reqctx.chunks */
+       struct list_head  tfm_node; /* per-tfm orphan tracking */
+       u32               len;      /* number of valid bytes in data[] */
+       u8                data[];   /* copy of the caller's input */
+};
+
+/*
+ * Max payload slots built for one submission:
+ *
+ *   cSHAKE path:     CSHAKE (1) + inline S (ceil(S_len/64)) + GATHER (1)
+ *                    + FINAL or SAVE (1) + FLUSH (1)
+ *   Checkpoint path: INIT + RESTORE + GATHER + FINAL or SAVE + FLUSH = 5
+ *   Plain SHAKE:     INIT + GATHER + FINAL or SAVE + FLUSH = 4
+ *
+ * S can be up to SHAKE-128 block (168 bytes) = 3 inline slots.
+ * Conservative: 1 + 3 + 1 + 1 + 1 = 7, plus headers.
+ *
+ * NOTE(review): .setkey() bounds S by HC_CSHAKE_MAX_CUSTOMLEN, which is
+ * defined outside this file -- confirm it cannot make the cSHAKE path
+ * exceed CMH_CSHAKE_MAX_PAYLOAD, since the command array is on the stack.
+ */
+#define CMH_CSHAKE_MAX_PAYLOAD   8
+/* Capacity of the per-request packed command buffer handed to submit. */
+#define CMH_CSHAKE_MAX_PACKED    (CMH_CSHAKE_MAX_PAYLOAD * 2)
+
+/*
+ * Checkpoint embedded inline: the kernel ahash API has no per-request
+ * destructor, so a heap-allocated checkpoint leaks if a request is
+ * abandoned without .final().
+ */
+/* Per-request state; sized via crypto_ahash_set_reqsize() in cra_init. */
+struct cmh_cshake_reqctx {
+       const struct cmh_cshake_alg_info *info;   /* variant of the owning tfm */
+       int                               error;  /* sticky error set by .update() */
+       struct list_head                  chunks; /* buffered input, arrival order */
+       u32                               num_chunks; /* entries on chunks */
+       u32                               total_len;  /* sum of buffered bytes */
+       u32                               has_checkpoint; /* checkpoint[] is valid */
+       u8                                checkpoint[HC_CONTEXT_SIZE]; /* saved HC context */
+       /* DMA state for async final */
+       dma_addr_t                        digest_dma; /* mapping of digest_buf */
+       dma_addr_t                        ckpt_dma;   /* mapping of checkpoint[] */
+       u8                               *digest_buf; /* bounce buffer for the digest */
+       struct cmh_sg_map                *sgm;        /* gather map over chunks */
+       struct vcq_cmd packed[CMH_CSHAKE_MAX_PACKED]; /* packed commands for submit */
+};
+
+/* Per-Transform State (carries N and S across requests) */
+
+/*
+ * N and S are heap copies owned by the transform: installed by
+ * .setkey() and released in cra_exit.  The chunk list and its lock
+ * track every buffered chunk of every request on this transform.
+ */
+struct cmh_cshake_tfm_ctx {
+       u8  *func_name;     /* N (function name), NULL if empty */
+       u32  func_name_len; /* length of func_name in bytes */
+       u8  *custom;        /* S (customization string), NULL if empty */
+       u32  custom_len;    /* length of custom in bytes */
+       spinlock_t         chunk_lock;  /* protects all_chunks */
+       struct list_head   all_chunks;  /* orphan-safe chunk tracking */
+};
+
+/* VCQ Builders (cSHAKE-specific; shared builders in cmh_hc_abi.h / cmh_vcq.h) */
+
+/*
+ * Build an HC_CMD_SAVE command in @slot: the target is @outlen bytes at
+ * bus address @output_phys.  The slot is fully overwritten.
+ */
+static void vcq_add_hc_save(struct vcq_cmd *slot, u32 core_id,
+                           u64 output_phys, u32 outlen)
+{
+       /* Start from an all-zero slot so unused fields are cleared. */
+       memset(slot, 0, sizeof(*slot));
+
+       slot->hwc.hc.cmd_save.outlen = outlen;
+       slot->hwc.hc.cmd_save.output = output_phys;
+
+       slot->id = VCQ_CMD_ID(core_id, 0, 1, HC_CMD_SAVE);
+       slot->magic = VCQ_CMD_MAGIC;
+}
+
+/*
+ * Build an HC_CMD_RESTORE command in @slot: the source is @inlen bytes
+ * at bus address @input_phys.  The slot is fully overwritten.
+ */
+static void vcq_add_hc_restore(struct vcq_cmd *slot, u32 core_id,
+                              u64 input_phys, u32 inlen)
+{
+       /* Start from an all-zero slot so unused fields are cleared. */
+       memset(slot, 0, sizeof(*slot));
+
+       slot->hwc.hc.cmd_restore.inlen = inlen;
+       slot->hwc.hc.cmd_restore.input = input_phys;
+
+       slot->id = VCQ_CMD_ID(core_id, 0, 1, HC_CMD_RESTORE);
+       slot->magic = VCQ_CMD_MAGIC;
+}
+
+/*
+ * Build an HC_CMD_CSHAKE command in @slot.
+ *
+ * @name/@namelen: function name N, copied into the command itself
+ *                 (at most HC_CSHAKE_MAX_NAMELEN bytes are copied).
+ * @customlen:     length of S; S itself is not stored here, the
+ *                 "custom" address field is left at 0 (inline data).
+ */
+static void vcq_add_hc_cshake(struct vcq_cmd *slot, u32 core_id, u32 algo,
+                             const u8 *name, u32 namelen,
+                             u32 customlen)
+{
+       u32 copy_len = min_t(u32, namelen, HC_CSHAKE_MAX_NAMELEN);
+
+       memset(slot, 0, sizeof(*slot));
+
+       slot->hwc.hc.cmd_cshake.algo = algo;
+       slot->hwc.hc.cmd_cshake.namelen = namelen;
+       slot->hwc.hc.cmd_cshake.customlen = customlen;
+       slot->hwc.hc.cmd_cshake.custom = 0;  /* inline -- CMH eSW reads from next slot(s) */
+
+       if (name && namelen > 0)
+               memcpy(slot->hwc.hc.cmd_cshake.name, name, copy_len);
+
+       slot->id = VCQ_CMD_ID(core_id, 0, 1, HC_CMD_CSHAKE);
+       slot->magic = VCQ_CMD_MAGIC;
+}
+
+/* Request Context Cleanup */
+
+/*
+ * Release every chunk buffered on @rctx, unlinking each one from both
+ * the request list and the transform-wide list, then zero the
+ * request's chunk counters.
+ */
+static void cmh_cshake_free_chunks(struct cmh_cshake_reqctx *rctx,
+                                  struct cmh_cshake_tfm_ctx *tctx)
+{
+       spin_lock_bh(&tctx->chunk_lock);
+       while (!list_empty(&rctx->chunks)) {
+               struct cmh_cshake_chunk *head;
+
+               head = list_first_entry(&rctx->chunks,
+                                       struct cmh_cshake_chunk, list);
+               list_del(&head->list);
+               list_del(&head->tfm_node);
+               kfree(head);
+       }
+       spin_unlock_bh(&tctx->chunk_lock);
+
+       rctx->total_len = 0;
+       rctx->num_chunks = 0;
+}
+
+/* Reset @rctx after a request ends: drop the checkpoint and all chunks. */
+static void cmh_cshake_free_reqctx(struct cmh_cshake_reqctx *rctx,
+                                  struct cmh_cshake_tfm_ctx *tctx)
+{
+       rctx->has_checkpoint = 0;
+       cmh_cshake_free_chunks(rctx, tctx);
+}
+
+/*
+ * Describe the request's buffered chunks, in list order, to
+ * cmh_dma_build_sg() and return the resulting map.
+ *
+ * Return: the map from cmh_dma_build_sg(), or NULL if the temporary
+ * descriptor array cannot be allocated.
+ */
+static struct cmh_sg_map *
+cmh_cshake_build_sg(struct cmh_cshake_reqctx *rctx, gfp_t gfp)
+{
+       struct cmh_dma_buf *descs, *cur;
+       struct cmh_cshake_chunk *c;
+       struct cmh_sg_map *map;
+
+       descs = kcalloc(rctx->num_chunks, sizeof(*descs), gfp);
+       if (!descs)
+               return NULL;
+
+       cur = descs;
+       list_for_each_entry(c, &rctx->chunks, list) {
+               cur->data = c->data;
+               cur->len = c->len;
+               cur++;
+       }
+
+       map = cmh_dma_build_sg(descs, rctx->num_chunks, gfp);
+
+       /* The descriptor array is only needed while building the map. */
+       kfree(descs);
+       return map;
+}
+
+/* ahash Operations */
+
+/*
+ * Registration wrapper: pairs the ahash_alg handed to the crypto core
+ * with its table entry, so that cmh_cshake_get_info() can recover the
+ * entry from the alg pointer via container_of().
+ */
+struct cmh_cshake_alg_drv {
+       struct ahash_alg                   alg;  /* registered with the crypto core */
+       const struct cmh_cshake_alg_info  *info; /* matching cmh_cshake_algs_info[] entry */
+};
+
+/* Map a transform back to the table entry of the algorithm behind it. */
+static const struct cmh_cshake_alg_info *
+cmh_cshake_get_info(struct crypto_ahash *tfm)
+{
+       struct cmh_cshake_alg_drv *drv;
+
+       drv = container_of(crypto_ahash_alg(tfm), struct cmh_cshake_alg_drv,
+                          alg);
+       return drv->info;
+}
+
+/*
+ * .setkey() -- parse N and S from the self-describing cshake_cfg header.
+ *
+ * Blob format: cshake_cfg { __be32 n_len; __be32 s_len; } || N || S
+ * If never called, the driver defaults to plain SHAKE (N="" S="").
+ */
+/* Fixed 8-byte header at the start of the setkey blob (big-endian). */
+struct cshake_cfg {
+       __be32 n_len;   /* length of N in bytes */
+       __be32 s_len;   /* length of S in bytes, located after N */
+};
+
+/*
+ * Install the domain-separation parameters N and S on the transform.
+ *
+ * @key is the blob cshake_cfg || N[n_len] || S[s_len]; @keylen must
+ * match the lengths in the header exactly.
+ *
+ * The new copies are allocated before the old ones are released, so a
+ * failed call leaves the previously installed N and S untouched.  This
+ * matters because the algorithm is registered with
+ * CRYPTO_ALG_OPTIONAL_KEY: a transform stays usable after a failed
+ * setkey and must not silently fall back to plain SHAKE.
+ *
+ * Return: 0 on success, -EINVAL for a malformed blob or over-long N/S,
+ * -ENOMEM if a copy cannot be allocated.
+ */
+static int cmh_cshake_setkey(struct crypto_ahash *tfm, const u8 *key,
+                            unsigned int keylen)
+{
+       struct cmh_cshake_tfm_ctx *tctx = crypto_ahash_ctx(tfm);
+       struct cshake_cfg cfg;
+       u8 *new_name = NULL;
+       u8 *new_custom = NULL;
+       u32 n_len, s_len;
+       const u8 *ptr;
+
+       if (keylen < sizeof(cfg))
+               return -EINVAL;
+
+       memcpy(&cfg, key, sizeof(cfg));
+       n_len = be32_to_cpu(cfg.n_len);
+       s_len = be32_to_cpu(cfg.s_len);
+
+       /* Bound each length first so the sum below is meaningful. */
+       if (n_len > HC_CSHAKE_MAX_NAMELEN)
+               return -EINVAL;
+
+       if (s_len > HC_CSHAKE_MAX_CUSTOMLEN)
+               return -EINVAL;
+
+       /* 64-bit sum: cannot wrap even where size_t is 32 bits wide. */
+       if ((u64)n_len + s_len != keylen - sizeof(cfg))
+               return -EINVAL;
+
+       ptr = key + sizeof(cfg);
+
+       if (n_len > 0) {
+               new_name = kmemdup(ptr, n_len, GFP_KERNEL);
+               if (!new_name)
+                       return -ENOMEM;
+       }
+
+       if (s_len > 0) {
+               new_custom = kmemdup(ptr + n_len, s_len, GFP_KERNEL);
+               if (!new_custom) {
+                       kfree(new_name);
+                       return -ENOMEM;
+               }
+       }
+
+       /* Both copies exist: replace the previous N and S. */
+       kfree(tctx->func_name);
+       kfree(tctx->custom);
+       tctx->func_name = new_name;
+       tctx->func_name_len = n_len;
+       tctx->custom = new_custom;
+       tctx->custom_len = s_len;
+
+       return 0;
+}
+
+/*
+ * .init() -- software-only: reset the request to an empty message with
+ * no checkpoint and no pending error.  Always returns 0.
+ */
+static int cmh_cshake_init(struct ahash_request *req)
+{
+       struct cmh_cshake_reqctx *rctx = ahash_request_ctx(req);
+
+       rctx->info = cmh_cshake_get_info(crypto_ahash_reqtfm(req));
+
+       INIT_LIST_HEAD(&rctx->chunks);
+       rctx->total_len = 0;
+       rctx->num_chunks = 0;
+
+       rctx->has_checkpoint = 0;
+       rctx->error = 0;
+
+       return 0;
+}
+
+/*
+ * .update() -- software-only: copy the caller's data into a new chunk
+ * and queue it on the request (and on the per-tfm tracking list).
+ *
+ * The chunk is sized with struct_size(), which saturates instead of
+ * wrapping, so an oversized req->nbytes makes the allocation fail
+ * rather than producing an undersized buffer.
+ *
+ * Errors are sticky: once one occurs, every chunk buffered so far is
+ * freed and the same error is returned by later calls.
+ *
+ * Return: 0 on success (including a zero-length update), -ENOMEM if
+ * the chunk cannot be allocated, -EINVAL if the source scatterlist
+ * does not cover req->nbytes.
+ */
+static int cmh_cshake_update(struct ahash_request *req)
+{
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+       struct cmh_cshake_tfm_ctx *tctx = crypto_ahash_ctx(tfm);
+       struct cmh_cshake_reqctx *rctx = ahash_request_ctx(req);
+       struct cmh_cshake_chunk *chunk;
+       gfp_t gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
+                    GFP_KERNEL : GFP_ATOMIC;
+       int nents;
+
+       if (rctx->error)
+               return rctx->error;
+
+       if (!req->nbytes)
+               return 0;
+
+       chunk = kmalloc(struct_size(chunk, data, req->nbytes), gfp);
+       if (!chunk) {
+               rctx->error = -ENOMEM;
+               goto err_free_chunks;
+       }
+
+       chunk->len = req->nbytes;
+       if (req->base.flags & CRYPTO_AHASH_REQ_VIRT) {
+               /* Source is a plain virtual address, not a scatterlist. */
+               memcpy(chunk->data, req->svirt, req->nbytes);
+       } else {
+               nents = sg_nents_for_len(req->src, req->nbytes);
+               if (nents < 0 ||
+                   sg_copy_to_buffer(req->src, nents,
+                                     chunk->data, req->nbytes) != req->nbytes) {
+                       kfree(chunk);
+                       rctx->error = -EINVAL;
+                       goto err_free_chunks;
+               }
+       }
+
+       list_add_tail(&chunk->list, &rctx->chunks);
+       spin_lock_bh(&tctx->chunk_lock);
+       list_add_tail(&chunk->tfm_node, &tctx->all_chunks);
+       spin_unlock_bh(&tctx->chunk_lock);
+       rctx->num_chunks++;
+       rctx->total_len += req->nbytes;
+
+       return 0;
+
+err_free_chunks:
+       /*
+        * Terminal error -- free all previously accumulated chunks.
+        * The crypto API hash path does not call .final() on error,
+        * so chunks would be orphaned otherwise.
+        */
+       cmh_cshake_free_chunks(rctx, tctx);
+       return rctx->error;
+}
+
+/*
+ * Completion callback passed to cmh_vcq_pack_and_submit_async() by
+ * .final().  @data is the ahash_request; @error is the outcome.
+ *
+ * -EINPROGRESS is forwarded without touching any state.  Any other
+ * value ends the request: the DMA mappings are released, the digest is
+ * copied out on success, and all per-request resources are freed
+ * before the caller is notified.
+ */
+static void cmh_cshake_complete(void *data, int error)
+{
+       struct ahash_request *req = data;
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+       struct cmh_cshake_tfm_ctx *tctx = crypto_ahash_ctx(tfm);
+       struct cmh_cshake_reqctx *rctx = ahash_request_ctx(req);
+
+       /* Intermediate notification only: the request is still in flight. */
+       if (error == -EINPROGRESS) {
+               cmh_complete(&req->base, error);
+               return;
+       }
+
+       /* The checkpoint was mapped by .final() only if one was present. */
+       if (rctx->has_checkpoint)
+               cmh_dma_unmap_single(rctx->ckpt_dma, HC_CONTEXT_SIZE,
+                                    DMA_TO_DEVICE);
+       /* Unmap the digest buffer before the CPU reads it below. */
+       cmh_dma_unmap_single(rctx->digest_dma, rctx->info->digest_size,
+                            DMA_FROM_DEVICE);
+
+       if (!error)
+               memcpy(req->result, rctx->digest_buf,
+                      rctx->info->digest_size);
+
+       kfree(rctx->digest_buf);
+       rctx->digest_buf = NULL;
+       cmh_dma_free_sg(rctx->sgm);
+       rctx->sgm = NULL;
+       /* Clears has_checkpoint and frees the buffered chunks. */
+       cmh_cshake_free_reqctx(rctx, tctx);
+       cmh_complete(&req->base, error);
+}
+
+/*
+ * .final() -- build and asynchronously submit the whole transaction
+ * for this request:
+ *
+ *   with a checkpoint:  INIT + RESTORE [+ GATHER] + FINAL + FLUSH
+ *   with N or S set:    CSHAKE + inline S [+ GATHER] + FINAL + FLUSH
+ *   otherwise:          INIT [+ GATHER] + FINAL + FLUSH
+ *
+ * GATHER is emitted only when chunks have been buffered.  The digest is
+ * written to a bounce buffer and copied to req->result by
+ * cmh_cshake_complete().
+ *
+ * Return: -EINPROGRESS once submitted, -EBUSY as reported by the
+ * submit helper, the sticky .update() error if one is recorded,
+ * -ENOMEM if a buffer or DMA mapping cannot be set up, or the submit
+ * helper's error.  On every path except -EINPROGRESS and -EBUSY the
+ * request state is released before returning.
+ */
+static int cmh_cshake_final(struct ahash_request *req)
+{
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+       struct cmh_cshake_tfm_ctx *tctx = crypto_ahash_ctx(tfm);
+       struct cmh_cshake_reqctx *rctx = ahash_request_ctx(req);
+       const struct cmh_cshake_alg_info *info = rctx->info;
+       struct core_dispatch d;
+       struct vcq_cmd cmds[CMH_CSHAKE_MAX_PAYLOAD];
+       struct cmh_sg_map *sgm = NULL;
+       dma_addr_t digest_dma = DMA_MAPPING_ERROR;
+       dma_addr_t ckpt_dma = DMA_MAPPING_ERROR;
+       u8 *digest_buf;
+       u32 idx;
+       int ret;
+       gfp_t gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
+                    GFP_KERNEL : GFP_ATOMIC;
+
+       /* A failed .update() already freed the chunks; just report it. */
+       if (rctx->error) {
+               ret = rctx->error;
+               goto out_free;
+       }
+
+       if (rctx->num_chunks > 0) {
+               sgm = cmh_cshake_build_sg(rctx, gfp);
+               if (!sgm) {
+                       ret = -ENOMEM;
+                       goto out_free;
+               }
+       }
+
+       digest_buf = kzalloc(info->digest_size, gfp);
+       if (!digest_buf) {
+               ret = -ENOMEM;
+               goto out_free_sg;
+       }
+       digest_dma = cmh_dma_map_single(digest_buf, info->digest_size,
+                                       DMA_FROM_DEVICE);
+       if (cmh_dma_map_error(digest_dma)) {
+               ret = -ENOMEM;
+               goto out_free_digest;
+       }
+
+       /* Map checkpoint buffer if present (CMH eSW reads it) */
+       if (rctx->has_checkpoint) {
+               ckpt_dma = cmh_dma_map_single(rctx->checkpoint,
+                                             HC_CONTEXT_SIZE, DMA_TO_DEVICE);
+               if (cmh_dma_map_error(ckpt_dma)) {
+                       ret = -ENOMEM;
+                       goto out_unmap_digest;
+               }
+       }
+
+       d = cmh_core_select_instance(CMH_CORE_HC);
+       idx = 0;
+
+       if (rctx->has_checkpoint) {
+               /*
+                * Resuming from a saved checkpoint (after export/import):
+                * INIT + RESTORE [+ GATHER] + FINAL + FLUSH
+                * The cSHAKE prefix (N,S) is already absorbed in the
+                * saved Keccak state -- no need to replay HC_CMD_CSHAKE.
+                */
+               vcq_add_hc_init(&cmds[idx++], d.core_id, info->hc_algo);
+               vcq_add_hc_restore(&cmds[idx++], d.core_id, (u64)ckpt_dma,
+                                  HC_CONTEXT_SIZE);
+       } else {
+               bool use_cshake = (tctx->func_name_len > 0 ||
+                                  tctx->custom_len > 0);
+
+               if (use_cshake) {
+                       u32 span;
+
+                       vcq_add_hc_cshake(&cmds[idx], d.core_id,
+                                         info->hc_algo,
+                                         tctx->func_name,
+                                         tctx->func_name_len,
+                                         tctx->custom_len);
+                       /*
+                        * NOTE(review): span presumably counts the command
+                        * slot plus the inline-data slots holding S; idx is
+                        * not re-checked against CMH_CSHAKE_MAX_PAYLOAD --
+                        * confirm HC_CSHAKE_MAX_CUSTOMLEN keeps it in range.
+                        */
+                       span = vcq_add_inline_data(&cmds[idx],
+                                                  tctx->custom,
+                                                  tctx->custom_len);
+                       idx += span;
+               } else {
+                       /* N and S both empty: plain SHAKE via INIT. */
+                       vcq_add_hc_init(&cmds[idx++], d.core_id,
+                                       info->hc_algo);
+               }
+       }
+
+       if (sgm)
+               vcq_add_hc_gather(&cmds[idx++], d.core_id, (u64)sgm->items_dma,
+                                 HC_CMD_UPDATE);
+
+       vcq_add_hc_final(&cmds[idx++], d.core_id, (u64)digest_dma, info->digest_size);
+       vcq_add_flush(&cmds[idx++], d.core_id);
+
+       /* Hand the resources to the request so the callback can free them. */
+       rctx->digest_buf = digest_buf;
+       rctx->digest_dma = digest_dma;
+       rctx->ckpt_dma = ckpt_dma;
+       rctx->sgm = sgm;
+
+       ret = cmh_vcq_pack_and_submit_async(cmds, idx, rctx->packed,
+                                           CMH_CSHAKE_MAX_PACKED,
+                                           d.mbx_idx,
+                                           cmh_cshake_complete, req,
+                                           !!(req->base.flags &
+                                              CRYPTO_TFM_REQ_MAY_BACKLOG),
+                                           cmh_tm_async_timeout_jiffies());
+       /*
+        * NOTE(review): -EBUSY is returned without unwinding.  That is only
+        * correct if the submit helper has kept the request (backlogged) and
+        * cmh_cshake_complete() will still run -- confirm this also holds
+        * when CRYPTO_TFM_REQ_MAY_BACKLOG is not set.
+        */
+       if (ret == -EBUSY)
+               return -EBUSY;
+       if (ret)
+               goto out_cleanup_all;
+
+       return -EINPROGRESS;
+
+       /* Unwind in reverse order of acquisition. */
+out_cleanup_all:
+       if (rctx->has_checkpoint)
+               cmh_dma_unmap_single(ckpt_dma, HC_CONTEXT_SIZE,
+                                    DMA_TO_DEVICE);
+out_unmap_digest:
+       cmh_dma_unmap_single(digest_dma, info->digest_size,
+                            DMA_FROM_DEVICE);
+out_free_digest:
+       kfree(digest_buf);
+
+out_free_sg:
+       cmh_dma_free_sg(sgm);
+
+out_free:
+       cmh_cshake_free_reqctx(rctx, tctx);
+       return ret;
+}
+
+/*
+ * .finup() -- buffer the request's data, then finalize.  A failing
+ * update is returned as-is and .final() is not attempted.
+ */
+static int cmh_cshake_finup(struct ahash_request *req)
+{
+       int err = cmh_cshake_update(req);
+
+       return err ? err : cmh_cshake_final(req);
+}
+
+/*
+ * .digest() -- one-shot hash: init followed by finup.  A failing init
+ * is returned as-is and nothing further is attempted.
+ */
+static int cmh_cshake_digest(struct ahash_request *req)
+{
+       int err = cmh_cshake_init(req);
+
+       return err ? err : cmh_cshake_finup(req);
+}
+
+/*
+ * .export() -- run the buffered data through the hash core and save
+ * the resulting context into @out (HC_CONTEXT_SIZE bytes):
+ *
+ *   with a checkpoint:  INIT + RESTORE [+ GATHER] + SAVE + FLUSH
+ *   with N or S set:    CSHAKE + inline S [+ GATHER] + SAVE + FLUSH
+ *   otherwise:          INIT [+ GATHER] + SAVE + FLUSH
+ *
+ * On success the saved context also becomes the request's checkpoint
+ * and the buffered chunks are freed, since they are now captured in
+ * that context.  On failure the request state is left unchanged.
+ *
+ * A sticky error from an earlier .update() is returned up front, as
+ * .update() and .final() do: that failure already freed the buffered
+ * chunks, so a state saved now would be missing data.
+ *
+ * Allocations use GFP_KERNEL and the submission is synchronous.
+ *
+ * Return: 0 on success, the sticky .update() error, -ENOMEM if a
+ * buffer or DMA mapping cannot be set up, or the submit helper's error.
+ */
+static int cmh_cshake_export(struct ahash_request *req, void *out)
+{
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+       struct cmh_cshake_tfm_ctx *tctx = crypto_ahash_ctx(tfm);
+       struct cmh_cshake_reqctx *rctx = ahash_request_ctx(req);
+       const struct cmh_cshake_alg_info *info = rctx->info;
+       struct core_dispatch d;
+       struct vcq_cmd cmds[CMH_CSHAKE_MAX_PAYLOAD];
+       struct cmh_sg_map *sgm = NULL;
+       dma_addr_t save_dma = DMA_MAPPING_ERROR;
+       dma_addr_t ckpt_dma = DMA_MAPPING_ERROR;
+       u8 *save_buf;
+       u32 idx;
+       int ret;
+
+       if (rctx->error)
+               return rctx->error;
+
+       if (rctx->num_chunks > 0) {
+               sgm = cmh_cshake_build_sg(rctx, GFP_KERNEL);
+               if (!sgm)
+                       return -ENOMEM;
+       }
+
+       save_buf = kzalloc(HC_CONTEXT_SIZE, GFP_KERNEL);
+       if (!save_buf) {
+               ret = -ENOMEM;
+               goto out_free_sg;
+       }
+       save_dma = cmh_dma_map_single(save_buf, HC_CONTEXT_SIZE,
+                                     DMA_FROM_DEVICE);
+       if (cmh_dma_map_error(save_dma)) {
+               ret = -ENOMEM;
+               goto out_free_save;
+       }
+
+       /* Map checkpoint buffer if present (CMH eSW reads it) */
+       if (rctx->has_checkpoint) {
+               ckpt_dma = cmh_dma_map_single(rctx->checkpoint,
+                                             HC_CONTEXT_SIZE, DMA_TO_DEVICE);
+               if (cmh_dma_map_error(ckpt_dma)) {
+                       ret = -ENOMEM;
+                       goto out_unmap_save;
+               }
+       }
+
+       d = cmh_core_select_instance(CMH_CORE_HC);
+       idx = 0;
+
+       if (rctx->has_checkpoint) {
+               /*
+                * Resuming from a saved checkpoint:
+                * INIT + RESTORE [+ GATHER] + SAVE + FLUSH
+                */
+               vcq_add_hc_init(&cmds[idx++], d.core_id, info->hc_algo);
+               vcq_add_hc_restore(&cmds[idx++], d.core_id, (u64)ckpt_dma,
+                                  HC_CONTEXT_SIZE);
+       } else {
+               bool use_cshake = (tctx->func_name_len > 0 ||
+                                  tctx->custom_len > 0);
+
+               if (use_cshake) {
+                       u32 span;
+
+                       vcq_add_hc_cshake(&cmds[idx], d.core_id,
+                                         info->hc_algo,
+                                         tctx->func_name,
+                                         tctx->func_name_len,
+                                         tctx->custom_len);
+                       span = vcq_add_inline_data(&cmds[idx],
+                                                  tctx->custom,
+                                                  tctx->custom_len);
+                       idx += span;
+               } else {
+                       vcq_add_hc_init(&cmds[idx++], d.core_id,
+                                       info->hc_algo);
+               }
+       }
+
+       if (sgm)
+               vcq_add_hc_gather(&cmds[idx++], d.core_id, (u64)sgm->items_dma,
+                                 HC_CMD_UPDATE);
+
+       vcq_add_hc_save(&cmds[idx++], d.core_id, (u64)save_dma,
+                       HC_CONTEXT_SIZE);
+       vcq_add_flush(&cmds[idx++], d.core_id);
+
+       ret = cmh_vcq_pack_and_submit(cmds, idx, rctx->packed, CMH_CSHAKE_MAX_PACKED,
+                                     d.mbx_idx);
+
+       /* Unmap before CPU read */
+       if (rctx->has_checkpoint)
+               cmh_dma_unmap_single(ckpt_dma, HC_CONTEXT_SIZE, DMA_TO_DEVICE);
+out_unmap_save:
+       cmh_dma_unmap_single(save_dma, HC_CONTEXT_SIZE, DMA_FROM_DEVICE);
+
+       if (ret)
+               goto out_free_save;
+
+       memcpy(out, save_buf, HC_CONTEXT_SIZE);
+       /* Checkpoint now represents all accumulated state */
+       memcpy(rctx->checkpoint, save_buf, HC_CONTEXT_SIZE);
+       rctx->has_checkpoint = 1;
+       /* Accumulated chunks are now captured in checkpoint */
+       cmh_cshake_free_chunks(rctx, tctx);
+
+out_free_save:
+       kfree(save_buf);
+out_free_sg:
+       cmh_dma_free_sg(sgm);
+       return ret;
+}
+
+/*
+ * .import() -- software-only: adopt @in (HC_CONTEXT_SIZE bytes) as the
+ * request's checkpoint and start with no buffered data and no pending
+ * error.  Always returns 0.
+ */
+static int cmh_cshake_import(struct ahash_request *req, const void *in)
+{
+       struct cmh_cshake_reqctx *rctx = ahash_request_ctx(req);
+
+       rctx->info = cmh_cshake_get_info(crypto_ahash_reqtfm(req));
+
+       INIT_LIST_HEAD(&rctx->chunks);
+       rctx->total_len = 0;
+       rctx->num_chunks = 0;
+       rctx->error = 0;
+
+       rctx->has_checkpoint = 1;
+       memcpy(rctx->checkpoint, in, HC_CONTEXT_SIZE);
+
+       return 0;
+}
+
+/* Transform init/exit */
+
+/*
+ * Transform constructor: start with empty N and S (plain SHAKE), an
+ * empty chunk-tracking list, and declare the per-request context size.
+ * Always returns 0.
+ */
+static int cmh_cshake_cra_init(struct crypto_tfm *tfm)
+{
+       struct cmh_cshake_tfm_ctx *tctx = crypto_tfm_ctx(tfm);
+
+       INIT_LIST_HEAD(&tctx->all_chunks);
+       spin_lock_init(&tctx->chunk_lock);
+
+       tctx->custom_len = 0;
+       tctx->custom = NULL;
+       tctx->func_name_len = 0;
+       tctx->func_name = NULL;
+
+       crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+                                sizeof(struct cmh_cshake_reqctx));
+       return 0;
+}
+
+/*
+ * Transform destructor: free every chunk still on the tracking list,
+ * then release the stored N and S.
+ */
+static void cmh_cshake_cra_exit(struct crypto_tfm *tfm)
+{
+       struct cmh_cshake_tfm_ctx *tctx = crypto_tfm_ctx(tfm);
+
+       /* Free any orphaned chunks (e.g. testmgr export/reimport poison) */
+       spin_lock_bh(&tctx->chunk_lock);
+       while (!list_empty(&tctx->all_chunks)) {
+               struct cmh_cshake_chunk *orphan;
+
+               orphan = list_first_entry(&tctx->all_chunks,
+                                         struct cmh_cshake_chunk, tfm_node);
+               list_del(&orphan->tfm_node);
+               kfree(orphan);
+       }
+       spin_unlock_bh(&tctx->chunk_lock);
+
+       kfree(tctx->custom);
+       tctx->custom = NULL;
+       kfree(tctx->func_name);
+       tctx->func_name = NULL;
+}
+
+/* Registration */
+
+/* One registration slot per entry of cmh_cshake_algs_info[]. */
+static struct cmh_cshake_alg_drv cmh_cshake_drvs[CMH_CSHAKE_ALG_COUNT];
+
+/* Populate @drv's ahash descriptor from the table entry @info. */
+static void cmh_cshake_fill_alg(struct cmh_cshake_alg_drv *drv,
+                               const struct cmh_cshake_alg_info *info)
+{
+       struct ahash_alg *alg = &drv->alg;
+       struct crypto_alg *base = &alg->halg.base;
+
+       drv->info = info;
+
+       alg->init   = cmh_cshake_init;
+       alg->update = cmh_cshake_update;
+       alg->final  = cmh_cshake_final;
+       alg->finup  = cmh_cshake_finup;
+       alg->digest = cmh_cshake_digest;
+       alg->export = cmh_cshake_export;
+       alg->import = cmh_cshake_import;
+       alg->setkey = cmh_cshake_setkey;
+
+       alg->halg.digestsize = info->digest_size;
+       alg->halg.statesize  = HC_CONTEXT_SIZE;
+
+       strscpy(base->cra_name, info->alg_name, CRYPTO_MAX_ALG_NAME);
+       strscpy(base->cra_driver_name, info->drv_name, CRYPTO_MAX_ALG_NAME);
+       base->cra_priority  = 300;
+       base->cra_flags     = CRYPTO_ALG_KERN_DRIVER_ONLY |
+                             CRYPTO_ALG_NO_FALLBACK |
+                             CRYPTO_ALG_ASYNC |
+                             CRYPTO_ALG_OPTIONAL_KEY |
+                             CRYPTO_ALG_REQ_VIRT;
+       base->cra_blocksize = 1;  /* XOF */
+       base->cra_ctxsize   = sizeof(struct cmh_cshake_tfm_ctx);
+       base->cra_init      = cmh_cshake_cra_init;
+       base->cra_exit      = cmh_cshake_cra_exit;
+       base->cra_module    = THIS_MODULE;
+}
+
+/**
+ * cmh_cshake_register() - Register cSHAKE-128/256 hash algorithms with the crypto framework
+ *
+ * If one registration fails, the algorithms registered before it are
+ * unregistered again so that none remain registered.
+ *
+ * Return: 0 on success, negative errno on failure.
+ */
+int cmh_cshake_register(void)
+{
+       unsigned int i;
+       int ret;
+
+       for (i = 0; i < CMH_CSHAKE_ALG_COUNT; i++) {
+               struct cmh_cshake_alg_drv *drv = &cmh_cshake_drvs[i];
+
+               cmh_cshake_fill_alg(drv, &cmh_cshake_algs_info[i]);
+
+               ret = crypto_register_ahash(&drv->alg);
+               if (ret)
+                       goto err_unwind;
+
+               dev_dbg(cmh_dev(), "cshake: registered %s (priority 300)\n",
+                       cmh_cshake_algs_info[i].drv_name);
+       }
+
+       dev_info(cmh_dev(), "cshake: %zu algorithm(s) registered\n",
+                CMH_CSHAKE_ALG_COUNT);
+       return 0;
+
+err_unwind:
+       dev_err(cmh_dev(), "cshake: failed to register %s (rc=%d)\n",
+               cmh_cshake_algs_info[i].drv_name, ret);
+       /* Entries [0, i) registered successfully; undo them newest first. */
+       while (i--)
+               crypto_unregister_ahash(&cmh_cshake_drvs[i].alg);
+       return ret;
+}
+
+/**
+ * cmh_cshake_unregister() - Unregister cSHAKE hash algorithms from the crypto framework
+ */
+void cmh_cshake_unregister(void)
+{
+       unsigned int idx = 0;
+
+       /* Walk the table in registration order. */
+       while (idx < CMH_CSHAKE_ALG_COUNT) {
+               crypto_unregister_ahash(&cmh_cshake_drvs[idx].alg);
+               dev_dbg(cmh_dev(), "cshake: unregistered %s\n",
+                       cmh_cshake_algs_info[idx].drv_name);
+               idx++;
+       }
+
+       dev_info(cmh_dev(), "cshake: cleaned up\n");
+}
diff --git a/drivers/crypto/cmh/cmh_kmac.c b/drivers/crypto/cmh/cmh_kmac.c
new file mode 100644
index 000000000000..7177a2558e97
--- /dev/null
+++ b/drivers/crypto/cmh/cmh_kmac.c
@@ -0,0 +1,630 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2026 Cryptography Research, Inc. (CRI).
+ * CMH LKM -- Kernel Crypto API KMAC Driver
+ *
+ * Registers KMAC-128 and KMAC-256 as keyed ahash algorithms using the
+ * CMH Hash Core (HC) via HC_CMD_KMAC.
+ *
+ * KMAC (NIST SP 800-185) is a keyed variant of cSHAKE.  The function
+ * name N is always "KMAC" (hardcoded by the CMH eSW).  The user sets:
+ *   - A key via .setkey() (raw bytes + optional S)
+ *   - An optional customization string S via the setkey blob
+ *
+ * setkey blob format:
+ *   struct kmac_key_param { __be32 keylen; __be32 s_len; };
+ *   blob: kmac_key_param || key[keylen] || S[s_len]
+ *
+ * Uses the same self-contained transaction model as cmh_hmac.c:
+ *   .setkey() -> store raw key (+ S)
+ *   .init()   -> software-only
+ *   .update() -> software-only (accumulate chunks)
+ *   .final()  -> [SYS_CMD_WRITE] + HC_CMD_KMAC [+ inline S] +
+ *               [GATHER] + FINAL + FLUSH
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/crypto.h>
+#include <crypto/internal/hash.h>
+#include <linux/scatterlist.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <asm/byteorder.h>
+
+#include "cmh_kmac.h"
+#include "cmh_vcq.h"
+#include "cmh_hc_abi.h"
+#include "cmh_sys_abi.h"
+#include "cmh_sys.h"
+#include "cmh_txn.h"
+#include "cmh_dma.h"
+#include "cmh_key.h"
+
+/*
+ * Maximum data that can be accumulated across .update() calls.
+ * The CMH eSW rejects HC_CMD_SAVE when ctx->outlen != 0, which is
+ * always the case for KMAC (eip59_hc_kmac() sets ctx->outlen for
+ * right_encode(outlen) at finalization).  All data must be buffered
+ * in kernel memory and submitted atomically in .final().
+ *
+ * The CMH eSW does not serialize outlen into the external save
+ * context, so HC_CMD_SAVE fails for KMAC mode.
+ */
+#define KMAC_MAX_DATA          (64 * 1024)
+
+/* Algorithm Table */
+
+/* Static description of one KMAC variant exposed by this driver. */
+struct cmh_kmac_alg_info {
+       u32         hc_algo;      /* HC_ALGO_* value placed in the HC_CMD_KMAC slot */
+       u32         digest_size;  /* output length: halg.digestsize and KMAC outlen */
+       const char *alg_name;     /* copied into cra_name at registration */
+       const char *drv_name;     /* copied into cra_driver_name at registration */
+};
+
+/*
+ * One entry per registered algorithm.  cmh_kmac_register() walks this
+ * table by index and pairs entry i with cmh_kmac_drvs[i].
+ */
+static const struct cmh_kmac_alg_info cmh_kmac_algs_info[] = {
+       {
+               .hc_algo     = HC_ALGO_SHAKE128,
+               .digest_size = CMH_SHAKE128_DIGEST_SIZE,
+               .alg_name    = "kmac128",
+               .drv_name    = "cri-cmh-kmac128",
+       },
+       {
+               .hc_algo     = HC_ALGO_SHAKE256,
+               .digest_size = CMH_SHAKE256_DIGEST_SIZE,
+               .alg_name    = "kmac256",
+               .drv_name    = "cri-cmh-kmac256",
+       },
+};
+
+#define CMH_KMAC_ALG_COUNT  ARRAY_SIZE(cmh_kmac_algs_info)
+
+/* Per-Request State */
+
+/*
+ * One buffered .update() payload.  Each chunk sits on two lists at once:
+ * the owning request's list and the transform-wide tracking list.
+ */
+struct cmh_kmac_chunk {
+       struct list_head  list;     /* link in cmh_kmac_reqctx.chunks */
+       struct list_head  tfm_node; /* per-tfm orphan tracking */
+       u32               len;      /* number of valid bytes in data[] */
+       u8                data[];   /* copy of the caller's input */
+};
+
+/*
+ * Max payload slots for KMAC:
+ *   SYS_CMD_WRITE (1) + KMAC (1) + inline S (3 max) + GATHER (1) +
+ *   FINAL (1) + FLUSH (1) = 8
+ * CMH_KMAC_MAX_PAYLOAD is defined as 9, one slot above this worst case.
+ */
+#define CMH_KMAC_MAX_PAYLOAD   9
+#define CMH_KMAC_MAX_PACKED    (CMH_KMAC_MAX_PAYLOAD * 2)
+
+/*
+ * Per-request state kept in the ahash request context.  It accumulates
+ * input chunks across .update() calls and holds the resources that must
+ * outlive .final() until the completion callback runs.
+ */
+struct cmh_kmac_reqctx {
+       const struct cmh_kmac_alg_info *info;       /* set by .init() */
+       int                             error;      /* sticky error from .update() */
+       struct list_head                chunks;     /* buffered input, in arrival order */
+       u32                             num_chunks; /* entries on chunks */
+       u32                             total_len;  /* sum of chunk lengths */
+       /* DMA state for async final */
+       dma_addr_t                      digest_dma;
+       dma_addr_t                      key_dma;    /* not referenced elsewhere in this file */
+       u8                             *digest_buf; /* bounce buffer for the digest */
+       struct cmh_sg_map              *sgm;        /* gather map over chunks, or NULL */
+       u32                             keylen;     /* not referenced elsewhere in this file */
+       /* output buffer handed to cmh_vcq_pack_and_submit_async() */
+       struct vcq_cmd packed[CMH_KMAC_MAX_PACKED];
+};
+
+/* Per-Transform State (carries key + S across requests) */
+
+/*
+ * Transform context: filled by .setkey(), read by .final(), torn down in
+ * cmh_kmac_cra_exit().
+ */
+struct cmh_kmac_tfm_ctx {
+       struct cmh_key_ctx key; /* key state; mode is CMH_KEY_NONE until a key is set */
+       u8  *custom;        /* S (customization string), NULL if empty */
+       u32  custom_len;    /* length of custom in bytes, 0 if empty */
+       spinlock_t         chunk_lock;  /* protects all_chunks */
+       struct list_head   all_chunks;  /* orphan-safe chunk tracking */
+};
+
+/* VCQ Builders (KMAC-specific; shared builders in cmh_hc_abi.h / cmh_vcq.h) */
+
+/*
+ * Fill @slot with an HC_CMD_KMAC command.
+ *
+ * The slot is zeroed first, then stamped with the VCQ magic and a command
+ * id built from @core_id.  @key_ref, @keylen, @customlen, @algo and @outlen
+ * are stored verbatim in the cmd_kmac payload.  The custom pointer field is
+ * always written as 0; the caller supplies S separately as inline data.
+ */
+static void vcq_add_hc_kmac(struct vcq_cmd *slot, u32 core_id, u64 key_ref, u32 keylen,
+                           u32 customlen, u32 algo, u32 outlen)
+{
+       memset(slot, 0, sizeof(*slot));
+       slot->magic = VCQ_CMD_MAGIC;
+       slot->id = VCQ_CMD_ID(core_id, 0, 1, HC_CMD_KMAC);
+       slot->hwc.hc.cmd_kmac.key = key_ref;
+       slot->hwc.hc.cmd_kmac.custom = 0;  /* inline */
+       slot->hwc.hc.cmd_kmac.keylen = keylen;
+       slot->hwc.hc.cmd_kmac.customlen = customlen;
+       slot->hwc.hc.cmd_kmac.algo = algo;
+       slot->hwc.hc.cmd_kmac.outlen = outlen;
+}
+
+/* Request Context Cleanup */
+
+/*
+ * Release every chunk queued on @rctx and reset its accumulation counters.
+ * Each chunk is unlinked from the per-request list and from the per-tfm
+ * tracking list while tctx->chunk_lock is held, then freed.
+ */
+static void cmh_kmac_free_chunks(struct cmh_kmac_reqctx *rctx,
+                                struct cmh_kmac_tfm_ctx *tctx)
+{
+       struct cmh_kmac_chunk *cur;
+
+       spin_lock_bh(&tctx->chunk_lock);
+       while (!list_empty(&rctx->chunks)) {
+               cur = list_first_entry(&rctx->chunks, struct cmh_kmac_chunk,
+                                      list);
+               list_del(&cur->list);
+               list_del(&cur->tfm_node);
+               kfree(cur);
+       }
+       spin_unlock_bh(&tctx->chunk_lock);
+
+       rctx->total_len = 0;
+       rctx->num_chunks = 0;
+}
+
+/*
+ * Build a gather map over the request's buffered chunks.
+ *
+ * A temporary descriptor array with one entry per chunk is filled in list
+ * order, passed to cmh_dma_build_sg() and freed again before returning.
+ * Returns the map from cmh_dma_build_sg(), or NULL if the temporary array
+ * cannot be allocated.
+ */
+static struct cmh_sg_map *
+cmh_kmac_build_sg(struct cmh_kmac_reqctx *rctx, gfp_t gfp)
+{
+       struct cmh_kmac_chunk *cur;
+       struct cmh_dma_buf *descs;
+       struct cmh_sg_map *map;
+       u32 n = 0;
+
+       descs = kcalloc(rctx->num_chunks, sizeof(*descs), gfp);
+       if (!descs)
+               return NULL;
+
+       list_for_each_entry(cur, &rctx->chunks, list) {
+               descs[n].data = cur->data;
+               descs[n].len = cur->len;
+               n++;
+       }
+
+       map = cmh_dma_build_sg(descs, rctx->num_chunks, gfp);
+       kfree(descs);
+
+       return map;
+}
+
+/* VCQ Packing + Submit */
+
+/* ahash Operations */
+
+/*
+ * Registration record: the ahash descriptor handed to the crypto API plus
+ * the table entry it was built from.  cmh_kmac_get_info() recovers this
+ * wrapper from the embedded alg via container_of().
+ */
+struct cmh_kmac_alg_drv {
+       struct ahash_alg                 alg;
+       const struct cmh_kmac_alg_info  *info;
+};
+
+/* Map a transform back to the algorithm table entry it was registered with. */
+static const struct cmh_kmac_alg_info *
+cmh_kmac_get_info(struct crypto_ahash *tfm)
+{
+       struct cmh_kmac_alg_drv *drv;
+
+       drv = container_of(crypto_ahash_alg(tfm), struct cmh_kmac_alg_drv, alg);
+       return drv->info;
+}
+
+/*
+ * setkey blob for KMAC (raw key path):
+ *   struct kmac_key_param { __be32 keylen; __be32 s_len; };
+ *   blob: kmac_key_param || key[keylen] || S[s_len]
+ *
+ * Both header fields are big-endian; cmh_kmac_setkey() converts them with
+ * be32_to_cpu() before use.
+ */
+struct kmac_key_param {
+       __be32 keylen;  /* length in bytes of the raw key that follows the header */
+       __be32 s_len;   /* length in bytes of S that follows the key, may be 0 */
+};
+
+/**
+ * cmh_kmac_setkey() - Parse a KMAC setkey blob and store the key and S
+ * @tfm:    ahash transform whose context receives the key material
+ * @key:    blob laid out as kmac_key_param || key[keylen] || S[s_len]
+ * @keylen: total length of @key in bytes
+ *
+ * The header lengths must describe the blob exactly, the raw key must be
+ * non-empty and S must not exceed HC_CSHAKE_MAX_CUSTOMLEN.  The raw key is
+ * handed to cmh_key_setkey_raw(); any previously stored S is then freed
+ * and replaced by a copy of the new one (or left empty when s_len is 0).
+ * If copying S fails, the key that was just stored is destroyed again.
+ *
+ * Return: 0 on success, -EINVAL for a malformed blob, -ENOMEM if S cannot
+ * be duplicated, or the error returned by cmh_key_setkey_raw().
+ */
+static int cmh_kmac_setkey(struct crypto_ahash *tfm, const u8 *key,
+                          unsigned int keylen)
+{
+       struct cmh_kmac_tfm_ctx *tctx = crypto_ahash_ctx(tfm);
+       struct kmac_key_param hdr;
+       unsigned int payload_len;
+       u32 raw_keylen, s_len;
+       const u8 *ptr;
+       int ret;
+
+       if (keylen < sizeof(hdr))
+               return -EINVAL;
+
+       memcpy(&hdr, key, sizeof(hdr));
+       raw_keylen = be32_to_cpu(hdr.keylen);
+       s_len = be32_to_cpu(hdr.s_len);
+
+       /*
+        * Validate by subtraction rather than by summing the header fields:
+        * sizeof(hdr) + raw_keylen + s_len can wrap where size_t is 32 bits
+        * wide, which would let an oversized raw_keylen pass the length
+        * check and be read past the end of the blob.
+        */
+       payload_len = keylen - sizeof(hdr);
+       if (raw_keylen == 0 || raw_keylen > payload_len)
+               return -EINVAL;
+
+       if (s_len != payload_len - raw_keylen)
+               return -EINVAL;
+
+       if (s_len > HC_CSHAKE_MAX_CUSTOMLEN)
+               return -EINVAL;
+
+       ptr = key + sizeof(hdr);
+
+       /* Store raw key */
+       ret = cmh_key_setkey_raw(&tctx->key, ptr, raw_keylen, CORE_ID_HC);
+       if (ret)
+               return ret;
+       ptr += raw_keylen;
+
+       /* Store S: drop the old string first so an empty S clears it */
+       kfree(tctx->custom);
+       tctx->custom = NULL;
+       tctx->custom_len = 0;
+
+       if (s_len > 0) {
+               tctx->custom = kmemdup(ptr, s_len, GFP_KERNEL);
+               if (!tctx->custom) {
+                       /* do not leave a key installed without its S */
+                       cmh_key_destroy(&tctx->key);
+                       return -ENOMEM;
+               }
+               tctx->custom_len = s_len;
+       }
+
+       return 0;
+}
+
+/*
+ * .init(): software-only reset of the per-request state.  Clears the
+ * sticky error and the accumulation counters, starts an empty chunk list
+ * and records which algorithm table entry the transform belongs to.
+ * Always returns 0.
+ */
+static int cmh_kmac_init(struct ahash_request *req)
+{
+       struct cmh_kmac_reqctx *rctx = ahash_request_ctx(req);
+
+       INIT_LIST_HEAD(&rctx->chunks);
+       rctx->total_len = 0;
+       rctx->num_chunks = 0;
+       rctx->error = 0;
+       rctx->info = cmh_kmac_get_info(crypto_ahash_reqtfm(req));
+
+       return 0;
+}
+
+/*
+ * .update(): buffer one call's worth of input for later submission by
+ * .final().
+ *
+ * The input is copied from req->svirt (CRYPTO_AHASH_REQ_VIRT) or from the
+ * req->src scatterlist into a newly allocated chunk, which is appended to
+ * the request's chunk list and to the per-tfm tracking list.  An empty
+ * update is a no-op.  Any failure is recorded in rctx->error, frees every
+ * chunk accumulated so far and is returned; later calls return the same
+ * error immediately.
+ */
+static int cmh_kmac_update(struct ahash_request *req)
+{
+       struct cmh_kmac_reqctx *rctx = ahash_request_ctx(req);
+       struct cmh_kmac_tfm_ctx *tctx =
+               crypto_ahash_ctx(crypto_ahash_reqtfm(req));
+       struct cmh_kmac_chunk *chunk;
+       gfp_t gfp;
+       int nents;
+
+       if (rctx->error)
+               return rctx->error;
+
+       if (req->nbytes == 0)
+               return 0;
+
+       /* Refuse input that would push the running total past KMAC_MAX_DATA. */
+       if (req->nbytes > KMAC_MAX_DATA - rctx->total_len) {
+               rctx->error = -EINVAL;
+               goto err_free_chunks;
+       }
+
+       gfp = GFP_ATOMIC;
+       if (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP)
+               gfp = GFP_KERNEL;
+
+       chunk = kmalloc(sizeof(*chunk) + req->nbytes, gfp);
+       if (!chunk) {
+               rctx->error = -ENOMEM;
+               goto err_free_chunks;
+       }
+       chunk->len = req->nbytes;
+
+       if (req->base.flags & CRYPTO_AHASH_REQ_VIRT) {
+               memcpy(chunk->data, req->svirt, req->nbytes);
+       } else {
+               nents = sg_nents_for_len(req->src, req->nbytes);
+               if (nents < 0)
+                       goto err_bad_src;
+               if (sg_copy_to_buffer(req->src, nents, chunk->data,
+                                     req->nbytes) != req->nbytes)
+                       goto err_bad_src;
+       }
+
+       list_add_tail(&chunk->list, &rctx->chunks);
+
+       spin_lock_bh(&tctx->chunk_lock);
+       list_add_tail(&chunk->tfm_node, &tctx->all_chunks);
+       spin_unlock_bh(&tctx->chunk_lock);
+
+       rctx->total_len += req->nbytes;
+       rctx->num_chunks++;
+
+       return 0;
+
+err_bad_src:
+       /* The new chunk was never linked anywhere, so free it directly. */
+       kfree(chunk);
+       rctx->error = -EINVAL;
+
+err_free_chunks:
+       /*
+        * Terminal error -- free all previously accumulated chunks.
+        * The crypto API hash path does not call .final() on error,
+        * so chunks would be orphaned otherwise.
+        */
+       cmh_kmac_free_chunks(rctx, tctx);
+       return rctx->error;
+}
+
+/*
+ * Completion callback passed to cmh_vcq_pack_and_submit_async() by
+ * cmh_kmac_final().
+ *
+ * @data:  the ahash_request that was submitted
+ * @error: completion status
+ *
+ * -EINPROGRESS is forwarded to the caller without touching any request
+ * resources (presumably the backlog "now in progress" notification --
+ * confirm against the submit helper).  For every other status the digest
+ * mapping is released, the digest is copied to req->result on success,
+ * and the digest buffer, gather map and buffered chunks are freed before
+ * the request is completed.
+ */
+static void cmh_kmac_complete(void *data, int error)
+{
+       struct ahash_request *req = data;
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+       struct cmh_kmac_tfm_ctx *tctx = crypto_ahash_ctx(tfm);
+       struct cmh_kmac_reqctx *rctx = ahash_request_ctx(req);
+
+       if (error == -EINPROGRESS) {
+               cmh_complete(&req->base, error);
+               return;
+       }
+
+       /* Unmap before the CPU reads the bounce buffer. */
+       cmh_dma_unmap_single(rctx->digest_dma, rctx->info->digest_size,
+                            DMA_FROM_DEVICE);
+
+       if (!error)
+               memcpy(req->result, rctx->digest_buf,
+                      rctx->info->digest_size);
+
+       kfree(rctx->digest_buf);
+       rctx->digest_buf = NULL;
+       cmh_dma_free_sg(rctx->sgm);
+       rctx->sgm = NULL;
+       cmh_kmac_free_chunks(rctx, tctx);
+       cmh_complete(&req->base, error);
+}
+
+/*
+ * .final(): submit the whole buffered message as one command sequence.
+ *
+ * Builds, in order: a SYS write of the raw key to SYS_REF_TEMP, the
+ * HC_CMD_KMAC slot followed by inline S data, an optional gather over the
+ * buffered chunks, a FINAL targeting the digest bounce buffer, and a FLUSH.
+ * The sequence is then packed into rctx->packed and submitted
+ * asynchronously with cmh_kmac_complete() as the callback.
+ *
+ * Return: -EINPROGRESS once the submission is accepted; -EBUSY exactly as
+ * returned by the submit helper; otherwise a negative errno (the sticky
+ * update error, -ENOKEY when no key is set, -ENOMEM, or the submit error)
+ * after releasing everything acquired here and the buffered chunks.
+ */
+static int cmh_kmac_final(struct ahash_request *req)
+{
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+       struct cmh_kmac_tfm_ctx *tctx = crypto_ahash_ctx(tfm);
+       struct cmh_kmac_reqctx *rctx = ahash_request_ctx(req);
+       const struct cmh_kmac_alg_info *info = rctx->info;
+       struct vcq_cmd cmds[CMH_KMAC_MAX_PAYLOAD];
+       struct cmh_sg_map *sgm = NULL;
+       dma_addr_t digest_dma = DMA_MAPPING_ERROR, key_dma = DMA_MAPPING_ERROR;
+       u8 *digest_buf;
+       u64 key_ref;
+       u32 key_len;
+       struct core_dispatch d;
+       s32 target_mbx;
+       u32 core_id;
+       u32 idx;
+       int ret;
+       gfp_t gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
+                  GFP_KERNEL : GFP_ATOMIC;
+
+       if (rctx->error) {
+               ret = rctx->error;
+               goto out_free;
+       }
+
+       if (tctx->key.mode == CMH_KEY_NONE) {
+               ret = -ENOKEY;
+               goto out_free;
+       }
+
+       /* An empty message has no chunks; sgm stays NULL and no gather is emitted. */
+       if (rctx->num_chunks > 0) {
+               sgm = cmh_kmac_build_sg(rctx, gfp);
+               if (!sgm) {
+                       ret = -ENOMEM;
+                       goto out_free;
+               }
+       }
+
+       /* The digest lands in a bounce buffer and is copied to req->result on completion. */
+       digest_buf = kzalloc(info->digest_size, gfp);
+       if (!digest_buf) {
+               ret = -ENOMEM;
+               goto out_free_sg;
+       }
+       digest_dma = cmh_dma_map_single(digest_buf, info->digest_size,
+                                       DMA_FROM_DEVICE);
+       if (cmh_dma_map_error(digest_dma)) {
+               ret = -ENOMEM;
+               goto out_free_digest;
+       }
+
+       /* Resolve key reference */
+       idx = 0;
+
+       /* Only the raw-key fields of tctx->key are used on this path. */
+       key_dma = tctx->key.raw.dma;
+       vcq_add_sys_write(&cmds[idx++], SYS_REF_TEMP, (u64)key_dma,
+                         SYS_REF_NONE, tctx->key.raw.len,
+                         tctx->key.raw.sys_type);
+       key_ref = SYS_REF_TEMP;
+       key_len = tctx->key.raw.len;
+       d = cmh_core_select_instance(CMH_CORE_HC);
+
+       target_mbx = d.mbx_idx;
+
+       core_id = d.core_id;
+
+       {
+               u32 span;
+
+               vcq_add_hc_kmac(&cmds[idx], core_id, key_ref, key_len,
+                               tctx->custom_len, info->hc_algo,
+                               info->digest_size);
+
+               /* Add inline S data after the KMAC slot */
+               span = vcq_add_inline_data(&cmds[idx], tctx->custom,
+                                          tctx->custom_len);
+               /* span is the number of slots consumed, as reported by the helper */
+               idx += span;
+       }
+
+       if (sgm)
+               vcq_add_hc_gather(&cmds[idx++], core_id, (u64)sgm->items_dma,
+                                 HC_CMD_UPDATE);
+
+       vcq_add_hc_final(&cmds[idx++], core_id, (u64)digest_dma, info->digest_size);
+       vcq_add_flush(&cmds[idx++], core_id);
+
+       /* Publish the resources to the request context before the callback can run. */
+       rctx->digest_buf = digest_buf;
+       rctx->digest_dma = digest_dma;
+       rctx->sgm = sgm;
+
+       ret = cmh_vcq_pack_and_submit_async(cmds, idx, rctx->packed,
+                                           CMH_KMAC_MAX_PACKED,
+                                           target_mbx,
+                                           cmh_kmac_complete, req,
+                                           !!(req->base.flags &
+                                              CRYPTO_TFM_REQ_MAY_BACKLOG),
+                                           cmh_tm_async_timeout_jiffies());
+       /*
+        * -EBUSY is returned without any cleanup.
+        * NOTE(review): presumably the request was backlogged and
+        * cmh_kmac_complete() will release the resources later -- confirm
+        * against cmh_vcq_pack_and_submit_async().
+        */
+       if (ret == -EBUSY)
+               return -EBUSY;
+       if (ret)
+               goto out_cleanup_all;
+
+       return -EINPROGRESS;
+
+       /* Error unwinding: each label releases one resource and falls through. */
+out_cleanup_all:
+       cmh_dma_unmap_single(digest_dma, info->digest_size,
+                            DMA_FROM_DEVICE);
+out_free_digest:
+       kfree(digest_buf);
+
+out_free_sg:
+       cmh_dma_free_sg(sgm);
+
+out_free:
+       cmh_kmac_free_chunks(rctx, tctx);
+       return ret;
+}
+
+/*
+ * .finup(): buffer the request's remaining input, then finalize.  A
+ * failing update is returned as-is and .final() is not attempted.
+ */
+static int cmh_kmac_finup(struct ahash_request *req)
+{
+       int err = cmh_kmac_update(req);
+
+       return err ? err : cmh_kmac_final(req);
+}
+
+/*
+ * .digest(): one-shot operation, i.e. init followed by finup on the same
+ * request.  An init failure is returned without going further.
+ */
+static int cmh_kmac_digest(struct ahash_request *req)
+{
+       int err = cmh_kmac_init(req);
+
+       return err ? err : cmh_kmac_finup(req);
+}
+
+/* .export(): state export is not implemented; always returns -EOPNOTSUPP. */
+static int cmh_kmac_export(struct ahash_request *req, void *out)
+{
+       return -EOPNOTSUPP;
+}
+
+/* .import(): state import is not implemented; always returns -EOPNOTSUPP. */
+static int cmh_kmac_import(struct ahash_request *req, const void *in)
+{
+       return -EOPNOTSUPP;
+}
+
+/* Transform init/exit */
+
+/*
+ * Transform constructor: declares the per-request context size and puts
+ * the transform context into its "no key, no S, no chunks" state.
+ * Always returns 0.
+ */
+static int cmh_kmac_cra_init(struct crypto_tfm *tfm)
+{
+       struct cmh_kmac_tfm_ctx *tctx = crypto_tfm_ctx(tfm);
+
+       crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+                                sizeof(struct cmh_kmac_reqctx));
+
+       INIT_LIST_HEAD(&tctx->all_chunks);
+       spin_lock_init(&tctx->chunk_lock);
+
+       tctx->custom_len = 0;
+       tctx->custom = NULL;
+       tctx->key.mode = CMH_KEY_NONE;
+
+       return 0;
+}
+
+/*
+ * Transform destructor: frees whatever is still on the per-tfm chunk
+ * tracking list, then destroys the key and releases S.
+ */
+static void cmh_kmac_cra_exit(struct crypto_tfm *tfm)
+{
+       struct cmh_kmac_tfm_ctx *tctx = crypto_tfm_ctx(tfm);
+       struct cmh_kmac_chunk *cur;
+
+       /* Free any orphaned chunks (e.g. testmgr export/reimport poison) */
+       spin_lock_bh(&tctx->chunk_lock);
+       while (!list_empty(&tctx->all_chunks)) {
+               cur = list_first_entry(&tctx->all_chunks,
+                                      struct cmh_kmac_chunk, tfm_node);
+               list_del(&cur->tfm_node);
+               kfree(cur);
+       }
+       spin_unlock_bh(&tctx->chunk_lock);
+
+       cmh_key_destroy(&tctx->key);
+
+       kfree(tctx->custom);
+       tctx->custom = NULL;
+}
+
+/* Registration */
+
+static struct cmh_kmac_alg_drv cmh_kmac_drvs[CMH_KMAC_ALG_COUNT];
+
+/* Populate one registration record from its algorithm table entry. */
+static void cmh_kmac_fill_alg(struct cmh_kmac_alg_drv *drv,
+                             const struct cmh_kmac_alg_info *info)
+{
+       struct ahash_alg *alg = &drv->alg;
+
+       drv->info = info;
+
+       alg->init   = cmh_kmac_init;
+       alg->update = cmh_kmac_update;
+       alg->final  = cmh_kmac_final;
+       alg->finup  = cmh_kmac_finup;
+       alg->digest = cmh_kmac_digest;
+       alg->export = cmh_kmac_export;
+       alg->import = cmh_kmac_import;
+       alg->setkey = cmh_kmac_setkey;
+
+       alg->halg.digestsize = info->digest_size;
+       alg->halg.statesize  = sizeof(struct cmh_kmac_reqctx);
+
+       strscpy(alg->halg.base.cra_name, info->alg_name,
+               CRYPTO_MAX_ALG_NAME);
+       strscpy(alg->halg.base.cra_driver_name, info->drv_name,
+               CRYPTO_MAX_ALG_NAME);
+       alg->halg.base.cra_priority    = 300;
+       alg->halg.base.cra_flags       = CRYPTO_ALG_KERN_DRIVER_ONLY |
+                                        CRYPTO_ALG_NO_FALLBACK |
+                                        CRYPTO_ALG_ASYNC |
+                                        CRYPTO_ALG_REQ_VIRT;
+       alg->halg.base.cra_blocksize   = 1;  /* XOF/keyed XOF */
+       alg->halg.base.cra_ctxsize     = sizeof(struct cmh_kmac_tfm_ctx);
+       alg->halg.base.cra_init        = cmh_kmac_cra_init;
+       alg->halg.base.cra_exit        = cmh_kmac_cra_exit;
+       alg->halg.base.cra_module      = THIS_MODULE;
+}
+
+/**
+ * cmh_kmac_register() - Register KMAC-128/256 hash algorithms with the crypto framework
+ *
+ * Entries are registered in table order.  If one registration fails, the
+ * entries registered before it are unregistered in reverse order and the
+ * failure is returned.
+ *
+ * Return: 0 on success, negative errno on failure.
+ */
+int cmh_kmac_register(void)
+{
+       unsigned int i;
+       int ret;
+
+       for (i = 0; i < CMH_KMAC_ALG_COUNT; i++) {
+               const struct cmh_kmac_alg_info *info =
+                       &cmh_kmac_algs_info[i];
+
+               cmh_kmac_fill_alg(&cmh_kmac_drvs[i], info);
+
+               ret = crypto_register_ahash(&cmh_kmac_drvs[i].alg);
+               if (!ret) {
+                       dev_dbg(cmh_dev(), "kmac: registered %s (priority 300)\n",
+                               info->drv_name);
+                       continue;
+               }
+
+               dev_err(cmh_dev(), "kmac: failed to register %s (rc=%d)\n",
+                       info->drv_name, ret);
+               /* Roll back the entries that did register, newest first. */
+               while (i--)
+                       crypto_unregister_ahash(&cmh_kmac_drvs[i].alg);
+               return ret;
+       }
+
+       dev_info(cmh_dev(), "kmac: %zu algorithm(s) registered\n",
+                CMH_KMAC_ALG_COUNT);
+       return 0;
+}
+
+/**
+ * cmh_kmac_unregister() - Unregister KMAC hash algorithms from the crypto framework
+ *
+ * Visits every slot of the KMAC driver table in ascending index order,
+ * unregisters its ahash descriptor and logs the driver name at debug level.
+ */
+void cmh_kmac_unregister(void)
+{
+       unsigned int idx = 0;
+
+       while (idx < CMH_KMAC_ALG_COUNT) {
+               crypto_unregister_ahash(&cmh_kmac_drvs[idx].alg);
+               dev_dbg(cmh_dev(), "kmac: unregistered %s\n",
+                       cmh_kmac_algs_info[idx].drv_name);
+               idx++;
+       }
+
+       dev_info(cmh_dev(), "kmac: cleaned up\n");
+}
diff --git a/drivers/crypto/cmh/cmh_main.c b/drivers/crypto/cmh/cmh_main.c
index c18219197bd8..f04cc6855963 100644
--- a/drivers/crypto/cmh/cmh_main.c
+++ b/drivers/crypto/cmh/cmh_main.c
@@ -31,6 +31,8 @@
 #include "cmh_rh.h"
 #include "cmh_hash.h"
 #include "cmh_hmac.h"
+#include "cmh_cshake.h"
+#include "cmh_kmac.h"
 #include "cmh_mgmt.h"
 #include "cmh_registers.h"
 #include "cmh_debugfs.h"
@@ -203,6 +205,16 @@ static int cmh_probe(struct platform_device *pdev)
        if (ret)
                goto err_hmac_register;

+       /* Register CSHAKE hash algorithms */
+       ret = cmh_cshake_register();
+       if (ret)
+               goto err_cshake_register;
+
+       /* Register KMAC hash algorithms */
+       ret = cmh_kmac_register();
+       if (ret)
+               goto err_kmac_register;
+
        /* Register key management device (/dev/cmh_mgmt) */
        ret = cmh_mgmt_register();
        if (ret)
@@ -215,6 +227,10 @@ static int cmh_probe(struct platform_device *pdev)
        return 0;

 err_mgmt_register:
+       cmh_kmac_unregister();
+err_kmac_register:
+       cmh_cshake_unregister();
+err_cshake_register:
        cmh_hmac_unregister();
 err_hmac_register:
        cmh_hash_unregister();
@@ -245,6 +261,8 @@ static void cmh_remove(struct platform_device *pdev)
        cfg = &dev->config;

        cmh_mgmt_unregister();
+       cmh_kmac_unregister();
+       cmh_cshake_unregister();
        cmh_hmac_unregister();
        cmh_hash_unregister();
        cmh_rh_cleanup(cfg);
diff --git a/drivers/crypto/cmh/include/cmh_cshake.h b/drivers/crypto/cmh/include/cmh_cshake.h
new file mode 100644
index 000000000000..9bafe0baf52f
--- /dev/null
+++ b/drivers/crypto/cmh/include/cmh_cshake.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2026 Cryptography Research, Inc. (CRI).
+ * CMH LKM -- Kernel Crypto API CSHAKE Driver
+ *
+ * Registers cSHAKE-128 and cSHAKE-256 ahash algorithms using
+ * HC_CMD_CSHAKE with inline customization string S.
+ */
+
+#ifndef CMH_CSHAKE_H
+#define CMH_CSHAKE_H
+
+/* Register / unregister the cSHAKE ahash algorithms with the crypto API. */
+int  cmh_cshake_register(void);
+void cmh_cshake_unregister(void);
+
+#endif /* CMH_CSHAKE_H */
diff --git a/drivers/crypto/cmh/include/cmh_kmac.h b/drivers/crypto/cmh/include/cmh_kmac.h
new file mode 100644
index 000000000000..b3c92d71a0b6
--- /dev/null
+++ b/drivers/crypto/cmh/include/cmh_kmac.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2026 Cryptography Research, Inc. (CRI).
+ * CMH LKM -- Kernel Crypto API KMAC Driver
+ *
+ * Registers KMAC-128 and KMAC-256 ahash algorithms using
+ * HC_CMD_KMAC with inline customization string S.
+ */
+
+#ifndef CMH_KMAC_H
+#define CMH_KMAC_H
+
+/*
+ * Register / unregister the KMAC ahash algorithms with the crypto API.
+ * cmh_kmac_register() returns 0 on success or a negative errno.
+ */
+int  cmh_kmac_register(void);
+void cmh_kmac_unregister(void);
+
+#endif /* CMH_KMAC_H */
--
2.43.7


** This message and any attachments are for the sole use of the intended recipient(s). It may contain information that is confidential and privileged. If you are not the intended recipient of this message, you are prohibited from printing, copying, forwarding or saving it. Please delete the message and attachments and notify the sender immediately. **

Rambus Inc.<http://www.rambus.com>

^ permalink raw reply related

* [PATCH 05/19] crypto: cmh - add HMAC ahash
From: Saravanakrishnan Krishnamoorthy @ 2026-06-25 17:33 UTC (permalink / raw)
  To: Albert Ou, Alex Ousherovitch, Conor Dooley, David S. Miller,
	Herbert Xu, Jonathan Corbet, Krzysztof Kozlowski, Palmer Dabbelt,
	Paul Walmsley, Rob Herring, Saravanakrishnan Krishnamoorthy,
	Shuah Khan
  Cc: Alexandre Ghiti, devicetree, Joel Wittenauer, linux-api,
	linux-crypto, linux-doc, linux-kernel, linux-kselftest,
	linux-riscv, Shuah Khan, sipsupport, Thi Nguyen
In-Reply-To: <20260625173328.1140487-1-skrishnamoorthy@rambus.com>

From: Alex Ousherovitch <aousherovitch@rambus.com>

Register ahash algorithms for HMAC-SHA-224, HMAC-SHA-256,
HMAC-SHA-384, HMAC-SHA-512, HMAC-SHA3-224, HMAC-SHA3-256,
HMAC-SHA3-384, and HMAC-SHA3-512 using the CMH hash core.

Co-developed-by: Saravanakrishnan Krishnamoorthy <skrishnamoorthy@rambus.com>
Signed-off-by: Saravanakrishnan Krishnamoorthy <skrishnamoorthy@rambus.com>
Signed-off-by: Alex Ousherovitch <aousherovitch@rambus.com>
Reviewed-by: Joel Wittenauer <Joel.Wittenauer@cryptography.com>
Reviewed-by: Thi Nguyen <thin@rambus.com>
---
 drivers/crypto/cmh/Makefile           |   3 +-
 drivers/crypto/cmh/cmh_hmac.c         | 684 ++++++++++++++++++++++++++
 drivers/crypto/cmh/cmh_main.c         |   9 +
 drivers/crypto/cmh/include/cmh_hmac.h |  16 +
 4 files changed, 711 insertions(+), 1 deletion(-)
 create mode 100644 drivers/crypto/cmh/cmh_hmac.c
 create mode 100644 drivers/crypto/cmh/include/cmh_hmac.h

diff --git a/drivers/crypto/cmh/Makefile b/drivers/crypto/cmh/Makefile
index c0531f416229..1f760c0214ef 100644
--- a/drivers/crypto/cmh/Makefile
+++ b/drivers/crypto/cmh/Makefile
@@ -15,7 +15,8 @@ cmh-y := \
        cmh_sysfs.o \
        cmh_key.o \
        cmh_sys.o \
-       cmh_hash.o
+       cmh_hash.o \
+       cmh_hmac.o

 # Management ioctl device (/dev/cmh_mgmt): key lifecycle, PKE, PQC ioctls.
 cmh-$(CONFIG_CRYPTO_DEV_CMH_MGMT) += \
diff --git a/drivers/crypto/cmh/cmh_hmac.c b/drivers/crypto/cmh/cmh_hmac.c
new file mode 100644
index 000000000000..1f536088eabf
--- /dev/null
+++ b/drivers/crypto/cmh/cmh_hmac.c
@@ -0,0 +1,684 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2026 Cryptography Research, Inc. (CRI).
+ * CMH LKM -- Kernel Crypto API HMAC Driver
+ *
+ * Registers HMAC ahash algorithms with the Linux crypto subsystem.
+ * Supports HMAC-SHA-2 (224/256/384/512) and HMAC-SHA-3 (224/256/384/512)
+ * using the CMH Hash Core (HC) via HC_CMD_HMAC.
+ *
+ * Uses the same self-contained transaction model as cmh_hash.c:
+ *   .setkey() -> store raw key bytes
+ *   .init()   -> software-only: initialize per-request context
+ *   .update() -> software-only: copy SG data into per-call chunk
+ *   .final()  -> [SYS_CMD_WRITE] + HC_CMD_HMAC + [GATHER] + FINAL + FLUSH
+ *
+ * Raw-key atomicity: SYS_CMD_WRITE to SYS_REF_TEMP is packed into
+ * the same VCQ as HC_CMD_HMAC (see cmh_key.h for details).
+ *
+ * ahash .export()/.import() (state cloning): supported at the
+ * software accumulation level only.  The HW hash core does NOT
+ * support save/restore of intermediate HMAC state (SHA3 sponge
+ * invertibility, SHA2 blocked for consistency).  Since this driver
+ * accumulates all input data in kernel memory before submitting
+ * atomically in .final(), export/import simply serializes the
+ * input queue -- no keying material or HW state is exposed.
+ *
+ * All HMAC data is accumulated in kernel memory and capped at
+ * HMAC_MAX_DATA (64 KB).
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/crypto.h>
+#include <crypto/internal/hash.h>
+#include <crypto/hash.h>
+#include <linux/scatterlist.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+
+#include "cmh_hmac.h"
+#include "cmh_vcq.h"
+#include "cmh_hc_abi.h"
+#include "cmh_sys_abi.h"
+#include "cmh_sys.h"
+#include "cmh_txn.h"
+#include "cmh_dma.h"
+#include "cmh_key.h"
+
+/*
+ * Maximum data that can be accumulated across .update() calls.
+ * HMAC save/restore is intentionally unsupported (see file header),
+ * so all data must be buffered in kernel memory and submitted
+ * atomically in .final().  This cap prevents unbounded allocation.
+ */
+#define HMAC_MAX_DATA          (64 * 1024)
+
+/* Algorithm Table */
+
+struct cmh_hmac_alg_info {
+       u32         hc_algo;        /* HC_ALGO_* */
+       u32         digest_size;    /* bytes */
+       u32         block_size;     /* cra_blocksize */
+       const char *alg_name;       /* Linux crypto name: "hmac(sha256)" */
+       const char *drv_name;       /* driver name: "cri-cmh-hmac-sha256" */
+};
+
+static const struct cmh_hmac_alg_info cmh_hmac_algs_info[] = {
+       /* HMAC-SHA-2 family */
+       {
+               .hc_algo     = HC_ALGO_SHA2_224,
+               .digest_size = CMH_SHA224_DIGEST_SIZE,
+               .block_size  = 64,
+               .alg_name    = "hmac(sha224)",
+               .drv_name    = "cri-cmh-hmac-sha224",
+       },
+       {
+               .hc_algo     = HC_ALGO_SHA2_256,
+               .digest_size = CMH_SHA256_DIGEST_SIZE,
+               .block_size  = 64,
+               .alg_name    = "hmac(sha256)",
+               .drv_name    = "cri-cmh-hmac-sha256",
+       },
+       {
+               .hc_algo     = HC_ALGO_SHA2_384,
+               .digest_size = CMH_SHA384_DIGEST_SIZE,
+               .block_size  = 128,
+               .alg_name    = "hmac(sha384)",
+               .drv_name    = "cri-cmh-hmac-sha384",
+       },
+       {
+               .hc_algo     = HC_ALGO_SHA2_512,
+               .digest_size = CMH_SHA512_DIGEST_SIZE,
+               .block_size  = 128,
+               .alg_name    = "hmac(sha512)",
+               .drv_name    = "cri-cmh-hmac-sha512",
+       },
+       /* HMAC-SHA-3 family */
+       {
+               .hc_algo     = HC_ALGO_SHA3_224,
+               .digest_size = CMH_SHA3_224_DIGEST_SIZE,
+               .block_size  = 144,
+               .alg_name    = "hmac(sha3-224)",
+               .drv_name    = "cri-cmh-hmac-sha3-224",
+       },
+       {
+               .hc_algo     = HC_ALGO_SHA3_256,
+               .digest_size = CMH_SHA3_256_DIGEST_SIZE,
+               .block_size  = 136,
+               .alg_name    = "hmac(sha3-256)",
+               .drv_name    = "cri-cmh-hmac-sha3-256",
+       },
+       {
+               .hc_algo     = HC_ALGO_SHA3_384,
+               .digest_size = CMH_SHA3_384_DIGEST_SIZE,
+               .block_size  = 104,
+               .alg_name    = "hmac(sha3-384)",
+               .drv_name    = "cri-cmh-hmac-sha3-384",
+       },
+       {
+               .hc_algo     = HC_ALGO_SHA3_512,
+               .digest_size = CMH_SHA3_512_DIGEST_SIZE,
+               .block_size  = 72,
+               .alg_name    = "hmac(sha3-512)",
+               .drv_name    = "cri-cmh-hmac-sha3-512",
+       },
+};
+
+#define CMH_HMAC_ALG_COUNT  ARRAY_SIZE(cmh_hmac_algs_info)
+
+/* Per-Request State */
+
+struct cmh_hmac_chunk {
+       struct list_head  list;     /* link on cmh_hmac_reqctx.chunks */
+       struct list_head  tfm_node; /* per-tfm orphan tracking */
+       u32               len;      /* number of valid bytes in data[] */
+       u8                data[];   /* buffered input bytes */
+};
+
+/*
+ * Maximum payload commands any HMAC transaction can produce:
+ *   [SYS_CMD_WRITE] + HC_CMD_HMAC + [GATHER] + FINAL + FLUSH = 5
+ * Worst-case packed output (stride=7, 1 payload per VCQ):
+ *   5 VCQs x 2 entries = 10
+ */
+#define CMH_HMAC_MAX_PAYLOAD    5
+#define CMH_HMAC_MAX_PACKED     (CMH_HMAC_MAX_PAYLOAD * 2)
+
+struct cmh_hmac_reqctx {
+       const struct cmh_hmac_alg_info *info;           /* set by init()/import() */
+       int                             error;          /* sticky errno from update(), 0 if none */
+       struct list_head                chunks;         /* buffered input, in arrival order */
+       u32                             num_chunks;     /* entries on chunks */
+       u32                             total_len;      /* sum of chunk lengths */
+       /* DMA state for async final */
+       dma_addr_t                      digest_dma;     /* mapping of digest_buf */
+       dma_addr_t                      key_dma;        /* NOTE(review): never written in this file */
+       u8                             *digest_buf;     /* copied to req->result on success */
+       struct cmh_sg_map              *sgm;            /* built from chunks; NULL if no data */
+       u32                             keylen;         /* NOTE(review): never written in this file */
+       struct vcq_cmd packed[CMH_HMAC_MAX_PACKED];     /* packed output for the async submit */
+};
+
+/* Flat state for export/import -- holds accumulated input data only */
+struct cmh_hmac_export_state {
+       u32 total_len;
+       u8  data[];
+};
+
+/*
+ * Flat state buffer for export/import.  The CMH hash core does not
+ * support save/restore of intermediate HMAC state, so this driver
+ * accumulates input in SW and serialises the buffer on export.
+ *
+ * CMH_HMAC_STATE_SIZE (4096 bytes) caps the exportable accumulated-data
+ * window.  Full-range export (up to HMAC_MAX_DATA = 64 KB) is not
+ * feasible because the crypto subsystem pre-allocates statesize bytes
+ * per request.  Export returns -ENOSPC (and import -EINVAL) when the
+ * accumulated length exceeds CMH_HMAC_EXPORT_MAX.
+ */
+#define CMH_HMAC_STATE_SIZE 4096
+#define CMH_HMAC_EXPORT_MAX (CMH_HMAC_STATE_SIZE - sizeof(struct cmh_hmac_export_state))
+
+/* Per-Transform State (carries key across requests) */
+
+struct cmh_hmac_tfm_ctx {
+       struct cmh_key_ctx key;
+       spinlock_t         chunk_lock;  /* protects all_chunks */
+       struct list_head   all_chunks;  /* orphan-safe chunk tracking */
+};
+
+/* VCQ Builders (HMAC-specific; shared builders in cmh_hc_abi.h / cmh_vcq.h) */
+
+/* Zero @slot, then fill it in as an HC_CMD_HMAC command for @core_id */
+static void vcq_add_hc_hmac(struct vcq_cmd *slot, u32 core_id, u64 key_ref,
+                           u32 keylen, u32 algo)
+{
+       memset(slot, 0, sizeof(*slot));
+       slot->hwc.hc.cmd_hmac.algo = algo;
+       slot->hwc.hc.cmd_hmac.keylen = keylen;
+       slot->hwc.hc.cmd_hmac.key = key_ref;
+       slot->id = VCQ_CMD_ID(core_id, 0, 1, HC_CMD_HMAC);
+       slot->magic = VCQ_CMD_MAGIC;
+}
+
+/* Request Context Cleanup */
+
+static void cmh_hmac_free_chunks(struct cmh_hmac_reqctx *rctx,
+                                struct cmh_hmac_tfm_ctx *tctx)
+{
+       struct cmh_hmac_chunk *c;
+
+       spin_lock_bh(&tctx->chunk_lock);
+       while ((c = list_first_entry_or_null(&rctx->chunks, typeof(*c), list))) {
+               list_del(&c->tfm_node);         /* drop per-tfm tracking */
+               list_del(&c->list);             /* drop per-request link */
+               kfree_sensitive(c);
+       }
+       spin_unlock_bh(&tctx->chunk_lock);
+       rctx->total_len = 0;
+       rctx->num_chunks = 0;
+}
+
+/*
+ * Describe every accumulated chunk as a cmh_dma_buf and hand the array to
+ * cmh_dma_build_sg(); returns its result, or NULL if the array alloc fails.
+ */
+static struct cmh_sg_map *
+cmh_hmac_build_sg(struct cmh_hmac_reqctx *rctx, gfp_t gfp)
+{
+       struct cmh_dma_buf *descs, *cur;
+       struct cmh_hmac_chunk *chunk;
+       struct cmh_sg_map *map;
+
+       descs = kcalloc(rctx->num_chunks, sizeof(*descs), gfp);
+       if (!descs)
+               return NULL;
+
+       cur = descs;
+       list_for_each_entry(chunk, &rctx->chunks, list) {
+               cur->data = chunk->data;
+               cur->len = chunk->len;
+               cur++;
+       }
+
+       map = cmh_dma_build_sg(descs, rctx->num_chunks, gfp);
+       kfree(descs);
+       return map;
+}
+
+/* VCQ Packing + Submit */
+
+/* ahash Operations */
+
+struct cmh_hmac_alg_drv {
+       struct ahash_alg                  alg;
+       const struct cmh_hmac_alg_info   *info;
+};
+
+static const struct cmh_hmac_alg_info *
+cmh_hmac_get_info(struct crypto_ahash *tfm)
+{
+       /* The ahash_alg is the 'alg' member of our per-algorithm wrapper. */
+       return container_of(crypto_ahash_alg(tfm),
+                           struct cmh_hmac_alg_drv, alg)->info;
+}
+
+static int cmh_hmac_setkey(struct crypto_ahash *tfm, const u8 *key,
+                          unsigned int keylen)
+{
+       struct cmh_hmac_tfm_ctx *ctx = crypto_ahash_ctx(tfm);
+
+       return cmh_key_setkey_raw(&ctx->key, key, keylen, CORE_ID_HC);  /* per-tfm key */
+}
+
+static int cmh_hmac_init(struct ahash_request *req)
+{
+       struct cmh_hmac_reqctx *rctx = ahash_request_ctx(req);
+
+       /* Software-only reset of the per-request accumulation state. */
+       INIT_LIST_HEAD(&rctx->chunks);
+       rctx->total_len = 0;
+       rctx->num_chunks = 0;
+       rctx->error = 0;
+       rctx->info = cmh_hmac_get_info(crypto_ahash_reqtfm(req));
+
+       return 0;
+}
+
+static int cmh_hmac_update(struct ahash_request *req)
+{
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+       struct cmh_hmac_tfm_ctx *tctx = crypto_ahash_ctx(tfm);
+       struct cmh_hmac_reqctx *rctx = ahash_request_ctx(req);
+       struct cmh_hmac_chunk *chunk;
+       int nents;
+
+       if (rctx->error)        /* sticky: an earlier update() already failed */
+               return rctx->error;
+
+       if (!req->nbytes)       /* nothing to buffer */
+               return 0;
+
+       if (req->nbytes > HMAC_MAX_DATA - rctx->total_len) {    /* would exceed the cap */
+               rctx->error = -EINVAL;
+               goto err_free_chunks;
+       }
+
+       chunk = kmalloc(sizeof(*chunk) + req->nbytes,
+                       req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
+                       GFP_KERNEL : GFP_ATOMIC);
+       if (!chunk) {
+               rctx->error = -ENOMEM;
+               goto err_free_chunks;
+       }
+
+       chunk->len = req->nbytes;
+       if (req->base.flags & CRYPTO_AHASH_REQ_VIRT) {  /* input is a linear buffer */
+               memcpy(chunk->data, req->svirt, req->nbytes);
+       } else {                                        /* input is a scatterlist */
+               nents = sg_nents_for_len(req->src, req->nbytes);
+               if (nents < 0 ||
+                   sg_copy_to_buffer(req->src, nents,
+                                     chunk->data, req->nbytes) != req->nbytes) {
+                       kfree_sensitive(chunk); /* not yet on any list */
+                       rctx->error = -EINVAL;
+                       goto err_free_chunks;
+               }
+       }
+
+       list_add_tail(&chunk->list, &rctx->chunks);     /* keep arrival order */
+       spin_lock_bh(&tctx->chunk_lock);
+       list_add_tail(&chunk->tfm_node, &tctx->all_chunks);
+       spin_unlock_bh(&tctx->chunk_lock);
+       rctx->num_chunks++;
+       rctx->total_len += req->nbytes;
+
+       return 0;
+
+err_free_chunks:
+       /*
+        * Terminal error -- free all previously accumulated chunks.
+        * The crypto API hash path does not call .final()
+        * on error, and hash_sock_destruct has no per-request
+        * destructor, so chunks would be orphaned otherwise.
+        */
+       cmh_hmac_free_chunks(rctx, tctx);
+       return rctx->error;     /* stays set, so later update()/final() fail too */
+}
+
+static void cmh_hmac_complete(void *data, int error)
+{
+       struct ahash_request *req = data;       /* cookie passed at submit time */
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+       struct cmh_hmac_tfm_ctx *tctx = crypto_ahash_ctx(tfm);
+       struct cmh_hmac_reqctx *rctx = ahash_request_ctx(req);
+
+       if (error == -EINPROGRESS) {    /* forwarded as-is; nothing is released yet */
+               cmh_complete(&req->base, error);
+               return;
+       }
+
+       cmh_dma_unmap_single(rctx->digest_dma, rctx->info->digest_size,
+                            DMA_FROM_DEVICE);  /* unmap before the CPU reads digest_buf */
+
+       if (!error)     /* publish the digest only on success */
+               memcpy(req->result, rctx->digest_buf,
+                      rctx->info->digest_size);
+
+       kfree(rctx->digest_buf);
+       rctx->digest_buf = NULL;
+       cmh_dma_free_sg(rctx->sgm);     /* sgm is NULL when final() saw no chunks */
+       rctx->sgm = NULL;
+       cmh_hmac_free_chunks(rctx, tctx);
+       cmh_complete(&req->base, error);        /* last: rctx is not touched afterwards */
+}
+
+static int cmh_hmac_final(struct ahash_request *req)
+{
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+       struct cmh_hmac_tfm_ctx *tctx = crypto_ahash_ctx(tfm);
+       struct cmh_hmac_reqctx *rctx = ahash_request_ctx(req);
+       const struct cmh_hmac_alg_info *info = rctx->info;
+       struct vcq_cmd cmds[CMH_HMAC_MAX_PAYLOAD];
+       struct cmh_sg_map *sgm = NULL;
+       dma_addr_t digest_dma = DMA_MAPPING_ERROR, key_dma = DMA_MAPPING_ERROR;
+       u8 *digest_buf;
+       u64 key_ref;
+       u32 keylen;
+       struct core_dispatch d;
+       s32 target_mbx;
+       u32 core_id;
+       u32 idx;
+       int ret;
+       gfp_t gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
+                  GFP_KERNEL : GFP_ATOMIC;
+
+       if (rctx->error) {      /* a previous update() failed */
+               ret = rctx->error;
+               goto out_free;
+       }
+
+       if (tctx->key.mode == CMH_KEY_NONE) {   /* setkey() was never called */
+               ret = -ENOKEY;
+               goto out_free;
+       }
+
+       if (rctx->num_chunks > 0) {     /* sgm stays NULL for an empty message */
+               sgm = cmh_hmac_build_sg(rctx, gfp);
+               if (!sgm) {
+                       ret = -ENOMEM;
+                       goto out_free;
+               }
+       }
+
+       digest_buf = kzalloc(info->digest_size, gfp);
+       if (!digest_buf) {
+               ret = -ENOMEM;
+               goto out_free_sg;
+       }
+       digest_dma = cmh_dma_map_single(digest_buf, info->digest_size,
+                                       DMA_FROM_DEVICE);
+       if (cmh_dma_map_error(digest_dma)) {
+               ret = -ENOMEM;
+               goto out_free_digest;
+       }
+
+       /* Build the command list; idx counts the entries filled in cmds[] */
+       idx = 0;
+
+       /*
+        * Raw key: pack SYS_CMD_WRITE(SYS_REF_TEMP) into the
+        * same VCQ so the key write + HMAC are atomic.
+        */
+       key_dma = tctx->key.raw.dma;
+       vcq_add_sys_write(&cmds[idx++], SYS_REF_TEMP, (u64)key_dma,
+                         SYS_REF_NONE, tctx->key.raw.len,
+                         tctx->key.raw.sys_type);
+       key_ref = SYS_REF_TEMP;
+       keylen = tctx->key.raw.len;
+       d = cmh_core_select_instance(CMH_CORE_HC);
+
+       target_mbx = d.mbx_idx;
+
+       core_id = d.core_id;
+
+       vcq_add_hc_hmac(&cmds[idx++], core_id, key_ref, keylen, info->hc_algo);
+
+       if (sgm)        /* message data is optional */
+               vcq_add_hc_gather(&cmds[idx++], core_id, (u64)sgm->items_dma,
+                                 HC_CMD_UPDATE);
+
+       vcq_add_hc_final(&cmds[idx++], core_id, (u64)digest_dma, info->digest_size);
+       vcq_add_flush(&cmds[idx++], core_id);
+
+       rctx->digest_buf = digest_buf;  /* saved for cmh_hmac_complete() */
+       rctx->digest_dma = digest_dma;
+       rctx->sgm = sgm;
+
+       ret = cmh_vcq_pack_and_submit_async(cmds, idx, rctx->packed,
+                                           CMH_HMAC_MAX_PACKED,
+                                           target_mbx,
+                                           cmh_hmac_complete, req,
+                                           !!(req->base.flags &
+                                              CRYPTO_TFM_REQ_MAY_BACKLOG),
+                                           cmh_tm_async_timeout_jiffies());
+       if (ret == -EBUSY)      /* NOTE(review): nothing freed; presumably backlogged -- confirm */
+               return -EBUSY;
+       if (ret)
+               goto out_cleanup_all;
+
+       return -EINPROGRESS;    /* cleanup now belongs to cmh_hmac_complete() */
+
+out_cleanup_all:
+       cmh_dma_unmap_single(digest_dma, info->digest_size,
+                            DMA_FROM_DEVICE);
+out_free_digest:
+       kfree(digest_buf);
+
+out_free_sg:
+       cmh_dma_free_sg(sgm);   /* sgm may still be NULL here */
+
+out_free:
+       cmh_hmac_free_chunks(rctx, tctx);
+       return ret;
+}
+
+static int cmh_hmac_finup(struct ahash_request *req)
+{
+       int err = cmh_hmac_update(req);
+
+       /* Buffer the trailing data first; only finalize if that succeeded. */
+       if (err)
+               return err;
+
+       return cmh_hmac_final(req);
+}
+
+static int cmh_hmac_digest(struct ahash_request *req)
+{
+       int err = cmh_hmac_init(req);
+
+       /* One-shot path: reset the request state, then update + final. */
+       if (err)
+               return err;
+
+       return cmh_hmac_finup(req);
+}
+
+/*
+ * ahash .export()/.import(): serialize/deserialize the software
+ * accumulation buffer.  No HW state is involved.
+ */
+
+static int cmh_hmac_export(struct ahash_request *req, void *out)
+{
+       struct cmh_hmac_reqctx *rctx = ahash_request_ctx(req);
+       struct cmh_hmac_export_state *state = out;
+       struct cmh_hmac_chunk *chunk;
+       u8 *dst = state->data;  /* write cursor into the flat state buffer */
+
+       if (rctx->total_len > CMH_HMAC_EXPORT_MAX)
+               return -ENOSPC;
+
+       list_for_each_entry(chunk, &rctx->chunks, list) {
+               memcpy(dst, chunk->data, chunk->len);
+               dst += chunk->len;
+       }
+       state->total_len = rctx->total_len;
+       return 0;
+}
+
+static int cmh_hmac_import(struct ahash_request *req, const void *in)
+{
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+       struct cmh_hmac_tfm_ctx *tctx = crypto_ahash_ctx(tfm);
+       struct cmh_hmac_reqctx *rctx = ahash_request_ctx(req);
+       const struct cmh_hmac_export_state *state = in;
+       struct cmh_hmac_chunk *chunk;
+       gfp_t gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
+                  GFP_KERNEL : GFP_ATOMIC;     /* only sleep if the caller allows it */
+
+       /*
+        * Rebuild rctx from the exported blob as a single chunk.  Do NOT call
+        * free_chunks() first: rctx may be uninitialised here, so its list is
+        * not trustworthy; stale chunks stay on all_chunks until cra_exit.
+        */
+       rctx->info = cmh_hmac_get_info(tfm);
+       rctx->error = 0;
+       INIT_LIST_HEAD(&rctx->chunks);
+       rctx->num_chunks = 0;
+       rctx->total_len = 0;
+
+       if (state->total_len > CMH_HMAC_EXPORT_MAX)     /* larger than export() can produce */
+               return -EINVAL;
+
+       if (state->total_len) {
+               chunk = kmalloc(sizeof(*chunk) + state->total_len, gfp);
+               if (!chunk)
+                       return -ENOMEM;
+               chunk->len = state->total_len;
+               memcpy(chunk->data, state->data, state->total_len);
+               list_add_tail(&chunk->list, &rctx->chunks);
+               spin_lock_bh(&tctx->chunk_lock);
+               list_add_tail(&chunk->tfm_node, &tctx->all_chunks);
+               spin_unlock_bh(&tctx->chunk_lock);
+               rctx->num_chunks = 1;
+               rctx->total_len = state->total_len;
+       }
+       return 0;
+}
+
+/* Transform init/exit (cra_init/cra_exit) */
+
+static int cmh_hmac_cra_init(struct crypto_tfm *tfm)
+{
+       struct cmh_hmac_tfm_ctx *ctx = crypto_tfm_ctx(tfm);
+
+       /* Zero the whole context first, then set up list, lock and key mode. */
+       memset(ctx, 0, sizeof(*ctx));
+       INIT_LIST_HEAD(&ctx->all_chunks);
+       spin_lock_init(&ctx->chunk_lock);
+       ctx->key.mode = CMH_KEY_NONE;
+       crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), sizeof(struct cmh_hmac_reqctx));
+       return 0;
+}
+
+static void cmh_hmac_cra_exit(struct crypto_tfm *tfm)
+{
+       struct cmh_hmac_tfm_ctx *ctx = crypto_tfm_ctx(tfm);
+       struct cmh_hmac_chunk *c;
+
+       /* Reap chunks no request freed (e.g. testmgr export/reimport poison) */
+       spin_lock_bh(&ctx->chunk_lock);
+       while ((c = list_first_entry_or_null(&ctx->all_chunks, typeof(*c), tfm_node))) {
+               list_del(&c->tfm_node);
+               kfree_sensitive(c);
+       }
+       spin_unlock_bh(&ctx->chunk_lock);
+
+       cmh_key_destroy(&ctx->key);
+}
+
+/* Registration */
+
+static struct cmh_hmac_alg_drv cmh_hmac_drvs[CMH_HMAC_ALG_COUNT];
+
+/**
+ * cmh_hmac_register() - Register every HMAC ahash algorithm in cmh_hmac_algs_info
+ *
+ * Return: 0 on success; on failure, unregisters what was registered and returns the errno.
+ */
+int cmh_hmac_register(void)
+{
+       unsigned int i;
+       int ret;
+
+       for (i = 0; i < CMH_HMAC_ALG_COUNT; i++) {
+               const struct cmh_hmac_alg_info *info = &cmh_hmac_algs_info[i];
+               struct cmh_hmac_alg_drv *drv = &cmh_hmac_drvs[i];
+               struct ahash_alg *alg = &drv->alg;
+
+               drv->info = info;       /* looked up again by cmh_hmac_get_info() */
+
+               alg->init   = cmh_hmac_init;
+               alg->update = cmh_hmac_update;
+               alg->final  = cmh_hmac_final;
+               alg->finup  = cmh_hmac_finup;
+               alg->digest = cmh_hmac_digest;
+               alg->export = cmh_hmac_export;
+               alg->import = cmh_hmac_import;
+               alg->setkey = cmh_hmac_setkey;
+
+               alg->halg.digestsize = info->digest_size;
+               alg->halg.statesize  = CMH_HMAC_STATE_SIZE;     /* flat export/import buffer */
+
+               strscpy(alg->halg.base.cra_name, info->alg_name,
+                       CRYPTO_MAX_ALG_NAME);
+               strscpy(alg->halg.base.cra_driver_name, info->drv_name,
+                       CRYPTO_MAX_ALG_NAME);
+               alg->halg.base.cra_priority    = 300;   /* same value as in the dev_dbg() below */
+               alg->halg.base.cra_flags       = CRYPTO_ALG_KERN_DRIVER_ONLY |
+                                                CRYPTO_ALG_NO_FALLBACK |
+                                                CRYPTO_ALG_ASYNC |
+                                                CRYPTO_ALG_REQ_VIRT;
+               alg->halg.base.cra_blocksize   = info->block_size;
+               alg->halg.base.cra_ctxsize     = sizeof(struct cmh_hmac_tfm_ctx);
+               alg->halg.base.cra_init        = cmh_hmac_cra_init;
+               alg->halg.base.cra_exit        = cmh_hmac_cra_exit;
+               alg->halg.base.cra_module      = THIS_MODULE;
+
+               ret = crypto_register_ahash(alg);
+               if (ret) {
+                       dev_err(cmh_dev(), "hmac: failed to register %s (rc=%d)\n",
+                               info->drv_name, ret);
+                       while (i--)     /* roll back entries i-1 .. 0 */
+                               crypto_unregister_ahash(&cmh_hmac_drvs[i].alg);
+                       return ret;
+               }
+
+               dev_dbg(cmh_dev(), "hmac: registered %s (priority 300)\n",
+                       info->drv_name);
+       }
+
+       dev_info(cmh_dev(), "hmac: %zu algorithm(s) registered\n",
+                CMH_HMAC_ALG_COUNT);
+       return 0;
+}
+
+/**
+ * cmh_hmac_unregister() - Remove every HMAC ahash algorithm from the crypto framework
+ */
+void cmh_hmac_unregister(void)
+{
+       const struct cmh_hmac_alg_info *info = cmh_hmac_algs_info;
+       struct cmh_hmac_alg_drv *drv = cmh_hmac_drvs;
+
+       for (; drv < cmh_hmac_drvs + CMH_HMAC_ALG_COUNT; drv++, info++) {
+               crypto_unregister_ahash(&drv->alg);
+               dev_dbg(cmh_dev(), "hmac: unregistered %s\n", info->drv_name);
+       }
+
+       dev_info(cmh_dev(), "hmac: cleaned up\n");
+}
diff --git a/drivers/crypto/cmh/cmh_main.c b/drivers/crypto/cmh/cmh_main.c
index e8e30b893932..c18219197bd8 100644
--- a/drivers/crypto/cmh/cmh_main.c
+++ b/drivers/crypto/cmh/cmh_main.c
@@ -30,6 +30,7 @@
 #include "cmh_txn.h"
 #include "cmh_rh.h"
 #include "cmh_hash.h"
+#include "cmh_hmac.h"
 #include "cmh_mgmt.h"
 #include "cmh_registers.h"
 #include "cmh_debugfs.h"
@@ -197,6 +198,11 @@ static int cmh_probe(struct platform_device *pdev)
        if (ret)
                goto err_hash_register;

+       /* Register HMAC hash algorithms */
+       ret = cmh_hmac_register();
+       if (ret)
+               goto err_hmac_register;
+
        /* Register key management device (/dev/cmh_mgmt) */
        ret = cmh_mgmt_register();
        if (ret)
@@ -209,6 +215,8 @@ static int cmh_probe(struct platform_device *pdev)
        return 0;

 err_mgmt_register:
+       cmh_hmac_unregister();
+err_hmac_register:
        cmh_hash_unregister();
 err_hash_register:
        cmh_rh_cleanup(cfg);
@@ -237,6 +245,7 @@ static void cmh_remove(struct platform_device *pdev)
        cfg = &dev->config;

        cmh_mgmt_unregister();
+       cmh_hmac_unregister();
        cmh_hash_unregister();
        cmh_rh_cleanup(cfg);
        cmh_tm_cleanup();
diff --git a/drivers/crypto/cmh/include/cmh_hmac.h b/drivers/crypto/cmh/include/cmh_hmac.h
new file mode 100644
index 000000000000..fb1a11fb76eb
--- /dev/null
+++ b/drivers/crypto/cmh/include/cmh_hmac.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2026 Cryptography Research, Inc. (CRI).
+ * CMH LKM -- Kernel Crypto API HMAC Driver
+ *
+ * Registers HMAC ahash algorithms (HMAC-SHA-2, HMAC-SHA-3) with the
+ * Linux crypto subsystem using HC_CMD_HMAC.
+ */
+
+#ifndef CMH_HMAC_H
+#define CMH_HMAC_H
+
+int  cmh_hmac_register(void);
+void cmh_hmac_unregister(void);
+
+#endif /* CMH_HMAC_H */
--
2.43.7


** This message and any attachments are for the sole use of the intended recipient(s). It may contain information that is confidential and privileged. If you are not the intended recipient of this message, you are prohibited from printing, copying, forwarding or saving it. Please delete the message and attachments and notify the sender immediately. **

Rambus Inc.<http://www.rambus.com>

^ permalink raw reply related

* [PATCH 07/19] crypto: cmh - add SM3 ahash
From: Saravanakrishnan Krishnamoorthy @ 2026-06-25 17:33 UTC (permalink / raw)
  To: Albert Ou, Alex Ousherovitch, Conor Dooley, David S. Miller,
	Herbert Xu, Jonathan Corbet, Krzysztof Kozlowski, Palmer Dabbelt,
	Paul Walmsley, Rob Herring, Saravanakrishnan Krishnamoorthy,
	Shuah Khan
  Cc: Alexandre Ghiti, devicetree, Joel Wittenauer, linux-api,
	linux-crypto, linux-doc, linux-kernel, linux-kselftest,
	linux-riscv, Shuah Khan, sipsupport, Thi Nguyen
In-Reply-To: <20260625173328.1140487-1-skrishnamoorthy@rambus.com>

From: Alex Ousherovitch <aousherovitch@rambus.com>

Register the SM3 ahash algorithm using the CMH SM3 core (core ID
0x05).  Supports incremental update/finup/final and export/import.

Co-developed-by: Saravanakrishnan Krishnamoorthy <skrishnamoorthy@rambus.com>
Signed-off-by: Saravanakrishnan Krishnamoorthy <skrishnamoorthy@rambus.com>
Signed-off-by: Alex Ousherovitch <aousherovitch@rambus.com>
Reviewed-by: Joel Wittenauer <Joel.Wittenauer@cryptography.com>
Reviewed-by: Thi Nguyen <thin@rambus.com>
---
 drivers/crypto/cmh/Makefile          |   3 +-
 drivers/crypto/cmh/cmh_main.c        |   9 +
 drivers/crypto/cmh/cmh_sm3.c         | 651 +++++++++++++++++++++++++++
 drivers/crypto/cmh/include/cmh_sm3.h |  27 ++
 4 files changed, 689 insertions(+), 1 deletion(-)
 create mode 100644 drivers/crypto/cmh/cmh_sm3.c
 create mode 100644 drivers/crypto/cmh/include/cmh_sm3.h

diff --git a/drivers/crypto/cmh/Makefile b/drivers/crypto/cmh/Makefile
index 2bb240b97f31..b3018fbcf211 100644
--- a/drivers/crypto/cmh/Makefile
+++ b/drivers/crypto/cmh/Makefile
@@ -18,7 +18,8 @@ cmh-y := \
        cmh_hash.o \
        cmh_hmac.o \
        cmh_cshake.o \
-       cmh_kmac.o
+       cmh_kmac.o \
+       cmh_sm3.o

 # Management ioctl device (/dev/cmh_mgmt): key lifecycle, PKE, PQC ioctls.
 cmh-$(CONFIG_CRYPTO_DEV_CMH_MGMT) += \
diff --git a/drivers/crypto/cmh/cmh_main.c b/drivers/crypto/cmh/cmh_main.c
index f04cc6855963..56541e0d4219 100644
--- a/drivers/crypto/cmh/cmh_main.c
+++ b/drivers/crypto/cmh/cmh_main.c
@@ -33,6 +33,7 @@
 #include "cmh_hmac.h"
 #include "cmh_cshake.h"
 #include "cmh_kmac.h"
+#include "cmh_sm3.h"
 #include "cmh_mgmt.h"
 #include "cmh_registers.h"
 #include "cmh_debugfs.h"
@@ -215,6 +216,11 @@ static int cmh_probe(struct platform_device *pdev)
        if (ret)
                goto err_kmac_register;

+       /* Register SM3 hash algorithm */
+       ret = cmh_sm3_register();
+       if (ret)
+               goto err_sm3_register;
+
        /* Register key management device (/dev/cmh_mgmt) */
        ret = cmh_mgmt_register();
        if (ret)
@@ -227,6 +233,8 @@ static int cmh_probe(struct platform_device *pdev)
        return 0;

 err_mgmt_register:
+       cmh_sm3_unregister();
+err_sm3_register:
        cmh_kmac_unregister();
 err_kmac_register:
        cmh_cshake_unregister();
@@ -261,6 +269,7 @@ static void cmh_remove(struct platform_device *pdev)
        cfg = &dev->config;

        cmh_mgmt_unregister();
+       cmh_sm3_unregister();
        cmh_kmac_unregister();
        cmh_cshake_unregister();
        cmh_hmac_unregister();
diff --git a/drivers/crypto/cmh/cmh_sm3.c b/drivers/crypto/cmh/cmh_sm3.c
new file mode 100644
index 000000000000..156f93da70af
--- /dev/null
+++ b/drivers/crypto/cmh/cmh_sm3.c
@@ -0,0 +1,651 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2026 Cryptography Research, Inc. (CRI).
+ * CMH LKM -- SM3 Hash Driver (CORE_ID_SM3)
+ *
+ * Registers an asynchronous hash (ahash) algorithm for SM3
+ * (GB/T 32905-2016) using the CMH SM3 core.  This is a standalone
+ * driver separate from cmh_hash.c (which handles HC-based SHA-2/3/SHAKE)
+ * because SM3 runs on a different hardware core with its own command
+ * IDs and context layout.
+ *
+ * Incremental HW update model (same pattern as cmh_hash.c):
+ *
+ *   .init()   -> software-only: zero per-request context
+ *   .update() -> buffer data in holdback; when >= block_size bytes:
+ *                SM3_CMD_INIT [+ RESTORE] + UPDATE + SAVE + FLUSH
+ *                -> return -EINPROGRESS  (else return 0)
+ *   .final()  -> SM3_CMD_INIT [+ RESTORE] [+ UPDATE] + FINAL + FLUSH
+ *   .finup()  -> linearise holdback + new data, then final path
+ *   .digest() -> .init() + .finup(): linearise, INIT + UPDATE + FINAL + FLUSH
+ *   .export() -> software-only: copy checkpoint + holdback to out
+ *   .import() -> software-only: restore checkpoint + holdback from in
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/crypto.h>
+#include <crypto/internal/hash.h>
+#include <crypto/scatterwalk.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+
+#include "cmh_sm3.h"
+#include "cmh_vcq.h"
+#include "cmh_txn.h"
+#include "cmh_dma.h"
+
+/* Per-Request State */
+
+/*
+ * Exported SM3 state -- the blob written by .export() and consumed by
+ * .import(); its size is what .statesize advertises to the crypto API.
+ */
+struct cmh_sm3_export_state {
+       u8  checkpoint[SM3_CONTEXT_SIZE]; /* saved SM3 context; zeroed if none */
+       u8  buf[CMH_SM3_BLOCK_SIZE];     /* holdback buffer */
+       u32 buf_len;                      /* valid bytes in buf[]; import rejects > block size */
+       u32 hw_started;                   /* non-zero if checkpoint valid */
+};
+
+#define CMH_SM3_MAX_PAYLOAD    5   /* INIT + RESTORE + UPDATE + FINAL/SAVE + FLUSH */
+#define CMH_SM3_MAX_PACKED     (CMH_SM3_MAX_PAYLOAD * 2)
+
+/*
+ * Checkpoint embedded inline: the kernel ahash API has no per-request
+ * destructor, so a heap-allocated checkpoint leaks if a request is
+ * abandoned without .final().
+ */
+struct cmh_sm3_reqctx {
+       int    error; /* sticky errno: update/final/finup return it while set */
+       u32    hw_started; /* set once an UPDATE + SAVE job completed OK */
+       u32    buf_len; /* valid bytes in buf[] */
+       u32    has_checkpoint; /* checkpoint[] is valid and must be RESTOREd */
+       u8     checkpoint[SM3_CONTEXT_SIZE]; /* SM3 context from last SAVE */
+       /* DMA state for current async operation */
+       dma_addr_t ckpt_dma; /* mapping of checkpoint[] (RESTORE input) */
+       dma_addr_t save_dma; /* mapping of save_buf (SAVE output) */
+       dma_addr_t data_dma; /* mapping of data_buf (UPDATE input) */
+       dma_addr_t digest_dma; /* mapping of digest_buf (FINAL output) */
+       u8    *save_buf; /* heap buffer of SM3_CONTEXT_SIZE bytes */
+       u8    *data_buf; /* heap copy of the linearised input */
+       u32    data_len; /* bytes mapped at data_dma */
+       u8    *digest_buf; /* heap buffer of CMH_SM3_DIGEST_SIZE bytes */
+       u8     buf[CMH_SM3_BLOCK_SIZE]; /* holdback for partial block */
+       struct vcq_cmd packed[CMH_SM3_MAX_PACKED]; /* handed to cmh_vcq_pack_and_submit_async() */
+};
+
+/* VCQ Builders -- SM3 core (CORE_ID_SM3); generic flush from cmh_vcq.h */
+
+static void vcq_add_sm3_init(struct vcq_cmd *slot, u32 core_id)
+{
+       memset(slot, 0, sizeof(*slot)); /* start from an all-zero command slot */
+       slot->magic = VCQ_CMD_MAGIC;
+       slot->id = VCQ_CMD_ID(core_id, 0, 1, SM3_CMD_INIT); /* first command of every job */
+       /* SM3 has a single algorithm -- no algo selector field */
+}
+
+static void vcq_add_sm3_update(struct vcq_cmd *slot, u32 core_id, u64 input_phys, u32 len)
+{
+       memset(slot, 0, sizeof(*slot)); /* start from an all-zero command slot */
+       slot->magic = VCQ_CMD_MAGIC;
+       slot->id = VCQ_CMD_ID(core_id, 0, 1, SM3_CMD_UPDATE);
+       slot->hwc.sm3.cmd_update.input = input_phys; /* callers pass the DMA address of the data */
+       slot->hwc.sm3.cmd_update.inlen = len; /* length of that mapping in bytes */
+}
+
+static void vcq_add_sm3_final(struct vcq_cmd *slot, u32 core_id, u64 digest_phys, u32 outlen)
+{
+       memset(slot, 0, sizeof(*slot)); /* start from an all-zero command slot */
+       slot->magic = VCQ_CMD_MAGIC;
+       slot->id = VCQ_CMD_ID(core_id, 0, 1, SM3_CMD_FINAL);
+       slot->hwc.sm3.cmd_final.digest = digest_phys; /* callers pass the DMA address of digest_buf */
+       slot->hwc.sm3.cmd_final.outlen = outlen; /* callers pass CMH_SM3_DIGEST_SIZE */
+}
+
+static void vcq_add_sm3_save(struct vcq_cmd *slot, u32 core_id, u64 output_phys, u32 outlen)
+{
+       memset(slot, 0, sizeof(*slot)); /* start from an all-zero command slot */
+       slot->magic = VCQ_CMD_MAGIC;
+       slot->id = VCQ_CMD_ID(core_id, 0, 1, SM3_CMD_SAVE);
+       slot->hwc.sm3.cmd_save.output = output_phys; /* callers pass the DMA address of save_buf */
+       slot->hwc.sm3.cmd_save.outlen = outlen; /* callers pass SM3_CONTEXT_SIZE */
+}
+
+static void vcq_add_sm3_restore(struct vcq_cmd *slot, u32 core_id, u64 input_phys, u32 inlen)
+{
+       memset(slot, 0, sizeof(*slot)); /* start from an all-zero command slot */
+       slot->magic = VCQ_CMD_MAGIC;
+       slot->id = VCQ_CMD_ID(core_id, 0, 1, SM3_CMD_RESTORE);
+       slot->hwc.sm3.cmd_restore.input = input_phys; /* callers pass the DMA address of checkpoint[] */
+       slot->hwc.sm3.cmd_restore.inlen = inlen; /* callers pass SM3_CONTEXT_SIZE */
+}
+
+/* Request Context Cleanup */
+
+static void cmh_sm3_free_reqctx(struct cmh_sm3_reqctx *rctx)
+{
+       rctx->has_checkpoint = 0; /* checkpoint[] is inline: nothing to free, just invalidate */
+}
+
+/* VCQ Packing + Submit */
+
+/* ahash Operations */
+
+static int cmh_sm3_init(struct ahash_request *req)
+{
+       struct cmh_sm3_reqctx *rctx = ahash_request_ctx(req);
+
+       memset(rctx, 0, sizeof(*rctx)); /* clears holdback, checkpoint flag and sticky error */
+       return 0; /* nothing is submitted to the hardware here */
+}
+
+/*
+ * Update completion -- on success the context written by SAVE is copied
+ * into the inline checkpoint; the save/data buffers are freed either way.
+ */
+static void cmh_sm3_update_complete(void *data, int error)
+{
+       struct ahash_request *req = data;
+       struct cmh_sm3_reqctx *rctx = ahash_request_ctx(req);
+
+       if (error == -EINPROGRESS) {
+               cmh_complete(&req->base, error);
+               return;
+       }
+
+       /* ckpt_dma was only mapped when a RESTORE was part of the job */
+       if (rctx->has_checkpoint)
+               cmh_dma_unmap_single(rctx->ckpt_dma, SM3_CONTEXT_SIZE,
+                                    DMA_TO_DEVICE);
+       cmh_dma_unmap_single(rctx->save_dma, SM3_CONTEXT_SIZE,
+                            DMA_FROM_DEVICE);
+       cmh_dma_unmap_single(rctx->data_dma, rctx->data_len,
+                            DMA_TO_DEVICE);
+
+       if (error) {
+               rctx->error = error;
+       } else {
+               memcpy(rctx->checkpoint, rctx->save_buf, SM3_CONTEXT_SIZE);
+               rctx->has_checkpoint = 1;
+               rctx->hw_started = 1;
+       }
+
+       kfree(rctx->save_buf);
+       rctx->save_buf = NULL;
+       kfree(rctx->data_buf);
+       rctx->data_buf = NULL;
+       rctx->data_len = 0;
+
+       cmh_complete(&req->base, error);
+}
+
+static int cmh_sm3_update(struct ahash_request *req)
+{
+       struct cmh_sm3_reqctx *rctx = ahash_request_ctx(req);
+       struct vcq_cmd cmds[CMH_SM3_MAX_PAYLOAD];
+       struct core_dispatch d;
+       u32 total_avail, full_len, tail_len, from_src;
+       u32 idx;
+       int ret;
+       gfp_t gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
+                   GFP_KERNEL : GFP_ATOMIC;
+
+       if (rctx->error) /* sticky: an earlier step of this hash failed */
+               return rctx->error;
+
+       if (!req->nbytes)
+               return 0;
+
+       total_avail = rctx->buf_len + req->nbytes;
+
+       if (total_avail < CMH_SM3_BLOCK_SIZE) {
+               if (req->base.flags & CRYPTO_AHASH_REQ_VIRT)
+                       memcpy(rctx->buf + rctx->buf_len,
+                              req->svirt, req->nbytes);
+               else
+                       scatterwalk_map_and_copy(rctx->buf + rctx->buf_len,
+                                                req->src, 0,
+                                                req->nbytes, 0);
+               rctx->buf_len = total_avail;
+               return 0; /* less than one block: buffered only, no HW job */
+       }
+
+       full_len = total_avail - total_avail % CMH_SM3_BLOCK_SIZE;
+       tail_len = total_avail - full_len; /* bytes to hold back for next time */
+       from_src = full_len - rctx->buf_len; /* bytes of this request hashed now */
+
+       rctx->data_buf = kmalloc(full_len, gfp);
+       if (!rctx->data_buf)
+               return -ENOMEM; /* state untouched so far: not latched */
+
+       if (rctx->buf_len > 0)
+               memcpy(rctx->data_buf, rctx->buf, rctx->buf_len);
+
+       if (from_src > 0) {
+               if (req->base.flags & CRYPTO_AHASH_REQ_VIRT)
+                       memcpy(rctx->data_buf + rctx->buf_len,
+                              req->svirt, from_src);
+               else
+                       scatterwalk_map_and_copy(rctx->data_buf + rctx->buf_len,
+                                                req->src, 0,
+                                                from_src, 0);
+       }
+
+       if (tail_len > 0) {
+               if (req->base.flags & CRYPTO_AHASH_REQ_VIRT)
+                       memcpy(rctx->buf, req->svirt + from_src,
+                              tail_len);
+               else
+                       scatterwalk_map_and_copy(rctx->buf, req->src,
+                                                from_src, tail_len,
+                                                0);
+       }
+       rctx->buf_len = tail_len; /* from here on the old holdback is gone */
+       rctx->data_len = full_len;
+
+       rctx->save_buf = kzalloc(SM3_CONTEXT_SIZE, gfp);
+       if (!rctx->save_buf) {
+               ret = -ENOMEM;
+               goto err_free;
+       }
+
+       rctx->data_dma = cmh_dma_map_single(rctx->data_buf, full_len,
+                                           DMA_TO_DEVICE);
+       if (cmh_dma_map_error(rctx->data_dma)) {
+               ret = -ENOMEM;
+               goto err_free;
+       }
+
+       rctx->save_dma = cmh_dma_map_single(rctx->save_buf, SM3_CONTEXT_SIZE,
+                                           DMA_FROM_DEVICE);
+       if (cmh_dma_map_error(rctx->save_dma)) {
+               ret = -ENOMEM;
+               goto err_unmap_data;
+       }
+
+       rctx->ckpt_dma = DMA_MAPPING_ERROR;
+       if (rctx->has_checkpoint) {
+               rctx->ckpt_dma = cmh_dma_map_single(rctx->checkpoint,
+                                                   SM3_CONTEXT_SIZE,
+                                                    DMA_TO_DEVICE);
+               if (cmh_dma_map_error(rctx->ckpt_dma)) {
+                       ret = -ENOMEM;
+                       goto err_unmap_save;
+               }
+       }
+
+       d = cmh_core_select_instance(CMH_CORE_SM3);
+       idx = 0;
+
+       vcq_add_sm3_init(&cmds[idx++], d.core_id);
+
+       if (rctx->has_checkpoint)
+               vcq_add_sm3_restore(&cmds[idx++], d.core_id,
+                                   (u64)rctx->ckpt_dma, SM3_CONTEXT_SIZE);
+
+       vcq_add_sm3_update(&cmds[idx++], d.core_id,
+                          (u64)rctx->data_dma, full_len);
+
+       vcq_add_sm3_save(&cmds[idx++], d.core_id,
+                        (u64)rctx->save_dma, SM3_CONTEXT_SIZE);
+
+       vcq_add_flush(&cmds[idx++], d.core_id);
+
+       ret = cmh_vcq_pack_and_submit_async(cmds, idx, rctx->packed,
+                                           CMH_SM3_MAX_PACKED,
+                                           d.mbx_idx,
+                                           cmh_sm3_update_complete, req,
+                                           !!(req->base.flags &
+                                              CRYPTO_TFM_REQ_MAY_BACKLOG),
+                                           cmh_tm_async_timeout_jiffies());
+       if (ret == -EBUSY)
+               return -EBUSY; /* NOTE(review): assumes -EBUSY means queued; nothing is unwound */
+       if (ret)
+               goto err_unmap_ckpt;
+
+       return -EINPROGRESS; /* cmh_sm3_update_complete() finishes the job */
+
+err_unmap_ckpt:
+       if (rctx->has_checkpoint)
+               cmh_dma_unmap_single(rctx->ckpt_dma, SM3_CONTEXT_SIZE,
+                                    DMA_TO_DEVICE);
+err_unmap_save:
+       cmh_dma_unmap_single(rctx->save_dma, SM3_CONTEXT_SIZE,
+                            DMA_FROM_DEVICE);
+err_unmap_data:
+       cmh_dma_unmap_single(rctx->data_dma, full_len, DMA_TO_DEVICE);
+err_free:
+       /* Holdback was already consumed: latch so later calls fail loudly */
+       rctx->error = ret;
+       kfree(rctx->save_buf);
+       rctx->save_buf = NULL;
+       kfree(rctx->data_buf);
+       rctx->data_buf = NULL;
+       rctx->data_len = 0;
+       return ret;
+}
+
+static void cmh_sm3_final_complete(void *data, int error)
+{
+       struct ahash_request *req = data;
+       struct cmh_sm3_reqctx *rctx = ahash_request_ctx(req);
+
+       if (error == -EINPROGRESS) {
+               cmh_complete(&req->base, error); /* forwarded as-is; no cleanup on this path */
+               return;
+       }
+
+       if (rctx->has_checkpoint)
+               cmh_dma_unmap_single(rctx->ckpt_dma, SM3_CONTEXT_SIZE,
+                                    DMA_TO_DEVICE);
+       if (rctx->data_buf)
+               cmh_dma_unmap_single(rctx->data_dma, rctx->data_len,
+                                    DMA_TO_DEVICE);
+       cmh_dma_unmap_single(rctx->digest_dma, CMH_SM3_DIGEST_SIZE,
+                            DMA_FROM_DEVICE);
+
+       if (!error)
+               memcpy(req->result, rctx->digest_buf, CMH_SM3_DIGEST_SIZE); /* digest_buf was unmapped above */
+
+       kfree(rctx->digest_buf);
+       rctx->digest_buf = NULL;
+       kfree(rctx->data_buf); /* NULL when the final job carried no data */
+       rctx->data_buf = NULL;
+       cmh_sm3_free_reqctx(rctx); /* invalidates the checkpoint for this request */
+       cmh_complete(&req->base, error);
+}
+
+static int cmh_sm3_submit_final(struct ahash_request *req,
+                               u8 *data_buf, u32 data_len)
+{
+       struct cmh_sm3_reqctx *rctx = ahash_request_ctx(req);
+       struct vcq_cmd cmds[CMH_SM3_MAX_PAYLOAD];
+       struct core_dispatch d;
+       u32 idx;
+       int ret;
+       gfp_t gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
+                  GFP_KERNEL : GFP_ATOMIC;
+
+       rctx->data_buf = data_buf; /* may be NULL; freed here on error, else on completion */
+       rctx->data_len = data_len;
+
+       rctx->digest_buf = kzalloc(CMH_SM3_DIGEST_SIZE, gfp);
+       if (!rctx->digest_buf) {
+               ret = -ENOMEM;
+               goto err_free_data;
+       }
+
+       rctx->digest_dma = cmh_dma_map_single(rctx->digest_buf,
+                                             CMH_SM3_DIGEST_SIZE, DMA_FROM_DEVICE);
+       if (cmh_dma_map_error(rctx->digest_dma)) {
+               ret = -ENOMEM;
+               goto err_free_digest;
+       }
+
+       rctx->data_dma = DMA_MAPPING_ERROR;
+       if (data_buf && data_len > 0) {
+               rctx->data_dma = cmh_dma_map_single(data_buf, data_len,
+                                                   DMA_TO_DEVICE);
+               if (cmh_dma_map_error(rctx->data_dma)) {
+                       ret = -ENOMEM;
+                       goto err_unmap_digest;
+               }
+       }
+
+       rctx->ckpt_dma = DMA_MAPPING_ERROR;
+       if (rctx->has_checkpoint) {
+               rctx->ckpt_dma = cmh_dma_map_single(rctx->checkpoint,
+                                                   SM3_CONTEXT_SIZE,
+                                                    DMA_TO_DEVICE);
+               if (cmh_dma_map_error(rctx->ckpt_dma)) {
+                       ret = -ENOMEM;
+                       goto err_unmap_data;
+               }
+       }
+
+       d = cmh_core_select_instance(CMH_CORE_SM3);
+       idx = 0;
+
+       vcq_add_sm3_init(&cmds[idx++], d.core_id);
+
+       if (rctx->has_checkpoint)
+               vcq_add_sm3_restore(&cmds[idx++], d.core_id,
+                                   (u64)rctx->ckpt_dma, SM3_CONTEXT_SIZE);
+
+       if (data_buf && data_len > 0)
+               vcq_add_sm3_update(&cmds[idx++], d.core_id,
+                                  (u64)rctx->data_dma, data_len);
+
+       vcq_add_sm3_final(&cmds[idx++], d.core_id,
+                         (u64)rctx->digest_dma, CMH_SM3_DIGEST_SIZE);
+
+       vcq_add_flush(&cmds[idx++], d.core_id);
+
+       ret = cmh_vcq_pack_and_submit_async(cmds, idx, rctx->packed,
+                                           CMH_SM3_MAX_PACKED,
+                                           d.mbx_idx,
+                                           cmh_sm3_final_complete, req,
+                                           !!(req->base.flags &
+                                              CRYPTO_TFM_REQ_MAY_BACKLOG),
+                                           cmh_tm_async_timeout_jiffies());
+       if (ret == -EBUSY)
+               return -EBUSY; /* NOTE(review): assumes -EBUSY means queued; nothing is unwound */
+       if (ret)
+               goto err_unmap_ckpt;
+
+       return -EINPROGRESS; /* cmh_sm3_final_complete() delivers the digest */
+
+err_unmap_ckpt:
+       if (rctx->has_checkpoint)
+               cmh_dma_unmap_single(rctx->ckpt_dma, SM3_CONTEXT_SIZE,
+                                    DMA_TO_DEVICE);
+err_unmap_data:
+       if (data_buf && data_len > 0)
+               cmh_dma_unmap_single(rctx->data_dma, data_len,
+                                    DMA_TO_DEVICE);
+err_unmap_digest:
+       cmh_dma_unmap_single(rctx->digest_dma, CMH_SM3_DIGEST_SIZE,
+                            DMA_FROM_DEVICE);
+err_free_digest:
+       kfree(rctx->digest_buf);
+       rctx->digest_buf = NULL;
+err_free_data:
+       kfree(data_buf);
+       rctx->data_buf = NULL;
+       rctx->error = ret; /* pending data and checkpoint are dropped: fail any retry */
+       cmh_sm3_free_reqctx(rctx);
+       return ret;
+}
+
+static int cmh_sm3_final(struct ahash_request *req)
+{
+       struct cmh_sm3_reqctx *rctx = ahash_request_ctx(req);
+       u8 *tail = NULL;
+       u32 tail_len;
+       gfp_t gfp;
+
+       if (rctx->error)
+               return rctx->error;
+
+       /* Detach held-back bytes into a heap copy that submit_final takes over */
+       tail_len = rctx->buf_len;
+       if (tail_len) {
+               gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
+                     GFP_KERNEL : GFP_ATOMIC;
+               tail = kmemdup(rctx->buf, tail_len, gfp);
+               if (!tail)
+                       return -ENOMEM;
+               rctx->buf_len = 0;
+       }
+
+       return cmh_sm3_submit_final(req, tail, tail_len);
+}
+
+static int cmh_sm3_finup(struct ahash_request *req);
+
+/*
+ * One-shot digest -- delegates to init + finup so that all data is
+ * linearised and mapped through cmh_dma_map_single(), which is the
+ * only DMA mapping path aware of all supported DMA backends.
+ */
+static int cmh_sm3_digest(struct ahash_request *req)
+{
+       int ret = cmh_sm3_init(req);
+
+       if (ret)
+               return ret;
+       /* context is freshly zeroed, so finup hashes only this request's data */
+       return cmh_sm3_finup(req);
+}
+
+static int cmh_sm3_finup(struct ahash_request *req)
+{
+       struct cmh_sm3_reqctx *rctx = ahash_request_ctx(req);
+       u32 data_len;
+       u8 *data_buf;
+       gfp_t gfp;
+
+       if (rctx->error)
+               return rctx->error; /* sticky error from an earlier step */
+
+       data_len = rctx->buf_len + req->nbytes; /* NOTE(review): u32 sum, no overflow check */
+
+       if (data_len == 0)
+               return cmh_sm3_submit_final(req, NULL, 0); /* no UPDATE command is queued */
+
+       gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
+             GFP_KERNEL : GFP_ATOMIC;
+
+       data_buf = kmalloc(data_len, gfp); /* linear copy: holdback followed by request data */
+       if (!data_buf)
+               return -ENOMEM;
+
+       if (rctx->buf_len > 0)
+               memcpy(data_buf, rctx->buf, rctx->buf_len);
+
+       if (req->nbytes > 0) {
+               if (req->base.flags & CRYPTO_AHASH_REQ_VIRT)
+                       memcpy(data_buf + rctx->buf_len,
+                              req->svirt, req->nbytes);
+               else
+                       scatterwalk_map_and_copy(data_buf + rctx->buf_len,
+                                                req->src, 0,
+                                                req->nbytes, 0);
+       }
+
+       rctx->buf_len = 0; /* holdback now lives in data_buf */
+       return cmh_sm3_submit_final(req, data_buf, data_len); /* frees data_buf on failure */
+}
+
+static int cmh_sm3_export(struct ahash_request *req, void *out)
+{
+       struct cmh_sm3_reqctx *rctx = ahash_request_ctx(req);
+       struct cmh_sm3_export_state *state = out;
+
+       if (rctx->hw_started && rctx->has_checkpoint)
+               memcpy(state->checkpoint, rctx->checkpoint, SM3_CONTEXT_SIZE);
+       else
+               memset(state->checkpoint, 0, SM3_CONTEXT_SIZE); /* no saved context yet */
+
+       memset(state->buf, 0, sizeof(state->buf)); /* no stale bytes past buf_len */
+       memcpy(state->buf, rctx->buf, rctx->buf_len);
+
+       state->buf_len = rctx->buf_len;
+       state->hw_started = rctx->hw_started;
+
+       return 0; /* software-only: no hardware job is issued */
+}
+
+static int cmh_sm3_import(struct ahash_request *req, const void *in)
+{
+       struct cmh_sm3_reqctx *rctx = ahash_request_ctx(req);
+       const struct cmh_sm3_export_state *state = in;
+
+       /* Validate first so a bad blob leaves the request context untouched */
+       if (state->buf_len > CMH_SM3_BLOCK_SIZE)
+               return -EINVAL;
+
+       memset(rctx, 0, sizeof(*rctx));
+       rctx->hw_started = state->hw_started;
+       rctx->buf_len = state->buf_len;
+       memcpy(rctx->buf, state->buf, state->buf_len);
+
+       if (state->hw_started) {
+               memcpy(rctx->checkpoint, state->checkpoint, SM3_CONTEXT_SIZE);
+               rctx->has_checkpoint = 1; /* next job will RESTORE this context */
+       }
+
+       return 0;
+}
+
+/* Transform init (cra_init) */
+
+static int cmh_sm3_cra_init(struct crypto_tfm *tfm)
+{
+       crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+                                sizeof(struct cmh_sm3_reqctx)); /* room for the per-request state */
+       return 0; /* NOTE(review): checkpoint[] in rctx is DMA-mapped; is a _dma reqsize needed? */
+}
+
+/* Registration */
+
+static struct ahash_alg cmh_sm3_ahash_alg = {
+       .init    = cmh_sm3_init,
+       .update  = cmh_sm3_update,
+       .final   = cmh_sm3_final,
+       .finup   = cmh_sm3_finup,
+       .digest  = cmh_sm3_digest,
+       .export  = cmh_sm3_export,
+       .import  = cmh_sm3_import,
+
+       .halg = {
+               .digestsize = CMH_SM3_DIGEST_SIZE,
+               .statesize  = sizeof(struct cmh_sm3_export_state), /* blob used by export/import */
+               .base = {
+                       .cra_name        = "sm3",
+                       .cra_driver_name = "cri-cmh-sm3",
+                       .cra_priority    = 300,
+                       .cra_flags       = CRYPTO_ALG_KERN_DRIVER_ONLY |
+                                          CRYPTO_ALG_NO_FALLBACK |
+                                          CRYPTO_ALG_ASYNC |
+                                          CRYPTO_ALG_REQ_VIRT,
+                       .cra_blocksize   = CMH_SM3_BLOCK_SIZE,
+                       .cra_ctxsize     = 0, /* no per-tfm context; state is per request */
+                       .cra_init        = cmh_sm3_cra_init, /* sets the request context size */
+                       .cra_module      = THIS_MODULE,
+               },
+       },
+};
+
+/**
+ * cmh_sm3_register() - Register SM3 hash algorithm with the crypto framework
+ *
+ * Return: 0 on success, negative errno on failure.
+ */
+int cmh_sm3_register(void)
+{
+       int ret;
+
+       ret = crypto_register_ahash(&cmh_sm3_ahash_alg);
+       if (ret) {
+               dev_err(cmh_dev(), "sm3: failed to register cri-cmh-sm3 (rc=%d)\n",
+                       ret); /* same driver name as the success and unregister messages */
+               return ret;
+       }
+
+       dev_info(cmh_dev(), "sm3: registered cri-cmh-sm3 (priority 300)\n");
+       dev_info(cmh_dev(), "sm3: 1 algorithm(s) registered\n");
+       return 0;
+}
+
+/**
+ * cmh_sm3_unregister() - Unregister SM3 hash algorithm from the crypto framework
+ */
+void cmh_sm3_unregister(void)
+{
+       crypto_unregister_ahash(&cmh_sm3_ahash_alg); /* undoes cmh_sm3_register() */
+       dev_info(cmh_dev(), "sm3: unregistered cri-cmh-sm3\n");
+       dev_info(cmh_dev(), "sm3: cleaned up\n");
+}
diff --git a/drivers/crypto/cmh/include/cmh_sm3.h b/drivers/crypto/cmh/include/cmh_sm3.h
new file mode 100644
index 000000000000..2f73537f9c87
--- /dev/null
+++ b/drivers/crypto/cmh/include/cmh_sm3.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2026 Cryptography Research, Inc. (CRI).
+ * CMH LKM -- SM3 Hash Driver
+ *
+ * Registers an ahash algorithm for SM3 (GB/T 32905-2016) with the
+ * Linux crypto subsystem using the CMH SM3 core (CORE_ID_SM3).
+ * Uses the same incremental HW update model as cmh_hash.c:
+ *
+ *   .init()   -> software-only: zero per-request context
+ *   .update() -> holdback partial blocks; submit full blocks via
+ *                SM3_CMD_INIT [+ RESTORE] + UPDATE + SAVE + FLUSH
+ *   .final()  -> SM3_CMD_INIT [+ RESTORE] [+ UPDATE] + FINAL + FLUSH
+ *   .digest() -> INIT + UPDATE + FINAL + FLUSH (single-shot)
+ *   .export() -> software-only: copy checkpoint + holdback
+ *   .import() -> software-only: restore checkpoint + holdback
+ */
+
+#ifndef CMH_SM3_H
+#define CMH_SM3_H
+
+#include "cmh_config.h"
+
+int  cmh_sm3_register(void);
+void cmh_sm3_unregister(void);
+
+#endif /* CMH_SM3_H */
--
2.43.7


** This message and any attachments are for the sole use of the intended recipient(s). It may contain information that is confidential and privileged. If you are not the intended recipient of this message, you are prohibited from printing, copying, forwarding or saving it. Please delete the message and attachments and notify the sender immediately. **

Rambus Inc.<http://www.rambus.com>

^ permalink raw reply related

* [PATCH 13/19] crypto: cmh - add ECDSA/SM2 sig
From: Saravanakrishnan Krishnamoorthy @ 2026-06-25 17:33 UTC (permalink / raw)
  To: Albert Ou, Alex Ousherovitch, Conor Dooley, David S. Miller,
	Herbert Xu, Jonathan Corbet, Krzysztof Kozlowski, Palmer Dabbelt,
	Paul Walmsley, Rob Herring, Saravanakrishnan Krishnamoorthy,
	Shuah Khan
  Cc: Alexandre Ghiti, devicetree, Joel Wittenauer, linux-api,
	linux-crypto, linux-doc, linux-kernel, linux-kselftest,
	linux-riscv, Shuah Khan, sipsupport, Thi Nguyen
In-Reply-To: <20260625173328.1140487-1-skrishnamoorthy@rambus.com>

From: Alex Ousherovitch <aousherovitch@rambus.com>

Register ECDSA and SM2 sig algorithms using the CMH PKE core.
Supports P-256, P-384, P-521, and SM2 curves for sign and verify
operations.  SM2 is registered as verify-only via the crypto API;
full SM2 operations (encrypt, decrypt, key exchange) are available
through the /dev/cmh_mgmt ioctl interface.

Co-developed-by: Saravanakrishnan Krishnamoorthy <skrishnamoorthy@rambus.com>
Signed-off-by: Saravanakrishnan Krishnamoorthy <skrishnamoorthy@rambus.com>
Signed-off-by: Alex Ousherovitch <aousherovitch@rambus.com>
Reviewed-by: Joel Wittenauer <Joel.Wittenauer@cryptography.com>
Reviewed-by: Thi Nguyen <thin@rambus.com>
---
 drivers/crypto/cmh/Makefile        |   3 +-
 drivers/crypto/cmh/cmh_main.c      |   8 +
 drivers/crypto/cmh/cmh_pke_ecdsa.c | 575 +++++++++++++++++++++++++++++
 3 files changed, 585 insertions(+), 1 deletion(-)
 create mode 100644 drivers/crypto/cmh/cmh_pke_ecdsa.c

diff --git a/drivers/crypto/cmh/Makefile b/drivers/crypto/cmh/Makefile
index 7afd9852c337..fdbf66b13628 100644
--- a/drivers/crypto/cmh/Makefile
+++ b/drivers/crypto/cmh/Makefile
@@ -31,7 +31,8 @@ cmh-y := \
        cmh_ccp_poly.o \
        cmh_rng.o \
        cmh_pke_common.o \
-       cmh_pke_rsa.o
+       cmh_pke_rsa.o \
+       cmh_pke_ecdsa.o

 # Management ioctl device (/dev/cmh_mgmt): key lifecycle, PKE, PQC ioctls.
 cmh-$(CONFIG_CRYPTO_DEV_CMH_MGMT) += \
diff --git a/drivers/crypto/cmh/cmh_main.c b/drivers/crypto/cmh/cmh_main.c
index 8535453342d7..939ff5007755 100644
--- a/drivers/crypto/cmh/cmh_main.c
+++ b/drivers/crypto/cmh/cmh_main.c
@@ -281,6 +281,11 @@ static int cmh_probe(struct platform_device *pdev)
        if (ret)
                goto err_pke_rsa_register;

+       /* Register PKE ECDSA/SM2 sig */
+       ret = cmh_pke_ecdsa_register();
+       if (ret)
+               goto err_pke_ecdsa_register;
+
        /* Register key management device (/dev/cmh_mgmt) */
        ret = cmh_mgmt_register();
        if (ret)
@@ -293,6 +298,8 @@ static int cmh_probe(struct platform_device *pdev)
        return 0;

 err_mgmt_register:
+       cmh_pke_ecdsa_unregister();
+err_pke_ecdsa_register:
        cmh_pke_rsa_unregister();
 err_pke_rsa_register:
        cmh_ccp_poly_unregister();
@@ -351,6 +358,7 @@ static void cmh_remove(struct platform_device *pdev)
        cfg = &dev->config;

        cmh_mgmt_unregister();
+       cmh_pke_ecdsa_unregister();
        cmh_pke_rsa_unregister();
        cmh_ccp_poly_unregister();
        cmh_ccp_aead_unregister();
diff --git a/drivers/crypto/cmh/cmh_pke_ecdsa.c b/drivers/crypto/cmh/cmh_pke_ecdsa.c
new file mode 100644
index 000000000000..6b65f7fb72cc
--- /dev/null
+++ b/drivers/crypto/cmh/cmh_pke_ecdsa.c
@@ -0,0 +1,575 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2026 Cryptography Research, Inc. (CRI).
+ * CMH LKM -- ECDSA / SM2 Signature Driver (sig_alg, synchronous)
+ *
+ * Registers "ecdsa-nist-p256", "ecdsa-nist-p384", and "ecdsa-nist-p521"
+ * sig algorithms with sign, verify, set_pub_key, and set_priv_key callbacks.
+ * Registers "sm2" as verify-only (set_pub_key + verify); SM2 sign is
+ * provided via the cmh_mgmt ioctl path in cmh_pke_sm2.c.
+ *
+ * In-kernel consumers typically use verify-only (module signatures, IMA),
+ * but we provide sign as well for completeness -- matching the CMH eSW
+ * capability.
+ *
+ * Key format: Public key = raw 04 || X || Y (uncompressed).
+ * Signature format: struct ecdsa_raw_sig (two u64[ECC_MAX_DIGITS] arrays
+ * in VLI format -- native byte order, LE digit order) for both sign
+ * output and verify input.  This matches the kernel crypto sig API.
+ *
+ * Private key via cmh_key_ctx: raw keys written via SYS_REF_TEMP.
+ * Datastore-referenced keys are only reachable through the ioctl
+ * path (cmh_mgmt.c).
+ *
+ * SM2 note: The SM2 sig entry is verify-only (no sign/set_priv_key).
+ * SM2 signature verification requires the digest to be SM3(ZA || M)
+ * where ZA = SM3(ENTLA || IDA || a || b || xG || yG || xA || yA).
+ * The ZA identity pre-hash is the caller's responsibility; the driver
+ * passes the digest directly to the CMH eSW SM2 verify engine.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <crypto/sha2.h>
+#include <crypto/sig.h>
+#include <crypto/internal/sig.h>
+#include <crypto/internal/ecc.h>
+
+#include "cmh_pke.h"
+#include "cmh_sys.h"
+#include "cmh_sys_abi.h"
+#include "cmh_txn.h"
+#include "cmh_dma.h"
+#include "cmh_key.h"
+
+/*
+ * Count the 64-bit ECC digits required to hold one coordinate of @clen
+ * bytes; a partial trailing digit is rounded up (32 -> 4, 48 -> 6,
+ * 68 -> 9).
+ *
+ * NOTE(review): the previous comment listed SM2 with clen=68; SM2 is a
+ * 256-bit curve, so confirm that value against pke_curve_clen().
+ */
+static inline unsigned int clen_to_ndigits(u32 clen)
+{
+       const size_t digit_bytes = sizeof(u64);
+
+       return (clen + digit_bytes - 1) / digit_bytes;
+}
+
+/*
+ * Per-transform state shared by the ECDSA and SM2 sig algorithms.
+ * @curve and @clen are fixed by the algorithm's init callback; the key
+ * fields are filled in by set_priv_key / set_pub_key.
+ */
+struct cmh_ecdsa_tfm_ctx {
+       struct cmh_key_ctx key;         /* private key (raw only) */
+       u8 *pub_key;                    /* uncompressed (x, y) without 04 prefix */
+       u32 pub_key_len;                /* bytes in pub_key: 2 * clen once set, else 0 */
+       u32 curve;                      /* PKE_CURVE_* */
+       u32 clen;                       /* coordinate length in bytes */
+};
+
+/* Fetch the driver's per-transform context stored inside @tfm. */
+static inline struct cmh_ecdsa_tfm_ctx *cmh_ecdsa_ctx(struct crypto_sig *tfm)
+{
+       struct cmh_ecdsa_tfm_ctx *ctx = crypto_sig_ctx(tfm);
+
+       return ctx;
+}
+
+/*
+ * Convert one VLI component (u64 array, LE digit order, native byte order)
+ * to a big-endian byte array of @out_len bytes.
+ *
+ * @vli:     source digits, least-significant digit first
+ * @ndigits: number of digits to convert; clamped (with a one-time warning)
+ *           to ECC_MAX_DIGITS so the on-stack scratch buffer cannot be
+ *           overrun
+ * @out:     destination buffer of @out_len bytes
+ * @out_len: size of @out
+ *
+ * The value is right-aligned in @out: when ndigits*8 < out_len the leading
+ * bytes are zero padding; when ndigits*8 > out_len the excess leading bytes
+ * are dropped, and a one-time warning fires if any dropped byte is non-zero.
+ */
+static void ecdsa_vli_to_be(const u64 *vli, unsigned int ndigits,
+                           u8 *out, unsigned int out_len)
+{
+       unsigned int full_len;
+       unsigned int i, skip;
+
+       /* Same bound ecdsa_be_to_vli() enforces: tmp[] holds ECC_MAX_DIGITS */
+       if (WARN_ON_ONCE(ndigits > ECC_MAX_DIGITS))
+               ndigits = ECC_MAX_DIGITS;
+       full_len = ndigits * sizeof(u64);
+
+       memset(out, 0, out_len);
+
+       if (full_len <= out_len) {
+               /* VLI fits entirely -- write at right end of out */
+               u8 *dst = out + (out_len - full_len);
+
+               for (i = 0; i < ndigits; i++)
+                       put_unaligned_be64(vli[ndigits - 1 - i],
+                                          &dst[i * sizeof(u64)]);
+       } else {
+               /* VLI wider than out -- skip leading (zero) bytes */
+               u8 tmp[ECC_MAX_BYTES];
+
+               /* Most-significant digit first gives a big-endian image */
+               for (i = 0; i < ndigits; i++)
+                       put_unaligned_be64(vli[ndigits - 1 - i],
+                                          &tmp[i * sizeof(u64)]);
+               skip = full_len - out_len;
+               WARN_ON_ONCE(memchr_inv(tmp, 0, skip));
+               memcpy(out, tmp + skip, out_len);
+       }
+}
+
+/*
+ * Convert a big-endian byte array to VLI (u64 array, LE digit order).
+ *
+ * @in:         big-endian input value
+ * @in_len:     length of @in in bytes
+ * @vli:        destination digits, least-significant digit first
+ * @max_digits: number of digits to write; clamped (with a one-time
+ *              warning) to ECC_MAX_DIGITS
+ *
+ * The input is right-aligned into max_digits*8 bytes: shorter inputs are
+ * zero-extended on the left, longer inputs keep only their low-order
+ * bytes.  Exactly @max_digits (after clamping) entries of @vli are written.
+ */
+static void ecdsa_be_to_vli(const u8 *in, unsigned int in_len,
+                           u64 *vli, unsigned int max_digits)
+{
+       unsigned int full_len;
+       u8 tmp[ECC_MAX_BYTES];
+       unsigned int i;
+
+       if (WARN_ON_ONCE(max_digits > ECC_MAX_DIGITS))
+               max_digits = ECC_MAX_DIGITS;
+
+       /*
+        * Derive the scratch length only after clamping, otherwise an
+        * oversized max_digits would still overrun tmp[] below.
+        */
+       full_len = max_digits * sizeof(u64);
+
+       memset(tmp, 0, full_len);
+       if (in_len <= full_len)
+               memcpy(tmp + (full_len - in_len), in, in_len);
+       else
+               memcpy(tmp, in + (in_len - full_len), full_len);
+
+       /* tmp[] is big-endian, so digit i lives (max_digits-1-i) words in */
+       for (i = 0; i < max_digits; i++) {
+               unsigned int off = (max_digits - 1 - i) * sizeof(u64);
+
+               vli[i] = get_unaligned_be64(&tmp[off]);
+       }
+}
+
+/*
+ * Extract raw (r || s) big-endian byte arrays from struct ecdsa_raw_sig.
+ *
+ * @src:    struct ecdsa_raw_sig (VLI format)
+ * @slen:   length of @src; must equal sizeof(struct ecdsa_raw_sig)
+ * @raw_rs: output buffer of at least 2 * @clen bytes
+ * @clen:   coordinate length in bytes
+ *
+ * Each component is written as @clen bytes into @raw_rs.  Only the low
+ * clen_to_ndigits(@clen) digits of r and s are converted.
+ *
+ * Returns 0 on success, -EINVAL for a wrong @slen or an unsupported @clen,
+ * -EBADMSG when r or s has bits set above @clen bytes.
+ */
+static int ecdsa_sig_to_raw(const void *src, unsigned int slen,
+                           u8 *raw_rs, u32 clen)
+{
+       const struct ecdsa_raw_sig *sig = src;
+       unsigned int ndigits = clen_to_ndigits(clen);
+       unsigned int excess;
+
+       if (slen != sizeof(struct ecdsa_raw_sig))
+               return -EINVAL;
+
+       /* sig->r / sig->s only hold ECC_MAX_DIGITS digits */
+       if (!ndigits || ndigits > ECC_MAX_DIGITS)
+               return -EINVAL;
+
+       /*
+        * When clen is not a multiple of 8 the top digit carries 1..7
+        * bytes that do not fit in the clen-byte output.  The signature
+        * is caller-supplied, so reject non-zero excess here instead of
+        * letting ecdsa_vli_to_be() warn and silently truncate it.
+        */
+       excess = ndigits * sizeof(u64) - clen;
+       if (excess) {
+               unsigned int keep_bits = 8 * (sizeof(u64) - excess);
+
+               if ((sig->r[ndigits - 1] >> keep_bits) ||
+                   (sig->s[ndigits - 1] >> keep_bits))
+                       return -EBADMSG;
+       }
+
+       ecdsa_vli_to_be(sig->r, ndigits, raw_rs, clen);
+       ecdsa_vli_to_be(sig->s, ndigits, raw_rs + clen, clen);
+       return 0;
+}
+
+/*
+ * Pack a raw big-endian (r || s) pair, @clen bytes per component, into the
+ * struct ecdsa_raw_sig at @dst.
+ *
+ * Returns sizeof(struct ecdsa_raw_sig) on success, or -ENOSPC when @dlen
+ * is too small to hold the structure.
+ */
+static int ecdsa_raw_to_sig(const u8 *raw_rs, u32 clen,
+                           void *dst, unsigned int dlen)
+{
+       struct ecdsa_raw_sig *out = dst;
+       const u8 *r_be = raw_rs;
+       const u8 *s_be = raw_rs + clen;
+
+       if (dlen < sizeof(*out))
+               return -ENOSPC;
+
+       memset(out, 0, sizeof(*out));
+       ecdsa_be_to_vli(r_be, clen, out->r, ECC_MAX_DIGITS);
+       ecdsa_be_to_vli(s_be, clen, out->s, ECC_MAX_DIGITS);
+
+       return sizeof(*out);
+}
+
+/*
+ * ECDSA verify (synchronous sig_alg)
+ *
+ * @tfm:    sig transform; its context supplies curve, clen and public key
+ * @src:    struct ecdsa_raw_sig (VLI format)
+ * @slen:   signature length (must be sizeof(struct ecdsa_raw_sig))
+ * @digest: hash digest
+ * @dlen:   digest length
+ *
+ * The signature, digest and public key are staged in freshly allocated
+ * buffers, mapped for DMA and submitted to the PKE core as one synchronous
+ * VCQ (header + ECDSA verify + flush).
+ *
+ * Returns 0 on successful verification, negative errno on failure:
+ * -EINVAL when no public key is set or @slen is wrong, -ENOMEM on
+ * allocation or DMA-mapping failure, otherwise whatever
+ * cmh_tm_submit_sync_mbx() returns.
+ *
+ * NOTE(review): the PKE core instance is selected in the declarations,
+ * i.e. before the public-key check and the allocations; confirm that
+ * cmh_core_select_instance() has no state that an early return would skew.
+ */
+static int cmh_ecdsa_verify(struct crypto_sig *tfm,
+                           const void *src, unsigned int slen,
+                           const void *digest, unsigned int dlen)
+{
+       struct cmh_ecdsa_tfm_ctx *ctx = cmh_ecdsa_ctx(tfm);
+       u32 clen = ctx->clen;
+       u32 sig_raw_len = 2 * clen;
+       u32 copy_len = min_t(u32, dlen, clen);
+       struct core_dispatch d = cmh_core_select_instance(CMH_CORE_PKE);
+       struct vcq_cmd vcq[PKE_VCQ_CMDS_MIN];
+       u8 *sig_raw = NULL, *dig_buf = NULL, *pk_buf = NULL, *rp_buf = NULL;
+       dma_addr_t pk_dma, dig_dma, sig_dma, rp_dma;
+       int ret;
+
+       if (!ctx->pub_key)
+               return -EINVAL;
+
+       /* All four are allocated up front and checked together below */
+       sig_raw = kzalloc(sig_raw_len, GFP_KERNEL);
+       dig_buf = kzalloc(clen, GFP_KERNEL);
+       pk_buf = kmemdup(ctx->pub_key, ctx->pub_key_len, GFP_KERNEL);
+       /* Device-written output; its contents are not read back here */
+       rp_buf = kzalloc(clen, GFP_KERNEL);
+       if (!sig_raw || !dig_buf || !pk_buf || !rp_buf) {
+               ret = -ENOMEM;
+               goto out_free;
+       }
+
+       /* Extract raw (r, s) big-endian from VLI signature */
+       ret = ecdsa_sig_to_raw(src, slen, sig_raw, clen);
+       if (ret)
+               goto out_free;
+
+       /*
+        * Truncate or zero-pad digest to clen bytes, right-aligned.
+        * Matches ECDSA bits2int: use leftmost min(dlen, clen) bytes,
+        * zero-pad on the left when dlen < clen.
+        */
+       memcpy(dig_buf + (clen - copy_len), digest, copy_len);
+
+       /*
+        * Map everything first, then test all handles at once; out_unmap
+        * skips any handle that failed to map.
+        */
+       pk_dma = cmh_dma_map_single(pk_buf, ctx->pub_key_len, DMA_TO_DEVICE);
+       dig_dma = cmh_dma_map_single(dig_buf, clen, DMA_TO_DEVICE);
+       sig_dma = cmh_dma_map_single(sig_raw, sig_raw_len, DMA_TO_DEVICE);
+       rp_dma = cmh_dma_map_single(rp_buf, clen, DMA_FROM_DEVICE);
+
+       if (cmh_dma_map_error(pk_dma) || cmh_dma_map_error(dig_dma) ||
+           cmh_dma_map_error(sig_dma) || cmh_dma_map_error(rp_dma)) {
+               ret = -ENOMEM;
+               goto out_unmap;
+       }
+
+       /* vcq[0] = header, vcq[1] = verify, vcq[2] = flush */
+       vcq_set_header(&vcq[0], PKE_VCQ_CMDS_MIN);
+       vcq_add_pke_ecdsa_verify(&vcq[1], d.core_id, ctx->curve, clen,
+                                pk_dma, dig_dma, sig_dma, rp_dma,
+                                pke_swap_flags(ctx->curve));
+       vcq_add_pke_flush(&vcq[2], d.core_id);
+
+       /* The submission status is the verification verdict */
+       ret = cmh_tm_submit_sync_mbx(vcq, PKE_VCQ_CMDS_MIN, 1, d.mbx_idx);
+
+out_unmap:
+       if (!cmh_dma_map_error(rp_dma))
+               cmh_dma_unmap_single(rp_dma, clen, DMA_FROM_DEVICE);
+       if (!cmh_dma_map_error(sig_dma))
+               cmh_dma_unmap_single(sig_dma, sig_raw_len, DMA_TO_DEVICE);
+       if (!cmh_dma_map_error(dig_dma))
+               cmh_dma_unmap_single(dig_dma, clen, DMA_TO_DEVICE);
+       if (!cmh_dma_map_error(pk_dma))
+               cmh_dma_unmap_single(pk_dma, ctx->pub_key_len, DMA_TO_DEVICE);
+
+out_free:
+       /* kfree(NULL) is a no-op, so partial allocations are fine here */
+       kfree(rp_buf);
+       kfree(pk_buf);
+       kfree(sig_raw);
+       kfree(dig_buf);
+       return ret;
+}
+
+/*
+ * ECDSA sign (synchronous sig_alg)
+ *
+ * @tfm:  sig transform; its context supplies curve, clen and private key
+ * @src:  hash digest
+ * @slen: digest length
+ * @dst:  output buffer for struct ecdsa_raw_sig (VLI format)
+ * @dlen: output buffer length
+ *
+ * The raw private key is copied to a bounce buffer, written to the
+ * SYS_REF_TEMP slot and consumed by the sign command within the same
+ * VCQ (header + sys write + ECDSA sign + flush).
+ *
+ * Returns sizeof(struct ecdsa_raw_sig) on success, negative errno on failure:
+ * -EINVAL when no raw private key is set or @dlen is too small, -ENOMEM on
+ * allocation or DMA-mapping failure, otherwise whatever
+ * cmh_tm_submit_sync_mbx() returns.
+ */
+static int cmh_ecdsa_sign(struct crypto_sig *tfm,
+                         const void *src, unsigned int slen,
+                         void *dst, unsigned int dlen)
+{
+       struct cmh_ecdsa_tfm_ctx *ctx = cmh_ecdsa_ctx(tfm);
+       u32 clen = ctx->clen;
+       u32 sig_raw_len = 2 * clen;
+       u32 copy_len = min_t(u32, slen, clen);
+       struct core_dispatch dd;
+       struct vcq_cmd vcq[PKE_VCQ_CMDS_MAX];
+       u8 *dig_buf = NULL, *sig_buf = NULL, *sk_buf = NULL;
+       dma_addr_t dig_dma, sig_dma, sk_dma;
+       int ret, idx;
+
+       /* Only raw keys are supported on this path */
+       if (ctx->key.mode != CMH_KEY_RAW)
+               return -EINVAL;
+       if (dlen < sizeof(struct ecdsa_raw_sig))
+               return -EINVAL;
+
+       dig_buf = kzalloc(clen, GFP_KERNEL);
+       sig_buf = kzalloc(sig_raw_len, GFP_KERNEL);
+       sk_buf = kmemdup(ctx->key.raw.data, ctx->key.raw.len, GFP_KERNEL);
+       if (!dig_buf || !sig_buf || !sk_buf) {
+               ret = -ENOMEM;
+               goto out_free;
+       }
+
+       /*
+        * Truncate or zero-pad digest to clen bytes, right-aligned.
+        * Matches ECDSA bits2int: use leftmost min(slen, clen) bytes,
+        * zero-pad on the left when slen < clen.
+        */
+       memcpy(dig_buf + (clen - copy_len), src, copy_len);
+
+       /*
+        * Map everything first, then test all handles at once; out_unmap
+        * skips any handle that failed to map.
+        */
+       dig_dma = cmh_dma_map_single(dig_buf, clen, DMA_TO_DEVICE);
+       sig_dma = cmh_dma_map_single(sig_buf, sig_raw_len, DMA_FROM_DEVICE);
+       sk_dma = cmh_dma_map_single(sk_buf, ctx->key.raw.len, DMA_TO_DEVICE);
+
+       if (cmh_dma_map_error(dig_dma) || cmh_dma_map_error(sig_dma) ||
+           cmh_dma_map_error(sk_dma)) {
+               ret = -ENOMEM;
+               goto out_unmap;
+       }
+
+       dd = cmh_core_select_instance(CMH_CORE_PKE);
+
+       /* Slot 0 is reserved for the header, written last once idx is known */
+       idx = 1;
+       vcq_add_sys_write(&vcq[idx], SYS_REF_TEMP, sk_dma,
+                         SYS_REF_NONE, ctx->key.raw.len,
+                         ctx->key.raw.sys_type);
+       /* Apply the curve's swap flags to the key-write command as well */
+       vcq[idx].id |= pke_swap_flags(ctx->curve);
+       idx++;
+       vcq_add_pke_ecdsa_sign(&vcq[idx++], dd.core_id, ctx->curve, clen,
+                              dig_dma, sig_dma, SYS_REF_TEMP,
+                              clen, pke_swap_flags(ctx->curve));
+       vcq_add_pke_flush(&vcq[idx++], dd.core_id);
+       vcq_set_header(&vcq[0], idx);
+
+       ret = cmh_tm_submit_sync_mbx(vcq, idx, 1, dd.mbx_idx);
+       if (!ret) {
+               /* Sync bounce buffer so CPU sees the DMA-written signature */
+               cmh_dma_sync_for_cpu(sig_dma, sig_raw_len, DMA_FROM_DEVICE);
+
+               /* Encode raw (r||s) into VLI ecdsa_raw_sig for kernel API */
+               ret = ecdsa_raw_to_sig(sig_buf, clen, dst, dlen);
+       }
+
+out_unmap:
+       if (!cmh_dma_map_error(sk_dma))
+               cmh_dma_unmap_single(sk_dma, ctx->key.raw.len, DMA_TO_DEVICE);
+       if (!cmh_dma_map_error(sig_dma))
+               cmh_dma_unmap_single(sig_dma, sig_raw_len, DMA_FROM_DEVICE);
+       if (!cmh_dma_map_error(dig_dma))
+               cmh_dma_unmap_single(dig_dma, clen, DMA_TO_DEVICE);
+
+out_free:
+       /* The private-key copy is wiped before being released */
+       kfree_sensitive(sk_buf);
+       kfree(sig_buf);
+       kfree(dig_buf);
+       return ret;
+}
+
+/*
+ * Install a public key given as an uncompressed point (04 || X || Y).
+ *
+ * X and Y may each be shorter than the context's clen; they are stored
+ * right-aligned (zero-padded on the left) as two clen-byte fields.  Any
+ * previously installed key is released before the new buffer is
+ * allocated, so the context has no public key after an -ENOMEM failure.
+ *
+ * Returns 0 on success, -EINVAL for a malformed key, -ENOMEM on
+ * allocation failure.
+ */
+static int cmh_ecdsa_set_pub_key(struct crypto_sig *tfm,
+                                const void *key, unsigned int keylen)
+{
+       struct cmh_ecdsa_tfm_ctx *ctx = cmh_ecdsa_ctx(tfm);
+       const u8 *point = key;
+       const u32 clen = ctx->clen;
+       u32 coord_len, pad;
+       u8 *buf;
+
+       /* Only the uncompressed encoding (leading 0x04) is accepted */
+       if (keylen < 1 || point[0] != 0x04)
+               return -EINVAL;
+       point++;
+       keylen--;
+
+       /* X and Y must be the same length */
+       if (keylen % 2)
+               return -EINVAL;
+       coord_len = keylen / 2;
+
+       /*
+        * Kernel passes ceil(bits/8) per coordinate (e.g. 66 for P-521),
+        * but our HW ABI uses clen (ALIGN(66,4)=68 for P-521).
+        * Anything from 1 to clen bytes is accepted and left-padded.
+        */
+       if (coord_len == 0 || coord_len > clen)
+               return -EINVAL;
+
+       kfree(ctx->pub_key);
+       ctx->pub_key = NULL;
+       ctx->pub_key_len = 0;
+
+       buf = kzalloc(2 * clen, GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+
+       /* Right-align each coordinate inside its clen-byte field */
+       pad = clen - coord_len;
+       memcpy(buf + pad, point, coord_len);
+       memcpy(buf + clen + pad, point + coord_len, coord_len);
+
+       ctx->pub_key = buf;
+       ctx->pub_key_len = 2 * clen;
+       return 0;
+}
+
+/*
+ * Install a raw private key.  The key must be exactly clen bytes long;
+ * any other length yields -EINVAL.  Otherwise the result of
+ * cmh_key_setkey_raw() is returned.
+ */
+static int cmh_ecdsa_set_priv_key(struct crypto_sig *tfm,
+                                 const void *key, unsigned int keylen)
+{
+       struct cmh_ecdsa_tfm_ctx *ctx = cmh_ecdsa_ctx(tfm);
+
+       if (keylen == ctx->clen)
+               return cmh_key_setkey_raw(&ctx->key, key, keylen,
+                                         CORE_ID_PKE);
+
+       return -EINVAL;
+}
+
+/* Report the key size of the transform's curve, in bits (not bytes). */
+static unsigned int cmh_ecdsa_key_size(struct crypto_sig *tfm)
+{
+       /* crypto_sig_keysize() returns bits, not bytes */
+       return pke_curve_bits(cmh_ecdsa_ctx(tfm)->curve);
+}
+
+/* Signatures are always exchanged as one struct ecdsa_raw_sig. */
+static unsigned int cmh_ecdsa_max_size(struct crypto_sig *tfm)
+{
+       const unsigned int sig_bytes = sizeof(struct ecdsa_raw_sig);
+
+       return sig_bytes;
+}
+
+/*
+ * Largest digest accepted: SHA-512 (64 bytes).  Digests longer than the
+ * curve order are truncated per ECDSA bits2int.  Matches kernel
+ * ecdsa_digest_size().
+ */
+static unsigned int cmh_ecdsa_digest_size(struct crypto_sig *tfm)
+{
+       const unsigned int max_digest = SHA512_DIGEST_SIZE;
+
+       return max_digest;
+}
+
+/*
+ * Common tfm initialisation: clear the whole context (no keys set) and
+ * bind it to @curve together with that curve's coordinate length.
+ * Always returns 0.
+ */
+static int cmh_ecdsa_init_curve(struct crypto_sig *tfm, u32 curve)
+{
+       struct cmh_ecdsa_tfm_ctx *ctx = cmh_ecdsa_ctx(tfm);
+
+       memset(ctx, 0, sizeof(*ctx));
+       ctx->curve = curve;
+       ctx->clen = pke_curve_clen(curve);
+       return 0;
+}
+
+/* .init callback for "ecdsa-nist-p256" */
+static int cmh_ecdsa_p256_init(struct crypto_sig *tfm)
+{
+       return cmh_ecdsa_init_curve(tfm, PKE_CURVE_P256);
+}
+
+/* .init callback for "ecdsa-nist-p384" */
+static int cmh_ecdsa_p384_init(struct crypto_sig *tfm)
+{
+       return cmh_ecdsa_init_curve(tfm, PKE_CURVE_P384);
+}
+
+/* .init callback for "ecdsa-nist-p521" */
+static int cmh_ecdsa_p521_init(struct crypto_sig *tfm)
+{
+       return cmh_ecdsa_init_curve(tfm, PKE_CURVE_P521);
+}
+
+/* .init callback for "sm2" */
+static int cmh_sm2_init(struct crypto_sig *tfm)
+{
+       return cmh_ecdsa_init_curve(tfm, PKE_CURVE_SM2);
+}
+
+/* Tear down a transform: destroy the private key and free the public key. */
+static void cmh_ecdsa_exit(struct crypto_sig *tfm)
+{
+       struct cmh_ecdsa_tfm_ctx *ctx = cmh_ecdsa_ctx(tfm);
+       u8 *pub;
+
+       cmh_key_destroy(&ctx->key);
+
+       /* Detach the pointer before freeing so the context never dangles */
+       pub = ctx->pub_key;
+       ctx->pub_key = NULL;
+       kfree(pub);
+}
+
+/*
+ * sig algorithms exported by this driver.  The three NIST entries share
+ * every callback except .init, which selects the curve.  The SM2 entry
+ * leaves .sign and .set_priv_key unset, making it verify-only.
+ */
+static struct sig_alg cmh_ecdsa_algs[] = {
+       /* NIST P-256: sign + verify */
+       {
+               .sign           = cmh_ecdsa_sign,
+               .verify         = cmh_ecdsa_verify,
+               .set_pub_key    = cmh_ecdsa_set_pub_key,
+               .set_priv_key   = cmh_ecdsa_set_priv_key,
+               .key_size       = cmh_ecdsa_key_size,
+               .max_size       = cmh_ecdsa_max_size,
+               .digest_size    = cmh_ecdsa_digest_size,
+               .init           = cmh_ecdsa_p256_init,
+               .exit           = cmh_ecdsa_exit,
+               .base = {
+                       .cra_name         = "ecdsa-nist-p256",
+                       .cra_driver_name  = "cri-cmh-ecdsa-nist-p256",
+                       .cra_priority     = 300,
+                       .cra_module       = THIS_MODULE,
+                       .cra_ctxsize      = sizeof(struct cmh_ecdsa_tfm_ctx),
+               },
+       },
+       /* NIST P-384: sign + verify */
+       {
+               .sign           = cmh_ecdsa_sign,
+               .verify         = cmh_ecdsa_verify,
+               .set_pub_key    = cmh_ecdsa_set_pub_key,
+               .set_priv_key   = cmh_ecdsa_set_priv_key,
+               .key_size       = cmh_ecdsa_key_size,
+               .max_size       = cmh_ecdsa_max_size,
+               .digest_size    = cmh_ecdsa_digest_size,
+               .init           = cmh_ecdsa_p384_init,
+               .exit           = cmh_ecdsa_exit,
+               .base = {
+                       .cra_name         = "ecdsa-nist-p384",
+                       .cra_driver_name  = "cri-cmh-ecdsa-nist-p384",
+                       .cra_priority     = 300,
+                       .cra_module       = THIS_MODULE,
+                       .cra_ctxsize      = sizeof(struct cmh_ecdsa_tfm_ctx),
+               },
+       },
+       /* NIST P-521: sign + verify */
+       {
+               .sign           = cmh_ecdsa_sign,
+               .verify         = cmh_ecdsa_verify,
+               .set_pub_key    = cmh_ecdsa_set_pub_key,
+               .set_priv_key   = cmh_ecdsa_set_priv_key,
+               .key_size       = cmh_ecdsa_key_size,
+               .max_size       = cmh_ecdsa_max_size,
+               .digest_size    = cmh_ecdsa_digest_size,
+               .init           = cmh_ecdsa_p521_init,
+               .exit           = cmh_ecdsa_exit,
+               .base = {
+                       .cra_name         = "ecdsa-nist-p521",
+                       .cra_driver_name  = "cri-cmh-ecdsa-nist-p521",
+                       .cra_priority     = 300,
+                       .cra_module       = THIS_MODULE,
+                       .cra_ctxsize      = sizeof(struct cmh_ecdsa_tfm_ctx),
+               },
+       },
+       /* SM2: verify-only (no .sign / .set_priv_key) */
+       {
+               .verify         = cmh_ecdsa_verify,
+               .set_pub_key    = cmh_ecdsa_set_pub_key,
+               .key_size       = cmh_ecdsa_key_size,
+               .max_size       = cmh_ecdsa_max_size,
+               .digest_size    = cmh_ecdsa_digest_size,
+               .init           = cmh_sm2_init,
+               .exit           = cmh_ecdsa_exit,
+               .base = {
+                       .cra_name         = "sm2",
+                       .cra_driver_name  = "cri-cmh-sm2",
+                       .cra_priority     = 300,
+                       .cra_module       = THIS_MODULE,
+                       .cra_ctxsize      = sizeof(struct cmh_ecdsa_tfm_ctx),
+               },
+       },
+};
+
+/**
+ * cmh_pke_ecdsa_register() - Register ECDSA/SM2 sig algorithms with the crypto framework
+ *
+ * Entries of cmh_ecdsa_algs[] are registered in array order.  If one fails,
+ * the failure is logged and every entry registered so far is unregistered
+ * again in reverse order.
+ *
+ * Return: 0 on success, negative errno on failure.
+ */
+int cmh_pke_ecdsa_register(void)
+{
+       int ret, i;
+
+       for (i = 0; i < ARRAY_SIZE(cmh_ecdsa_algs); i++) {
+               struct sig_alg *alg = &cmh_ecdsa_algs[i];
+
+               ret = crypto_register_sig(alg);
+               if (!ret)
+                       continue;
+
+               dev_err(cmh_dev(), "cmh: failed to register %s (%d)\n",
+                       alg->base.cra_name, ret);
+
+               /* Roll back entries [0, i) in reverse order */
+               while (i--)
+                       crypto_unregister_sig(&cmh_ecdsa_algs[i]);
+               return ret;
+       }
+
+       return 0;
+}
+
+/**
+ * cmh_pke_ecdsa_unregister() - Unregister ECDSA/SM2 sig algorithms from the crypto framework
+ *
+ * Walks cmh_ecdsa_algs[] from the last entry to the first.
+ */
+void cmh_pke_ecdsa_unregister(void)
+{
+       int i;
+
+       for (i = ARRAY_SIZE(cmh_ecdsa_algs); i-- > 0; )
+               crypto_unregister_sig(&cmh_ecdsa_algs[i]);
+}
--
2.43.7


** This message and any attachments are for the sole use of the intended recipient(s). It may contain information that is confidential and privileged. If you are not the intended recipient of this message, you are prohibited from printing, copying, forwarding or saving it. Please delete the message and attachments and notify the sender immediately. **

Rambus Inc.<http://www.rambus.com>

^ permalink raw reply related

* [PATCH 11/19] crypto: cmh - add DRBG hwrng
From: Saravanakrishnan Krishnamoorthy @ 2026-06-25 17:33 UTC (permalink / raw)
  To: Albert Ou, Alex Ousherovitch, Conor Dooley, David S. Miller,
	Herbert Xu, Jonathan Corbet, Krzysztof Kozlowski, Palmer Dabbelt,
	Paul Walmsley, Rob Herring, Saravanakrishnan Krishnamoorthy,
	Shuah Khan
  Cc: Alexandre Ghiti, devicetree, Joel Wittenauer, linux-api,
	linux-crypto, linux-doc, linux-kernel, linux-kselftest,
	linux-riscv, Shuah Khan, sipsupport, Thi Nguyen
In-Reply-To: <20260625173328.1140487-1-skrishnamoorthy@rambus.com>

From: Alex Ousherovitch <aousherovitch@rambus.com>

Register the CMH DRBG core (core ID 0x0f) as an hwrng provider.
The hardware implements a NIST SP 800-90A compliant DRBG with
automatic self-seeding.

Co-developed-by: Saravanakrishnan Krishnamoorthy <skrishnamoorthy@rambus.com>
Signed-off-by: Saravanakrishnan Krishnamoorthy <skrishnamoorthy@rambus.com>
Signed-off-by: Alex Ousherovitch <aousherovitch@rambus.com>
Reviewed-by: Joel Wittenauer <Joel.Wittenauer@cryptography.com>
Reviewed-by: Thi Nguyen <thin@rambus.com>
---
 drivers/crypto/cmh/Makefile   |   3 +-
 drivers/crypto/cmh/cmh_main.c |   9 +
 drivers/crypto/cmh/cmh_rng.c  | 316 ++++++++++++++++++++++++++++++++++
 3 files changed, 327 insertions(+), 1 deletion(-)
 create mode 100644 drivers/crypto/cmh/cmh_rng.c

diff --git a/drivers/crypto/cmh/Makefile b/drivers/crypto/cmh/Makefile
index 4ebd0e1d10bc..1c4cb817424c 100644
--- a/drivers/crypto/cmh/Makefile
+++ b/drivers/crypto/cmh/Makefile
@@ -28,7 +28,8 @@ cmh-y := \
        cmh_sm4_cmac.o \
        cmh_ccp.o \
        cmh_ccp_aead.o \
-       cmh_ccp_poly.o
+       cmh_ccp_poly.o \
+       cmh_rng.o

 # Management ioctl device (/dev/cmh_mgmt): key lifecycle, PKE, PQC ioctls.
 cmh-$(CONFIG_CRYPTO_DEV_CMH_MGMT) += \
diff --git a/drivers/crypto/cmh/cmh_main.c b/drivers/crypto/cmh/cmh_main.c
index 79df27d43e7e..f31c50168e4a 100644
--- a/drivers/crypto/cmh/cmh_main.c
+++ b/drivers/crypto/cmh/cmh_main.c
@@ -34,6 +34,7 @@
 #include "cmh_cshake.h"
 #include "cmh_kmac.h"
 #include "cmh_sm3.h"
+#include "cmh_rng.h"
 #include "cmh_aes.h"
 #include "cmh_sm4.h"
 #include "cmh_ccp.h"
@@ -224,6 +225,11 @@ static int cmh_probe(struct platform_device *pdev)
        if (ret)
                goto err_sm3_register;

+       /* Register hwrng backed by DRBG core */
+       ret = cmh_rng_register(pdev);
+       if (ret)
+               goto err_rng_register;
+
        /* Register AES skcipher algorithms */
        ret = cmh_aes_register();
        if (ret)
@@ -299,6 +305,8 @@ static int cmh_probe(struct platform_device *pdev)
 err_aes_aead_register:
        cmh_aes_unregister();
 err_aes_register:
+       cmh_rng_unregister();
+err_rng_register:
        cmh_sm3_unregister();
 err_sm3_register:
        cmh_kmac_unregister();
@@ -344,6 +352,7 @@ static void cmh_remove(struct platform_device *pdev)
        cmh_aes_cmac_unregister();
        cmh_aes_aead_unregister();
        cmh_aes_unregister();
+       cmh_rng_unregister();
        cmh_sm3_unregister();
        cmh_kmac_unregister();
        cmh_cshake_unregister();
diff --git a/drivers/crypto/cmh/cmh_rng.c b/drivers/crypto/cmh/cmh_rng.c
new file mode 100644
index 000000000000..c9693f6cc360
--- /dev/null
+++ b/drivers/crypto/cmh/cmh_rng.c
@@ -0,0 +1,316 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2026 Cryptography Research, Inc. (CRI).
+ * CMH LKM -- Hardware RNG (DRBG) Driver
+ *
+ * Implements a Linux hwrng backed by the CMH DRBG core.  Each .read()
+ * builds a 3-entry VCQ (header + GENERATE + FLUSH) and submits it
+ * synchronously through the Transaction Manager.
+ *
+ * DRBG configuration (CONFIG) is a management-host operation in the
+ * CMH security model.  The driver's behaviour is controlled by the
+ * drbg_config setting (debug-only module parameter):
+ *
+ *   "auto" (default) -- attempt CONFIG at probe with the hardcoded
+ *     ratio/strength defaults.  Succeeds in stateless mode (any host may
+ *     CONFIG) or when this host is the management host in stateful
+ *     mode.  On -EPERM the driver logs a notice and continues --
+ *     GENERATE will work once the management host configures the DRBG.
+ *
+ *   "skip" -- do not issue CONFIG; assume an external management host
+ *     will configure the DRBG.  hwrng is still registered; .read()
+ *     returns -EAGAIN until GENERATE succeeds.
+ *
+ * The management host (or any privileged user-space process) can also
+ * reconfigure the DRBG at runtime via CMH_IOCTL_DRBG_CONFIG.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/hw_random.h>
+#include <linux/slab.h>
+#include <linux/platform_device.h>
+
+#include "cmh_rng.h"
+#include "cmh_vcq.h"
+#include "cmh_txn.h"
+#include "cmh_dma.h"
+#include "cmh_sys.h"
+#include "cmh_config.h"
+
+/* VCQ layout for .read(): header + GENERATE + FLUSH = 3 entries. */
+#define DRBG_READ_VCQ_CMDS     3
+
+/* VCQ layout for CONFIG: header + RESET + CONFIG + FLUSH = 4 entries. */
+#define DRBG_CONFIG_VCQ_CMDS   4
+
+/*
+ * Linux hwrng quality is expressed in bits of entropy per 1024 bits of
+ * input.  The kernel clamps to this maximum; mirror it here so our
+ * MODULE_PARM_DESC and clamp logic stay in sync.
+ */
+#define CMH_HWRNG_QUALITY_MAX  1024
+
+/* Module parameters */
+
+static int hwrng_quality;
+module_param(hwrng_quality, int, 0444);
+MODULE_PARM_DESC(hwrng_quality,
+                "hwrng quality (0=no CRNG seeding, 1-1024=enable; default: 0)");
+
+#ifdef CONFIG_CRYPTO_DEV_CMH_DEBUG
+static char *drbg_config = "auto";
+module_param(drbg_config, charp, 0444);
+MODULE_PARM_DESC(drbg_config,
+                "[debug] DRBG config at probe: \"auto\"=attempt CONFIG, \"skip\"=assume external (default: auto)");
+#else
+static const char *drbg_config = "auto";
+#endif
+
+/*
+ * DRBG parameters -- hardcoded to production defaults.
+ * Entropy ratio 0 = 1:1 (full entropy), security strength 0x10 = 256-bit.
+ */
+#define CMH_DRBG_ENTROPY_RATIO         0
+#define CMH_DRBG_SECURITY_STRENGTH     0x10
+
+static unsigned int drbg_timeout_ms = 500;
+
+/* VCQ Builders */
+
+/*
+ * Fill @slot with a DRBG GENERATE command that writes @len bytes to the
+ * device-visible address @dst_phys.  The slot is zeroed first, so every
+ * field not set here is 0.
+ */
+static void vcq_add_drbg_generate(struct vcq_cmd *slot, u64 dst_phys, u32 len)
+{
+       memset(slot, 0, sizeof(*slot));
+
+       slot->hwc.drbg.cmd_generate.len = len;
+       slot->hwc.drbg.cmd_generate.dst = dst_phys;
+       slot->id = VCQ_CMD_ID(CORE_ID_DRBG, 0, 1, DRBG_CMD_GENERATE);
+       slot->magic = VCQ_CMD_MAGIC;
+}
+
+/*
+ * Maximum bytes per DRBG GENERATE request.  The kernel calls .read()
+ * repeatedly to fill larger requests, so capping here is safe.
+ * 32 bytes matches the 256-bit security strength natural output size.
+ */
+#define CMH_DRBG_MAX_GENERATE  32U
+
+/*
+ * hwrng .read() callback
+ *
+ * @rng:  hwrng instance (unused)
+ * @data: destination buffer
+ * @max:  capacity of @data in bytes
+ * @wait: true when the caller may sleep
+ *
+ * Generates at most CMH_DRBG_MAX_GENERATE bytes per call through a
+ * bounce buffer and a synchronous 3-entry VCQ.
+ *
+ * Returns the number of bytes copied to @data, 0 when @max is 0 or the
+ * caller cannot sleep, -EAGAIN for transient conditions, -ENOMEM on
+ * allocation or DMA-mapping failure, or the unmodified submission status
+ * for any other failure.
+ */
+static int cmh_rng_read(struct hwrng *rng, void *data, size_t max, bool wait)
+{
+       struct cmh_dma_orphan *orphan;
+       struct vcq_cmd vcq[DRBG_READ_VCQ_CMDS];
+       dma_addr_t dma_addr;
+       void *dmabuf;
+       size_t nbytes;
+       int ret;
+
+       if (max == 0)
+               return 0;
+
+       /*
+        * Our path uses GFP_KERNEL allocations and synchronous VCQ
+        * submission -- both may sleep.  When the caller indicates
+        * non-blocking context (!wait), return 0 ("no data yet") so
+        * the hwrng core retries later.
+        */
+       if (!wait)
+               return 0;
+
+       nbytes = min_t(size_t, max, CMH_DRBG_MAX_GENERATE);
+
+       orphan = kmalloc_obj(*orphan, GFP_KERNEL);
+       if (!orphan)
+               return -ENOMEM;
+
+       dmabuf = kmalloc(nbytes, GFP_KERNEL);
+       if (!dmabuf) {
+               kfree(orphan);
+               return -ENOMEM;
+       }
+
+       dma_addr = cmh_dma_map_single(dmabuf, nbytes, DMA_FROM_DEVICE);
+       if (cmh_dma_map_error(dma_addr)) {
+               kfree(dmabuf);
+               kfree(orphan);
+               return -ENOMEM;
+       }
+
+       /* Describe the mapping so it can be released later if we time out */
+       orphan->buf  = dmabuf;
+       orphan->addr = dma_addr;
+       orphan->len  = nbytes;
+       orphan->dir  = DMA_FROM_DEVICE;
+
+       vcq_set_header(&vcq[0], DRBG_READ_VCQ_CMDS);
+       vcq_add_drbg_generate(&vcq[1], dma_addr, nbytes);
+       vcq_add_flush(&vcq[2], CORE_ID_DRBG);
+
+       /*
+        * Use the noabort variant: if the MBX is occupied by a slow
+        * operation (e.g. SLH-DSA sign at 120 s), we must not issue
+        * MBX_COMMAND_ABORT -- that would kill the unrelated in-flight
+        * VCQ.  On timeout with an in-flight VCQ (-EINPROGRESS), the
+        * orphan callback defers DMA cleanup until the RH fires.
+        */
+       ret = cmh_tm_submit_sync_noabort(vcq, DRBG_READ_VCQ_CMDS, 1,
+                                        msecs_to_jiffies(drbg_timeout_ms),
+                                        cmh_dma_orphan_free, orphan);
+       if (ret == -EINPROGRESS) {
+               /* Orphan callback owns dmabuf -- will free on VCQ completion */
+               return -EAGAIN;
+       }
+
+       /* Normal path or cancelled-from-queue: caller owns DMA */
+       cmh_dma_unmap_single(dma_addr, nbytes, DMA_FROM_DEVICE);
+       kfree(orphan);
+
+       switch (ret) {
+       case 0:
+               memcpy(data, dmabuf, nbytes);
+               ret = nbytes;
+               break;
+       /*
+        * Only translate known transient conditions to -EAGAIN
+        * so the hwrng subsystem retries later.  Propagate
+        * unexpected failures unchanged to avoid masking real
+        * faults and causing indefinite retry loops.
+        */
+       case -EAGAIN:
+       case -EBUSY:
+       case -ETIMEDOUT:
+       case -EIO:
+       /*
+        * -ENODEV: the TM is not running -- occurs when the
+        * hwrng kthread (PF_NOFREEZE, not frozen during
+        * suspend) calls .read() while the device is suspended.
+        * Treat as transient: the TM restarts on resume.
+        */
+       case -ENODEV:
+               dev_dbg_ratelimited(cmh_dev(),
+                                   "rng: transient DRBG failure (rc=%d)\n",
+                                   ret);
+               ret = -EAGAIN;
+               break;
+       default:
+               dev_err_ratelimited(cmh_dev(),
+                                   "rng: DRBG generate failed (rc=%d)\n",
+                                   ret);
+               break;
+       }
+
+       /*
+        * The bounce buffer may hold DRBG output (complete or partial);
+        * wipe it rather than leaving random bytes in freed slab memory.
+        */
+       kfree_sensitive(dmabuf);
+       return ret;
+}
+
+/* Registration */
+
+static bool cmh_rng_registered;
+
+/*
+ * hwrng descriptor for the CMH DRBG.  Only .name and .read are set
+ * statically; .quality is filled in by cmh_rng_register() from the
+ * hwrng_quality module parameter.
+ */
+static struct hwrng cmh_hwrng = {
+       .name = "cri-cmh-drbg",
+       .read = cmh_rng_read,
+};
+
+/**
+ * cmh_rng_register() - Hook the CMH DRBG into the kernel hwrng framework
+ * @pdev: Platform device for the CMH accelerator; its struct device is
+ *        used for every log message emitted here
+ *
+ * Copies the requested hwrng quality into the hwrng descriptor (capped
+ * at CMH_HWRNG_QUALITY_MAX), optionally submits a DRBG RESET + CONFIG +
+ * FLUSH VCQ to firmware, and then registers the hwrng device.  A failed
+ * DRBG CONFIG is only logged; it does not prevent registration.
+ *
+ * Return: 0 on success, or the negative errno from hwrng_register().
+ */
+int cmh_rng_register(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       int ret;
+
+       cmh_hwrng.quality = hwrng_quality;
+
+       if (cmh_hwrng.quality > CMH_HWRNG_QUALITY_MAX)
+               cmh_hwrng.quality = CMH_HWRNG_QUALITY_MAX;
+
+       /*
+        * "skip": never issue CONFIG -- the management host is assumed
+        * to have configured (or to configure later) the DRBG.
+        *
+        * Anything else ("auto"): try CONFIG, which is a management-host
+        * operation.  It succeeds in stateless mode (any host) or when
+        * we are the management host in stateful mode.  -EPERM (not the
+        * management host) is not treated as an error -- GENERATE will
+        * work once the management host configures the DRBG.
+        */
+       if (!strcmp(drbg_config, "skip")) {
+               dev_info(dev,
+                        "rng: DRBG config skipped (drbg_config=skip); assuming external configuration\n");
+       } else {
+               struct vcq_cmd cfg_vcq[DRBG_CONFIG_VCQ_CMDS];
+
+               /* Unknown values fall back to the "auto" behaviour */
+               if (strcmp(drbg_config, "auto"))
+                       dev_warn(dev,
+                                "rng: unrecognized drbg_config=\"%s\", treating as \"auto\"\n",
+                                drbg_config);
+
+               vcq_set_header(&cfg_vcq[0], DRBG_CONFIG_VCQ_CMDS);
+               vcq_add_drbg_reset(&cfg_vcq[1]);
+               vcq_add_drbg_config(&cfg_vcq[2], CMH_DRBG_ENTROPY_RATIO,
+                                   CMH_DRBG_SECURITY_STRENGTH);
+               vcq_add_flush(&cfg_vcq[3], CORE_ID_DRBG);
+               ret = cmh_tm_submit_sync(cfg_vcq, DRBG_CONFIG_VCQ_CMDS, 1);
+
+               switch (ret) {
+               case 0:
+                       dev_info(dev,
+                                "rng: DRBG configured (ratio=%u strength=0x%02x)\n",
+                                CMH_DRBG_ENTROPY_RATIO,
+                                CMH_DRBG_SECURITY_STRENGTH);
+                       break;
+               case -EPERM:
+                       dev_notice(dev,
+                                  "rng: DRBG config not permitted (not management host); assuming external configuration\n");
+                       break;
+               default:
+                       dev_warn(dev,
+                                "rng: DRBG config failed (rc=%d)\n", ret);
+                       break;
+               }
+       }
+
+       ret = hwrng_register(&cmh_hwrng);
+       if (!ret) {
+               dev_info(dev,
+                        "rng: registered cri-cmh-drbg (quality=%d timeout=%ums)\n",
+                        cmh_hwrng.quality, drbg_timeout_ms);
+               cmh_rng_registered = true;
+               return 0;
+       }
+
+       dev_err(dev, "rng: hwrng_register failed (rc=%d)\n", ret);
+       return ret;
+}
+
+/**
+ * cmh_rng_unregister() - Remove the CMH DRBG from the hwrng framework
+ *
+ * Does nothing unless a previous cmh_rng_register() succeeded; otherwise
+ * unregisters the hwrng device, clears the registered flag and logs it.
+ */
+void cmh_rng_unregister(void)
+{
+       if (cmh_rng_registered) {
+               hwrng_unregister(&cmh_hwrng);
+               cmh_rng_registered = false;
+               dev_info(cmh_dev(), "rng: unregistered cri-cmh-drbg\n");
+       }
+}
+
+/* -- debugfs timeout accessor ------------------------------------------ */
+
+#ifdef CONFIG_CRYPTO_DEV_CMH_DEBUG
+/**
+ * cmh_rng_timeout_drbg_ptr() - Accessor for the DRBG timeout variable
+ *
+ * Only built with CONFIG_CRYPTO_DEV_CMH_DEBUG.  Lets the debugfs code
+ * reach the file-local drbg_timeout_ms so it can be tuned at runtime.
+ *
+ * Return: address of the static drbg_timeout_ms variable.
+ */
+unsigned int *cmh_rng_timeout_drbg_ptr(void)
+{
+       return &drbg_timeout_ms;
+}
+#endif
--
2.43.7


** This message and any attachments are for the sole use of the intended recipient(s). It may contain information that is confidential and privileged. If you are not the intended recipient of this message, you are prohibited from printing, copying, forwarding or saving it. Please delete the message and attachments and notify the sender immediately. **

Rambus Inc.<http://www.rambus.com>

^ permalink raw reply related

* [PATCH 04/19] crypto: cmh - add SHA-2/SHA-3/SHAKE ahash
From: Saravanakrishnan Krishnamoorthy @ 2026-06-25 17:33 UTC (permalink / raw)
  To: Albert Ou, Alex Ousherovitch, Conor Dooley, David S. Miller,
	Herbert Xu, Jonathan Corbet, Krzysztof Kozlowski, Palmer Dabbelt,
	Paul Walmsley, Rob Herring, Saravanakrishnan Krishnamoorthy,
	Shuah Khan
  Cc: Alexandre Ghiti, devicetree, Joel Wittenauer, linux-api,
	linux-crypto, linux-doc, linux-kernel, linux-kselftest,
	linux-riscv, Shuah Khan, sipsupport, Thi Nguyen
In-Reply-To: <20260625173328.1140487-1-skrishnamoorthy@rambus.com>

From: Alex Ousherovitch <aousherovitch@rambus.com>

Register ahash algorithms for SHA-224, SHA-256, SHA-384, SHA-512,
SHA3-224, SHA3-256, SHA3-384, SHA3-512, SHAKE128, and SHAKE256
using the CMH hash core (core ID 0x02).

Supports incremental update/finup/final, init/export/import for
request cloning, and the CRYPTO_AHASH_REQ_VIRT flag for zero-copy
from virtual buffers.

Co-developed-by: Saravanakrishnan Krishnamoorthy <skrishnamoorthy@rambus.com>
Signed-off-by: Saravanakrishnan Krishnamoorthy <skrishnamoorthy@rambus.com>
Signed-off-by: Alex Ousherovitch <aousherovitch@rambus.com>
Reviewed-by: Joel Wittenauer <Joel.Wittenauer@cryptography.com>
Reviewed-by: Thi Nguyen <thin@rambus.com>
---
 drivers/crypto/cmh/Makefile           |   3 +-
 drivers/crypto/cmh/cmh_hash.c         | 860 ++++++++++++++++++++++++++
 drivers/crypto/cmh/cmh_main.c         |   9 +
 drivers/crypto/cmh/include/cmh_hash.h |  26 +
 4 files changed, 897 insertions(+), 1 deletion(-)
 create mode 100644 drivers/crypto/cmh/cmh_hash.c
 create mode 100644 drivers/crypto/cmh/include/cmh_hash.h

diff --git a/drivers/crypto/cmh/Makefile b/drivers/crypto/cmh/Makefile
index 1492e575598c..c0531f416229 100644
--- a/drivers/crypto/cmh/Makefile
+++ b/drivers/crypto/cmh/Makefile
@@ -14,7 +14,8 @@ cmh-y := \
        cmh_dma.o \
        cmh_sysfs.o \
        cmh_key.o \
-       cmh_sys.o
+       cmh_sys.o \
+       cmh_hash.o

 # Management ioctl device (/dev/cmh_mgmt): key lifecycle, PKE, PQC ioctls.
 cmh-$(CONFIG_CRYPTO_DEV_CMH_MGMT) += \
diff --git a/drivers/crypto/cmh/cmh_hash.c b/drivers/crypto/cmh/cmh_hash.c
new file mode 100644
index 000000000000..2256bf4314c3
--- /dev/null
+++ b/drivers/crypto/cmh/cmh_hash.c
@@ -0,0 +1,860 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2026 Cryptography Research, Inc. (CRI).
+ * CMH LKM -- Kernel Crypto API Hash Driver
+ *
+ * Registers asynchronous hash (ahash) algorithms with the Linux crypto
+ * subsystem.  Implements SHA-2 (224/256/384/512), SHA-3
+ * (224/256/384/512), and SHAKE (128/256) families using the CMH Hash
+ * Core (HC).
+ *
+ * Incremental HW update model -- each .update() with enough data for
+ * at least one full block submits a self-contained VCQ transaction:
+ *
+ *   .init()   -> software-only: zero per-request context
+ *   .update() -> buffer data in holdback; when >= block_size bytes:
+ *                INIT [+ RESTORE] + UPDATE(full blocks) + SAVE + FLUSH
+ *                -> return -EINPROGRESS  (else return 0, data in holdback)
+ *   .final()  -> INIT [+ RESTORE] [+ UPDATE(residual)] + FINAL + FLUSH
+ *   .finup()  -> linearise holdback + new data, then final path
+ *   .digest() -> .init() + .finup(): single-shot, data linearised as in finup
+ *   .export() -> software-only: copy checkpoint + holdback to out
+ *   .import() -> software-only: restore checkpoint + holdback from in
+ *
+ * The FLUSH after each .update() releases the HC core, so no lockout.
+ * Two hash sessions interleave fine on the same MBX -- each saves its
+ * own state via SAVE and restores via RESTORE on the next call.
+ *
+ * Export/import is purely software (no HW interaction), enabling
+ * crypto API transform clone for all plain-hash algorithms.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/crypto.h>
+#include <crypto/internal/hash.h>
+#include <crypto/scatterwalk.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+
+#include "cmh_hash.h"
+#include "cmh_vcq.h"
+#include "cmh_txn.h"
+#include "cmh_dma.h"
+
+/* Algorithm Table */
+
+/*
+ * Static description of one hash algorithm offered by this driver.
+ * One entry per algorithm lives in cmh_hash_algs_info[]; request
+ * handlers reach their entry through rctx->info.
+ */
+struct cmh_hash_alg_info {
+       u32         hc_algo;        /* HC_ALGO_* (SHA2, SHA3, SHAKE) */
+       u32         digest_size;    /* bytes */
+       u32         block_size;     /* cra_blocksize for Linux crypto API */
+       const char *alg_name;       /* Linux crypto name: "sha256" */
+       const char *drv_name;       /* driver name: "cri-cmh-sha256" */
+};
+
+/*
+ * Table of supported algorithms.  block_size is also the holdback
+ * granularity used by cmh_hash_update(), so the largest value here
+ * must not exceed CMH_HASH_MAX_BLOCK.
+ */
+static const struct cmh_hash_alg_info cmh_hash_algs_info[] = {
+       /* SHA-2 family */
+       {
+               .hc_algo     = HC_ALGO_SHA2_224,
+               .digest_size = CMH_SHA224_DIGEST_SIZE,
+               .block_size  = 64,
+               .alg_name    = "sha224",
+               .drv_name    = "cri-cmh-sha224",
+       },
+       {
+               .hc_algo     = HC_ALGO_SHA2_256,
+               .digest_size = CMH_SHA256_DIGEST_SIZE,
+               .block_size  = 64,
+               .alg_name    = "sha256",
+               .drv_name    = "cri-cmh-sha256",
+       },
+       {
+               .hc_algo     = HC_ALGO_SHA2_384,
+               .digest_size = CMH_SHA384_DIGEST_SIZE,
+               .block_size  = 128,
+               .alg_name    = "sha384",
+               .drv_name    = "cri-cmh-sha384",
+       },
+       {
+               .hc_algo     = HC_ALGO_SHA2_512,
+               .digest_size = CMH_SHA512_DIGEST_SIZE,
+               .block_size  = 128,
+               .alg_name    = "sha512",
+               .drv_name    = "cri-cmh-sha512",
+       },
+       /* SHA-3 family */
+       {
+               .hc_algo     = HC_ALGO_SHA3_224,
+               .digest_size = CMH_SHA3_224_DIGEST_SIZE,
+               .block_size  = 144,  /* rate = 1600/8 - 2*224/8 = 144 */
+               .alg_name    = "sha3-224",
+               .drv_name    = "cri-cmh-sha3-224",
+       },
+       {
+               .hc_algo     = HC_ALGO_SHA3_256,
+               .digest_size = CMH_SHA3_256_DIGEST_SIZE,
+               .block_size  = 136,  /* rate = 1600/8 - 2*256/8 = 136 */
+               .alg_name    = "sha3-256",
+               .drv_name    = "cri-cmh-sha3-256",
+       },
+       {
+               .hc_algo     = HC_ALGO_SHA3_384,
+               .digest_size = CMH_SHA3_384_DIGEST_SIZE,
+               .block_size  = 104,  /* rate = 1600/8 - 2*384/8 = 104 */
+               .alg_name    = "sha3-384",
+               .drv_name    = "cri-cmh-sha3-384",
+       },
+       {
+               .hc_algo     = HC_ALGO_SHA3_512,
+               .digest_size = CMH_SHA3_512_DIGEST_SIZE,
+               .block_size  = 72,   /* rate = 1600/8 - 2*512/8 = 72 */
+               .alg_name    = "sha3-512",
+               .drv_name    = "cri-cmh-sha3-512",
+       },
+       /*
+        * SHAKE (XOF) family -- fixed-output ahash registration.
+        *
+        * cra_blocksize = 1: SHAKE is a sponge/XOF, not Merkle-Damgaard.
+        * The SHAKE-128 Keccak rate (168 bytes) exceeds
+        * MAX_ALGAPI_BLOCKSIZE (160) on Linux <=6.7; the SHAKE-256 rate
+        * (136 bytes) would fit, but both variants use 1 to signal
+        * "byte-oriented", which is correct for XOF consumers.  The
+        * kernel raised the limit to 208 in 6.8 (commit 2f3a22704889).
+        *
+        * With block_size = 1 the update path never keeps a holdback
+        * tail: every non-empty update is submitted in full.
+        */
+       {
+               .hc_algo     = HC_ALGO_SHAKE128,
+               .digest_size = CMH_SHAKE128_DIGEST_SIZE,
+               .block_size  = 1,    /* XOF: no meaningful block for crypto API */
+               .alg_name    = "shake128",
+               .drv_name    = "cri-cmh-shake128",
+       },
+       {
+               .hc_algo     = HC_ALGO_SHAKE256,
+               .digest_size = CMH_SHAKE256_DIGEST_SIZE,
+               .block_size  = 1,    /* XOF: no meaningful block for crypto API */
+               .alg_name    = "shake256",
+               .drv_name    = "cri-cmh-shake256",
+       },
+};
+
+#define CMH_HASH_ALG_COUNT  ARRAY_SIZE(cmh_hash_algs_info)
+
+/* Per-Request State */
+
+/* Maximum cra_blocksize across all registered algorithms (SHA3-224) */
+#define CMH_HASH_MAX_BLOCK     144
+
+/*
+ * Exported hash state -- serialised by .export(), deserialised by
+ * .import().  This is what statesize advertises to the crypto subsystem.
+ *
+ * cmh_hash_export() writes an all-zero checkpoint when no valid
+ * checkpoint exists, and copies only the first buf_len bytes of buf[];
+ * bytes of buf[] beyond buf_len are left as the caller supplied them.
+ */
+struct cmh_hash_export_state {
+       u8  checkpoint[HC_CONTEXT_SIZE]; /* HC context from last SAVE */
+       u8  buf[CMH_HASH_MAX_BLOCK];    /* holdback buffer */
+       u32 buf_len;                     /* valid bytes in buf[] */
+       u32 hw_started;                  /* non-zero if checkpoint valid */
+};
+
+/*
+ * Maximum payload commands any hash transaction can produce:
+ *   INIT + RESTORE + UPDATE + SAVE/FINAL + FLUSH = 5
+ * Worst-case packed output (stride=7, 1 payload per VCQ):
+ *   5 VCQs x 2 entries = 10
+ */
+#define CMH_HASH_MAX_PAYLOAD    5
+#define CMH_HASH_MAX_PACKED     (CMH_HASH_MAX_PAYLOAD * 2)
+
+/*
+ * Stored in ahash_request_ctx().  Tracks the algorithm, a holdback
+ * buffer for partial blocks, an HC context checkpoint from the last
+ * SAVE, and DMA state for the current in-flight async operation.
+ *
+ * The checkpoint is embedded inline rather than heap-allocated because
+ * the kernel ahash API has no per-request destructor.  If a request is
+ * abandoned without .final() (e.g. transform freed early), a heap
+ * checkpoint would leak unconditionally.
+ *
+ * The heap buffers (save_buf, data_buf, digest_buf) only live for the
+ * duration of one submission: they are allocated by the submit path
+ * and freed by the matching completion callback or error path.
+ */
+struct cmh_hash_reqctx {
+       /* Algorithm table entry, set by cmh_hash_init() */
+       const struct cmh_hash_alg_info *info;
+       /*
+        * Sticky error recorded by a failed update completion; update,
+        * final and finup return it immediately on entry.
+        */
+       int    error;
+       u32    hw_started;      /* non-zero after first HW submission */
+       u32    buf_len;         /* bytes in holdback buf[] */
+       u32    has_checkpoint;  /* non-zero if checkpoint[] valid */
+       /* DMA state for current async operation */
+       dma_addr_t ckpt_dma;   /* RESTORE input */
+       dma_addr_t save_dma;   /* SAVE output (update only) */
+       dma_addr_t data_dma;   /* UPDATE input */
+       dma_addr_t digest_dma; /* FINAL output (final/digest only) */
+       u8    *save_buf;       /* SAVE output buffer */
+       u8    *data_buf;       /* linearised data for DMA */
+       u32    data_len;       /* bytes in data_buf */
+       u8    *digest_buf;     /* digest output buffer */
+       u8     buf[CMH_HASH_MAX_BLOCK]; /* holdback for partial block */
+       u8     checkpoint[HC_CONTEXT_SIZE]; /* HC context from last SAVE */
+       /* Output area handed to cmh_vcq_pack_and_submit_async() */
+       struct vcq_cmd packed[CMH_HASH_MAX_PACKED];
+};
+
+/* VCQ Builders (HC-specific; shared builders in cmh_hc_abi.h / cmh_vcq.h) */
+
+/* Build an HC_CMD_UPDATE entry in @slot (input address + input length) */
+static void vcq_add_hc_update(struct vcq_cmd *slot, u32 core_id, u64 input_phys, u32 len)
+{
+       /* Start from an all-zero slot so every unused field is cleared */
+       memset(slot, 0, sizeof(*slot));
+
+       slot->hwc.hc.cmd_update.inlen = len;
+       slot->hwc.hc.cmd_update.input = input_phys;
+       slot->id = VCQ_CMD_ID(core_id, 0, 1, HC_CMD_UPDATE);
+       slot->magic = VCQ_CMD_MAGIC;
+}
+
+/* Build an HC_CMD_SAVE entry in @slot (output address + output length) */
+static void vcq_add_hc_save(struct vcq_cmd *slot, u32 core_id, u64 output_phys, u32 outlen)
+{
+       /* Start from an all-zero slot so every unused field is cleared */
+       memset(slot, 0, sizeof(*slot));
+
+       slot->hwc.hc.cmd_save.outlen = outlen;
+       slot->hwc.hc.cmd_save.output = output_phys;
+       slot->id = VCQ_CMD_ID(core_id, 0, 1, HC_CMD_SAVE);
+       slot->magic = VCQ_CMD_MAGIC;
+}
+
+/* Build an HC_CMD_RESTORE entry in @slot (input address + input length) */
+static void vcq_add_hc_restore(struct vcq_cmd *slot, u32 core_id, u64 input_phys, u32 inlen)
+{
+       /* Start from an all-zero slot so every unused field is cleared */
+       memset(slot, 0, sizeof(*slot));
+
+       slot->hwc.hc.cmd_restore.inlen = inlen;
+       slot->hwc.hc.cmd_restore.input = input_phys;
+       slot->id = VCQ_CMD_ID(core_id, 0, 1, HC_CMD_RESTORE);
+       slot->magic = VCQ_CMD_MAGIC;
+}
+
+/* Request Context Cleanup */
+
+/*
+ * Invalidate the per-request checkpoint once a request is finished
+ * (final completion) or has failed in the final submit path.  Nothing
+ * is freed here: the checkpoint lives inline in the request context,
+ * so clearing has_checkpoint is all that is needed.
+ */
+static void cmh_hash_free_reqctx(struct cmh_hash_reqctx *rctx)
+{
+       rctx->has_checkpoint = 0;
+}
+
+/* VCQ Packing + Submit */
+
+/* ahash Operations */
+
+/*
+ * Wrapper struct: embeds ahash_alg + a pointer to our alg_info table
+ * entry so we can recover it in the tfm callbacks.
+ *
+ * cmh_hash_get_info() goes from the transform's ahash_alg back to this
+ * wrapper with container_of(), so the ahash_alg handed to the crypto
+ * API must be the 'alg' member of one of these structs.
+ */
+struct cmh_hash_alg_drv {
+       struct ahash_alg                 alg;   /* registered algorithm */
+       const struct cmh_hash_alg_info  *info;  /* matching table entry */
+};
+
+/*
+ * Look up the cmh_hash_alg_info belonging to a transform.
+ *
+ * Relies on the transform's ahash_alg being the 'alg' member of a
+ * struct cmh_hash_alg_drv: container_of() recovers that wrapper, and
+ * the wrapper carries the info pointer.
+ */
+static const struct cmh_hash_alg_info *
+cmh_hash_get_info(struct crypto_ahash *tfm)
+{
+       struct cmh_hash_alg_drv *drv;
+
+       drv = container_of(crypto_ahash_alg(tfm), struct cmh_hash_alg_drv,
+                          alg);
+       return drv->info;
+}
+
+/*
+ * .init -- software-only.  Wipes the whole per-request context and
+ * records which algorithm this request uses.  Always returns 0.
+ */
+static int cmh_hash_init(struct ahash_request *req)
+{
+       struct cmh_hash_reqctx *rctx = ahash_request_ctx(req);
+
+       memset(rctx, 0, sizeof(*rctx));
+       rctx->info = cmh_hash_get_info(crypto_ahash_reqtfm(req));
+
+       return 0;
+}
+
+/*
+ * Update completion callback for the transaction built by
+ * cmh_hash_update().
+ *
+ * An -EINPROGRESS notification is forwarded to the requester untouched.
+ * For a real completion the DMA mappings are released, then either the
+ * SAVE output is copied into the inline checkpoint (success) or the
+ * error is latched in rctx->error (failure).  Both temporary buffers
+ * are freed in either case before the request is completed.
+ */
+static void cmh_hash_update_complete(void *data, int error)
+{
+       struct ahash_request *req = data;
+       struct cmh_hash_reqctx *rctx = ahash_request_ctx(req);
+
+       if (error != -EINPROGRESS) {
+               /* The checkpoint was only mapped if one existed at submit */
+               if (rctx->has_checkpoint)
+                       cmh_dma_unmap_single(rctx->ckpt_dma, HC_CONTEXT_SIZE,
+                                            DMA_TO_DEVICE);
+               cmh_dma_unmap_single(rctx->save_dma, HC_CONTEXT_SIZE,
+                                    DMA_FROM_DEVICE);
+               cmh_dma_unmap_single(rctx->data_dma, rctx->data_len,
+                                    DMA_TO_DEVICE);
+
+               if (error) {
+                       rctx->error = error;
+               } else {
+                       /* Adopt the freshly saved HC context */
+                       memcpy(rctx->checkpoint, rctx->save_buf,
+                              HC_CONTEXT_SIZE);
+                       rctx->has_checkpoint = 1;
+                       rctx->hw_started = 1;
+               }
+
+               kfree(rctx->save_buf);
+               rctx->save_buf = NULL;
+               kfree(rctx->data_buf);
+               rctx->data_buf = NULL;
+               rctx->data_len = 0;
+       }
+
+       cmh_complete(&req->base, error);
+}
+
+/*
+ * .update -- buffer incoming data, submit full blocks to HW.
+ *
+ * Maintains a partial-block holdback buffer in rctx->buf[].  When
+ * enough data is available for at least one full block, the full
+ * blocks are linearised and submitted as:
+ *   INIT [+ RESTORE] + UPDATE(full_blocks) + SAVE + FLUSH
+ *
+ * The tail (< block_size) stays in the holdback for the next call.
+ *
+ * Return: 0 if the data was only buffered (or nbytes is 0),
+ * -EINPROGRESS on HW submission, -EBUSY as reported by the submit
+ * helper, -EINVAL if holdback + nbytes does not fit in a u32, the
+ * sticky rctx->error left by an earlier failed update, or another
+ * negative errno.  On every failure handled here the holdback is put
+ * back to its state on entry, so a failed update consumes nothing.
+ */
+static int cmh_hash_update(struct ahash_request *req)
+{
+       struct cmh_hash_reqctx *rctx = ahash_request_ctx(req);
+       const struct cmh_hash_alg_info *info = rctx->info;
+       struct vcq_cmd cmds[CMH_HASH_MAX_PAYLOAD];
+       struct core_dispatch d;
+       u32 block_size = info->block_size;
+       u32 total_avail, full_len, tail_len, from_src;
+       u32 old_buf_len = rctx->buf_len;
+       u32 idx;
+       int ret;
+       gfp_t gfp;
+
+       if (rctx->error)
+               return rctx->error;
+
+       if (!req->nbytes)
+               return 0;
+
+       /*
+        * All length arithmetic below is u32.  If holdback + nbytes
+        * wrapped, total_avail would look smaller than a block and the
+        * "just buffer" path would copy nbytes into the holdback.
+        */
+       if (req->nbytes > U32_MAX - rctx->buf_len)
+               return -EINVAL;
+
+       gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
+             GFP_KERNEL : GFP_ATOMIC;
+
+       total_avail = rctx->buf_len + req->nbytes;
+
+       /* Not enough for a full block -- just buffer */
+       if (total_avail < block_size) {
+               if (req->base.flags & CRYPTO_AHASH_REQ_VIRT)
+                       memcpy(rctx->buf + rctx->buf_len,
+                              req->svirt, req->nbytes);
+               else
+                       scatterwalk_map_and_copy(rctx->buf + rctx->buf_len,
+                                                req->src, 0,
+                                                req->nbytes, 0);
+               rctx->buf_len = total_avail;
+               return 0;
+       }
+
+       /* Have at least one full block -- submit to HW */
+       full_len = total_avail - total_avail % block_size;
+       tail_len = total_avail - full_len;
+       from_src = full_len - rctx->buf_len;
+
+       /* Linearise: holdback prefix + full blocks from scatterlist */
+       rctx->data_buf = kmalloc(full_len, gfp);
+       if (!rctx->data_buf)
+               return -ENOMEM;
+
+       if (rctx->buf_len > 0)
+               memcpy(rctx->data_buf, rctx->buf, rctx->buf_len);
+
+       if (from_src > 0) {
+               if (req->base.flags & CRYPTO_AHASH_REQ_VIRT)
+                       memcpy(rctx->data_buf + rctx->buf_len,
+                              req->svirt, from_src);
+               else
+                       scatterwalk_map_and_copy(rctx->data_buf + rctx->buf_len,
+                                                req->src, 0,
+                                                from_src, 0);
+       }
+
+       /*
+        * Move tail to holdback.  This has to happen before submission:
+        * once the transaction is queued the completion may run at any
+        * time.  The error path below undoes it from data_buf.
+        */
+       if (tail_len > 0) {
+               if (req->base.flags & CRYPTO_AHASH_REQ_VIRT)
+                       memcpy(rctx->buf, req->svirt + from_src,
+                              tail_len);
+               else
+                       scatterwalk_map_and_copy(rctx->buf, req->src,
+                                                from_src, tail_len,
+                                                0);
+       }
+       rctx->buf_len = tail_len;
+       rctx->data_len = full_len;
+
+       /* Allocate SAVE output buffer */
+       rctx->save_buf = kzalloc(HC_CONTEXT_SIZE, gfp);
+       if (!rctx->save_buf) {
+               ret = -ENOMEM;
+               goto err_free;
+       }
+
+       /* DMA map data, save output, and checkpoint */
+       rctx->data_dma = cmh_dma_map_single(rctx->data_buf, full_len,
+                                           DMA_TO_DEVICE);
+       if (cmh_dma_map_error(rctx->data_dma)) {
+               ret = -ENOMEM;
+               goto err_free;
+       }
+
+       rctx->save_dma = cmh_dma_map_single(rctx->save_buf, HC_CONTEXT_SIZE,
+                                           DMA_FROM_DEVICE);
+       if (cmh_dma_map_error(rctx->save_dma)) {
+               ret = -ENOMEM;
+               goto err_unmap_data;
+       }
+
+       rctx->ckpt_dma = DMA_MAPPING_ERROR;
+       if (rctx->has_checkpoint) {
+               rctx->ckpt_dma = cmh_dma_map_single(rctx->checkpoint,
+                                                   HC_CONTEXT_SIZE,
+                                                   DMA_TO_DEVICE);
+               if (cmh_dma_map_error(rctx->ckpt_dma)) {
+                       ret = -ENOMEM;
+                       goto err_unmap_save;
+               }
+       }
+
+       /* Build VCQ: INIT [+ RESTORE] + UPDATE + SAVE + FLUSH */
+       d = cmh_core_select_instance(CMH_CORE_HC);
+       idx = 0;
+
+       vcq_add_hc_init(&cmds[idx++], d.core_id, info->hc_algo);
+
+       if (rctx->has_checkpoint)
+               vcq_add_hc_restore(&cmds[idx++], d.core_id,
+                                  (u64)rctx->ckpt_dma, HC_CONTEXT_SIZE);
+
+       vcq_add_hc_update(&cmds[idx++], d.core_id,
+                         (u64)rctx->data_dma, full_len);
+
+       vcq_add_hc_save(&cmds[idx++], d.core_id,
+                       (u64)rctx->save_dma, HC_CONTEXT_SIZE);
+
+       vcq_add_flush(&cmds[idx++], d.core_id);
+
+       ret = cmh_vcq_pack_and_submit_async(cmds, idx, rctx->packed,
+                                           CMH_HASH_MAX_PACKED,
+                                           d.mbx_idx,
+                                           cmh_hash_update_complete, req,
+                                           !!(req->base.flags &
+                                              CRYPTO_TFM_REQ_MAY_BACKLOG),
+                                           cmh_tm_async_timeout_jiffies());
+       /*
+        * NOTE(review): -EBUSY is passed through without cleanup,
+        * presumably because the request was backlogged and the
+        * completion callback will release everything -- confirm
+        * against cmh_vcq_pack_and_submit_async().
+        */
+       if (ret == -EBUSY)
+               return -EBUSY;
+       if (ret)
+               goto err_unmap_ckpt;
+
+       return -EINPROGRESS;
+
+err_unmap_ckpt:
+       if (rctx->has_checkpoint)
+               cmh_dma_unmap_single(rctx->ckpt_dma, HC_CONTEXT_SIZE,
+                                    DMA_TO_DEVICE);
+err_unmap_save:
+       cmh_dma_unmap_single(rctx->save_dma, HC_CONTEXT_SIZE,
+                            DMA_FROM_DEVICE);
+err_unmap_data:
+       cmh_dma_unmap_single(rctx->data_dma, full_len, DMA_TO_DEVICE);
+err_free:
+       /*
+        * Roll the holdback back: the bytes it held on entry are still
+        * the first old_buf_len bytes of data_buf.  Without this a
+        * failed update would silently drop them from the hash.
+        */
+       memcpy(rctx->buf, rctx->data_buf, old_buf_len);
+       rctx->buf_len = old_buf_len;
+       kfree(rctx->save_buf);
+       rctx->save_buf = NULL;
+       kfree(rctx->data_buf);
+       rctx->data_buf = NULL;
+       rctx->data_len = 0;
+       return ret;
+}
+
+/*
+ * Final completion callback for the transaction built by
+ * cmh_hash_submit_final().
+ *
+ * An -EINPROGRESS notification is forwarded untouched.  Otherwise all
+ * DMA mappings are released, the digest is copied to req->result on
+ * success, the temporary buffers are freed and the checkpoint is
+ * invalidated before the request is completed.
+ */
+static void cmh_hash_final_complete(void *data, int error)
+{
+       struct ahash_request *req = data;
+       struct cmh_hash_reqctx *rctx = ahash_request_ctx(req);
+
+       if (error != -EINPROGRESS) {
+               u32 digest_size = rctx->info->digest_size;
+
+               if (rctx->has_checkpoint)
+                       cmh_dma_unmap_single(rctx->ckpt_dma, HC_CONTEXT_SIZE,
+                                            DMA_TO_DEVICE);
+               /* Residual data is optional (NULL for an empty final) */
+               if (rctx->data_buf)
+                       cmh_dma_unmap_single(rctx->data_dma, rctx->data_len,
+                                            DMA_TO_DEVICE);
+               cmh_dma_unmap_single(rctx->digest_dma, digest_size,
+                                    DMA_FROM_DEVICE);
+
+               if (!error)
+                       memcpy(req->result, rctx->digest_buf, digest_size);
+
+               kfree(rctx->digest_buf);
+               rctx->digest_buf = NULL;
+               kfree(rctx->data_buf);
+               rctx->data_buf = NULL;
+               cmh_hash_free_reqctx(rctx);
+       }
+
+       cmh_complete(&req->base, error);
+}
+
+/*
+ * Submit the final VCQ transaction:
+ *   INIT [+ RESTORE] [+ UPDATE(residual)] + FINAL + FLUSH
+ *
+ * RESTORE is only emitted when the request holds a valid checkpoint,
+ * UPDATE only when there is residual data.
+ *
+ * @req:      the ahash request being finalised.
+ * @data_buf: linearised residual data, or NULL for empty-hash.
+ *            Ownership transferred -- the completion callback frees
+ *            it, and so does every error path in this function.
+ * @data_len: bytes in data_buf.
+ *
+ * Return: -EINPROGRESS once the transaction is submitted; -EBUSY passed
+ * straight through from the submit helper (no cleanup is done here in
+ * that case); any other negative errno after all mappings and buffers
+ * have been released and the checkpoint invalidated.
+ */
+static int cmh_hash_submit_final(struct ahash_request *req,
+                                u8 *data_buf, u32 data_len)
+{
+       struct cmh_hash_reqctx *rctx = ahash_request_ctx(req);
+       const struct cmh_hash_alg_info *info = rctx->info;
+       struct vcq_cmd cmds[CMH_HASH_MAX_PAYLOAD];
+       struct core_dispatch d;
+       u32 idx;
+       int ret;
+       gfp_t gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
+                  GFP_KERNEL : GFP_ATOMIC;
+
+       /* Record the residual so the completion callback can release it */
+       rctx->data_buf = data_buf;
+       rctx->data_len = data_len;
+
+       /* Allocate digest output buffer */
+       rctx->digest_buf = kzalloc(info->digest_size, gfp);
+       if (!rctx->digest_buf) {
+               ret = -ENOMEM;
+               goto err_free_data;
+       }
+
+       rctx->digest_dma = cmh_dma_map_single(rctx->digest_buf,
+                                             info->digest_size,
+                                              DMA_FROM_DEVICE);
+       if (cmh_dma_map_error(rctx->digest_dma)) {
+               ret = -ENOMEM;
+               goto err_free_digest;
+       }
+
+       /* Map residual data for UPDATE */
+       rctx->data_dma = DMA_MAPPING_ERROR;
+       if (data_buf && data_len > 0) {
+               rctx->data_dma = cmh_dma_map_single(data_buf, data_len,
+                                                   DMA_TO_DEVICE);
+               if (cmh_dma_map_error(rctx->data_dma)) {
+                       ret = -ENOMEM;
+                       goto err_unmap_digest;
+               }
+       }
+
+       /* Map checkpoint for RESTORE */
+       rctx->ckpt_dma = DMA_MAPPING_ERROR;
+       if (rctx->has_checkpoint) {
+               rctx->ckpt_dma = cmh_dma_map_single(rctx->checkpoint,
+                                                   HC_CONTEXT_SIZE,
+                                                    DMA_TO_DEVICE);
+               if (cmh_dma_map_error(rctx->ckpt_dma)) {
+                       ret = -ENOMEM;
+                       goto err_unmap_data;
+               }
+       }
+
+       /* Build VCQ: INIT [+ RESTORE] [+ UPDATE] + FINAL + FLUSH */
+       d = cmh_core_select_instance(CMH_CORE_HC);
+       idx = 0;
+
+       vcq_add_hc_init(&cmds[idx++], d.core_id, info->hc_algo);
+
+       if (rctx->has_checkpoint)
+               vcq_add_hc_restore(&cmds[idx++], d.core_id,
+                                  (u64)rctx->ckpt_dma, HC_CONTEXT_SIZE);
+
+       if (data_buf && data_len > 0)
+               vcq_add_hc_update(&cmds[idx++], d.core_id,
+                                 (u64)rctx->data_dma, data_len);
+
+       vcq_add_hc_final(&cmds[idx++], d.core_id,
+                        (u64)rctx->digest_dma, info->digest_size);
+
+       vcq_add_flush(&cmds[idx++], d.core_id);
+
+       ret = cmh_vcq_pack_and_submit_async(cmds, idx, rctx->packed,
+                                           CMH_HASH_MAX_PACKED,
+                                           d.mbx_idx,
+                                           cmh_hash_final_complete, req,
+                                           !!(req->base.flags &
+                                              CRYPTO_TFM_REQ_MAY_BACKLOG),
+                                           cmh_tm_async_timeout_jiffies());
+       /*
+        * NOTE(review): -EBUSY leaves all mappings and buffers in place,
+        * presumably because the request was backlogged and the
+        * completion callback will release them -- confirm against
+        * cmh_vcq_pack_and_submit_async().
+        */
+       if (ret == -EBUSY)
+               return -EBUSY;
+       if (ret)
+               goto err_unmap_ckpt;
+
+       return -EINPROGRESS;
+
+       /* Unwind in reverse order of acquisition */
+err_unmap_ckpt:
+       if (rctx->has_checkpoint)
+               cmh_dma_unmap_single(rctx->ckpt_dma, HC_CONTEXT_SIZE,
+                                    DMA_TO_DEVICE);
+err_unmap_data:
+       if (data_buf && data_len > 0)
+               cmh_dma_unmap_single(rctx->data_dma, data_len,
+                                    DMA_TO_DEVICE);
+err_unmap_digest:
+       cmh_dma_unmap_single(rctx->digest_dma, info->digest_size,
+                            DMA_FROM_DEVICE);
+err_free_digest:
+       kfree(rctx->digest_buf);
+       rctx->digest_buf = NULL;
+err_free_data:
+       /* data_buf was handed over to us, so it is freed on failure too */
+       kfree(data_buf);
+       rctx->data_buf = NULL;
+       cmh_hash_free_reqctx(rctx);
+       return ret;
+}
+
+/*
+ * .final -- hand whatever is left in the holdback to the final path.
+ *
+ * The holdback lives inside the request context, so it is duplicated
+ * into a heap buffer whose ownership passes to
+ * cmh_hash_submit_final().  With an empty holdback the final
+ * transaction is submitted without residual data.
+ */
+static int cmh_hash_final(struct ahash_request *req)
+{
+       struct cmh_hash_reqctx *rctx = ahash_request_ctx(req);
+       u32 pending = rctx->buf_len;
+       u8 *residual;
+
+       if (rctx->error)
+               return rctx->error;
+
+       if (!pending)
+               return cmh_hash_submit_final(req, NULL, 0);
+
+       residual = kmemdup(rctx->buf, pending,
+                          req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
+                          GFP_KERNEL : GFP_ATOMIC);
+       if (!residual)
+               return -ENOMEM;
+
+       rctx->buf_len = 0;
+
+       return cmh_hash_submit_final(req, residual, pending);
+}
+
+static int cmh_hash_finup(struct ahash_request *req);
+
+/*
+ * .digest -- one-shot hash, implemented as .init followed by .finup.
+ *
+ * Going through finup means the input is linearised and mapped with
+ * cmh_dma_map_single(), which is the only DMA mapping path aware of
+ * all supported DMA backends.
+ */
+static int cmh_hash_digest(struct ahash_request *req)
+{
+       int ret = cmh_hash_init(req);
+
+       return ret ? ret : cmh_hash_finup(req);
+}
+
+/*
+ * .finup -- update + final combined into a single transaction.
+ *
+ * Linearises the holdback buffer + new data and submits everything
+ * through the final path.  Avoids the kernel's ahash_def_finup()
+ * which would allocate a subrequest and clone via export/import.
+ *
+ * Return: whatever cmh_hash_submit_final() returns, the sticky
+ * rctx->error left by an earlier failed update, -EINVAL if holdback +
+ * nbytes does not fit in a u32, or -ENOMEM if the linear buffer cannot
+ * be allocated.
+ */
+static int cmh_hash_finup(struct ahash_request *req)
+{
+       struct cmh_hash_reqctx *rctx = ahash_request_ctx(req);
+       u32 data_len;
+       u8 *data_buf;
+       gfp_t gfp;
+
+       if (rctx->error)
+               return rctx->error;
+
+       /*
+        * data_len is a u32.  If holdback + nbytes wrapped, the buffer
+        * below would be allocated too small for the nbytes-sized copy
+        * that follows.
+        */
+       if (req->nbytes > U32_MAX - rctx->buf_len)
+               return -EINVAL;
+
+       data_len = rctx->buf_len + req->nbytes;
+
+       /* Nothing buffered and nothing new: final without UPDATE */
+       if (data_len == 0)
+               return cmh_hash_submit_final(req, NULL, 0);
+
+       gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
+             GFP_KERNEL : GFP_ATOMIC;
+
+       data_buf = kmalloc(data_len, gfp);
+       if (!data_buf)
+               return -ENOMEM;
+
+       /* Holdback first, then the new data right behind it */
+       if (rctx->buf_len > 0)
+               memcpy(data_buf, rctx->buf, rctx->buf_len);
+
+       if (req->nbytes > 0) {
+               if (req->base.flags & CRYPTO_AHASH_REQ_VIRT)
+                       memcpy(data_buf + rctx->buf_len,
+                              req->svirt, req->nbytes);
+               else
+                       scatterwalk_map_and_copy(data_buf + rctx->buf_len,
+                                                req->src, 0,
+                                                req->nbytes, 0);
+       }
+
+       /* The holdback is now part of data_buf, which submit_final owns */
+       rctx->buf_len = 0;
+       return cmh_hash_submit_final(req, data_buf, data_len);
+}
+
+/*
+ * .export -- snapshot the request state; purely software.
+ *
+ * Writes the holdback bytes, their count and the hw_started flag into
+ * the caller's export state.  The HC checkpoint is copied only when HW
+ * has been started and a checkpoint is present; otherwise the
+ * checkpoint area is zero-filled.  No HW interaction is needed because
+ * the incremental model keeps the checkpoint up-to-date after each
+ * .update().
+ */
+static int cmh_hash_export(struct ahash_request *req, void *out)
+{
+       struct cmh_hash_export_state *snap = out;
+       struct cmh_hash_reqctx *rctx = ahash_request_ctx(req);
+
+       snap->hw_started = rctx->hw_started;
+       snap->buf_len = rctx->buf_len;
+       if (rctx->buf_len > 0)
+               memcpy(snap->buf, rctx->buf, rctx->buf_len);
+
+       if (!rctx->hw_started || !rctx->has_checkpoint)
+               memset(snap->checkpoint, 0, HC_CONTEXT_SIZE);
+       else
+               memcpy(snap->checkpoint, rctx->checkpoint, HC_CONTEXT_SIZE);
+
+       return 0;
+}
+
+/*
+ * .import -- rebuild the request state from an exported snapshot;
+ * purely software.
+ *
+ * The request context is cleared and re-bound to the transform's
+ * algorithm info first, so it is in a defined state even when the
+ * snapshot is rejected.  A snapshot whose holdback length exceeds
+ * CMH_HASH_MAX_BLOCK yields -EINVAL.  The checkpoint is restored only
+ * when the snapshot says HW had been started; the next .update() or
+ * .final() will then RESTORE it into HW.
+ */
+static int cmh_hash_import(struct ahash_request *req, const void *in)
+{
+       const struct cmh_hash_export_state *saved = in;
+       struct cmh_hash_reqctx *rctx = ahash_request_ctx(req);
+
+       memset(rctx, 0, sizeof(*rctx));
+       rctx->info = cmh_hash_get_info(crypto_ahash_reqtfm(req));
+
+       if (saved->buf_len > CMH_HASH_MAX_BLOCK)
+               return -EINVAL;
+
+       memcpy(rctx->buf, saved->buf, saved->buf_len);
+       rctx->buf_len = saved->buf_len;
+       rctx->hw_started = saved->hw_started;
+
+       if (!saved->hw_started)
+               return 0;
+
+       memcpy(rctx->checkpoint, saved->checkpoint, HC_CONTEXT_SIZE);
+       rctx->has_checkpoint = 1;
+       return 0;
+}
+
+/* Transform init (cra_init) -- announce the per-request context size */
+
+static int cmh_hash_cra_init(struct crypto_tfm *tfm)
+{
+       struct crypto_ahash *ahash = __crypto_ahash_cast(tfm);
+
+       crypto_ahash_set_reqsize(ahash, sizeof(struct cmh_hash_reqctx));
+       return 0;
+}
+
+/* Registration */
+
+static struct cmh_hash_alg_drv cmh_hash_drvs[CMH_HASH_ALG_COUNT];
+
+/**
+ * cmh_hash_register() - Register every algorithm in cmh_hash_algs_info[]
+ *
+ * Fills in one ahash_alg per table entry and registers it with the
+ * crypto framework.  If a registration fails, the entries registered
+ * before it are unregistered again (most recent first) and the error
+ * is returned.
+ *
+ * Return: 0 on success, negative errno on failure.
+ */
+int cmh_hash_register(void)
+{
+       unsigned int i;
+
+       for (i = 0; i < CMH_HASH_ALG_COUNT; i++) {
+               const struct cmh_hash_alg_info *info = &cmh_hash_algs_info[i];
+               struct ahash_alg *alg = &cmh_hash_drvs[i].alg;
+               int rc;
+
+               cmh_hash_drvs[i].info = info;
+
+               /* Names, priority and capability flags */
+               strscpy(alg->halg.base.cra_name, info->alg_name,
+                       CRYPTO_MAX_ALG_NAME);
+               strscpy(alg->halg.base.cra_driver_name, info->drv_name,
+                       CRYPTO_MAX_ALG_NAME);
+               alg->halg.base.cra_priority    = 300;
+               alg->halg.base.cra_flags       = CRYPTO_ALG_KERN_DRIVER_ONLY |
+                                                CRYPTO_ALG_NO_FALLBACK |
+                                                CRYPTO_ALG_ASYNC |
+                                                CRYPTO_ALG_REQ_VIRT;
+               alg->halg.base.cra_blocksize   = info->block_size;
+               alg->halg.base.cra_ctxsize     = 0;
+               alg->halg.base.cra_init        = cmh_hash_cra_init;
+               alg->halg.base.cra_module      = THIS_MODULE;
+
+               /* Sizes reported to the crypto API */
+               alg->halg.digestsize = info->digest_size;
+               alg->halg.statesize  = sizeof(struct cmh_hash_export_state);
+
+               /* Operation entry points, shared by all algorithms */
+               alg->init   = cmh_hash_init;
+               alg->update = cmh_hash_update;
+               alg->final  = cmh_hash_final;
+               alg->finup  = cmh_hash_finup;
+               alg->digest = cmh_hash_digest;
+               alg->export = cmh_hash_export;
+               alg->import = cmh_hash_import;
+
+               rc = crypto_register_ahash(alg);
+               if (!rc) {
+                       dev_dbg(cmh_dev(), "hash: registered %s (priority 300)\n",
+                               info->drv_name);
+                       continue;
+               }
+
+               dev_err(cmh_dev(), "hash: failed to register %s (rc=%d)\n",
+                       info->drv_name, rc);
+               /* Roll back the algorithms registered before this one */
+               for (; i > 0; i--)
+                       crypto_unregister_ahash(&cmh_hash_drvs[i - 1].alg);
+               return rc;
+       }
+
+       dev_info(cmh_dev(), "hash: %zu algorithm(s) registered\n",
+                CMH_HASH_ALG_COUNT);
+       return 0;
+}
+
+/**
+ * cmh_hash_unregister() - Remove every hash algorithm from the crypto framework
+ *
+ * Walks the driver table in index order and unregisters each ahash.
+ */
+void cmh_hash_unregister(void)
+{
+       unsigned int idx = 0;
+
+       while (idx < CMH_HASH_ALG_COUNT) {
+               crypto_unregister_ahash(&cmh_hash_drvs[idx].alg);
+               dev_dbg(cmh_dev(), "hash: unregistered %s\n",
+                       cmh_hash_algs_info[idx].drv_name);
+               idx++;
+       }
+
+       dev_info(cmh_dev(), "hash: cleaned up\n");
+}
diff --git a/drivers/crypto/cmh/cmh_main.c b/drivers/crypto/cmh/cmh_main.c
index 307bd7dd304b..e8e30b893932 100644
--- a/drivers/crypto/cmh/cmh_main.c
+++ b/drivers/crypto/cmh/cmh_main.c
@@ -29,6 +29,7 @@
 #include "cmh_mqi.h"
 #include "cmh_txn.h"
 #include "cmh_rh.h"
+#include "cmh_hash.h"
 #include "cmh_mgmt.h"
 #include "cmh_registers.h"
 #include "cmh_debugfs.h"
@@ -191,6 +192,11 @@ static int cmh_probe(struct platform_device *pdev)
        if (ret)
                goto err_rh_init;

+       /* Register hash algorithms with the kernel crypto API */
+       ret = cmh_hash_register();
+       if (ret)
+               goto err_hash_register;
+
        /* Register key management device (/dev/cmh_mgmt) */
        ret = cmh_mgmt_register();
        if (ret)
@@ -203,6 +209,8 @@ static int cmh_probe(struct platform_device *pdev)
        return 0;

 err_mgmt_register:
+       cmh_hash_unregister();
+err_hash_register:
        cmh_rh_cleanup(cfg);
 err_rh_init:
        cmh_tm_cleanup();
@@ -229,6 +237,7 @@ static void cmh_remove(struct platform_device *pdev)
        cfg = &dev->config;

        cmh_mgmt_unregister();
+       cmh_hash_unregister();
        cmh_rh_cleanup(cfg);
        cmh_tm_cleanup();
        cmh_mqi_cleanup(cfg);
diff --git a/drivers/crypto/cmh/include/cmh_hash.h b/drivers/crypto/cmh/include/cmh_hash.h
new file mode 100644
index 000000000000..bf17d3af7787
--- /dev/null
+++ b/drivers/crypto/cmh/include/cmh_hash.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2026 Cryptography Research, Inc. (CRI).
+ * CMH LKM -- Kernel Crypto API Hash Driver
+ *
+ * Registers ahash algorithms (SHA-2, SHA-3, and SHAKE families) with the
+ * Linux crypto subsystem.  Uses an incremental HW update model:
+ *
+ *   .init()   -> software-only: zero per-request context
+ *   .update() -> holdback partial blocks; submit full blocks via
+ *                INIT [+ RESTORE] + UPDATE + SAVE + FLUSH
+ *   .final()  -> INIT [+ RESTORE] [+ UPDATE(residual)] + FINAL + FLUSH
+ *   .digest() -> INIT + UPDATE + FINAL + FLUSH (single-shot)
+ *   .export() -> software-only: copy checkpoint + holdback
+ *   .import() -> software-only: restore checkpoint + holdback
+ */
+
+#ifndef CMH_HASH_H
+#define CMH_HASH_H
+
+#include "cmh_config.h"
+
+int  cmh_hash_register(void);
+void cmh_hash_unregister(void);
+
+#endif /* CMH_HASH_H */
--
2.43.7


** This message and any attachments are for the sole use of the intended recipient(s). It may contain information that is confidential and privileged. If you are not the intended recipient of this message, you are prohibited from printing, copying, forwarding or saving it. Please delete the message and attachments and notify the sender immediately. **

Rambus Inc.<http://www.rambus.com>

^ permalink raw reply related

* [PATCH 18/19] selftests: crypto: cmh - add kselftest for management ioctl
From: Saravanakrishnan Krishnamoorthy @ 2026-06-25 17:33 UTC (permalink / raw)
  To: Albert Ou, Alex Ousherovitch, Conor Dooley, David S. Miller,
	Herbert Xu, Jonathan Corbet, Krzysztof Kozlowski, Palmer Dabbelt,
	Paul Walmsley, Rob Herring, Saravanakrishnan Krishnamoorthy,
	Shuah Khan
  Cc: Alexandre Ghiti, devicetree, Joel Wittenauer, linux-api,
	linux-crypto, linux-doc, linux-kernel, linux-kselftest,
	linux-riscv, Shuah Khan, sipsupport, Thi Nguyen
In-Reply-To: <20260625173328.1140487-1-skrishnamoorthy@rambus.com>

From: Alex Ousherovitch <aousherovitch@rambus.com>

Add a minimal kselftest exercising the /dev/cmh_mgmt ioctl interface:

  - open/close the device node
  - invalid ioctl returns -ENOTTY
  - bad version field returns -EINVAL
  - KEY_NEW + KEY_DELETE lifecycle
  - KIC HKDF1 key derivation
  - ML-KEM-768 keygen via hardware RNG

Tests use the kselftest_harness.h fixture framework and output TAP.
Tests that require hardware features not present on the device under
test are gracefully skipped (SKIP).

Co-developed-by: Saravanakrishnan Krishnamoorthy <skrishnamoorthy@rambus.com>
Signed-off-by: Saravanakrishnan Krishnamoorthy <skrishnamoorthy@rambus.com>
Signed-off-by: Alex Ousherovitch <aousherovitch@rambus.com>
Reviewed-by: Joel Wittenauer <Joel.Wittenauer@cryptography.com>
Reviewed-by: Thi Nguyen <thin@rambus.com>
---
 .../selftests/drivers/crypto/cmh/Makefile     |   6 +
 .../drivers/crypto/cmh/cmh_mgmt_test.c        | 183 ++++++++++++++++++
 .../selftests/drivers/crypto/cmh/config       |   1 +
 3 files changed, 190 insertions(+)
 create mode 100644 tools/testing/selftests/drivers/crypto/cmh/Makefile
 create mode 100644 tools/testing/selftests/drivers/crypto/cmh/cmh_mgmt_test.c
 create mode 100644 tools/testing/selftests/drivers/crypto/cmh/config

diff --git a/tools/testing/selftests/drivers/crypto/cmh/Makefile b/tools/testing/selftests/drivers/crypto/cmh/Makefile
new file mode 100644
index 000000000000..86cb63839b27
--- /dev/null
+++ b/tools/testing/selftests/drivers/crypto/cmh/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+TEST_GEN_PROGS := cmh_mgmt_test
+
+CFLAGS += -Wall -Wno-misleading-indentation -O2 $(KHDR_INCLUDES)
+
+include ../../../lib.mk
diff --git a/tools/testing/selftests/drivers/crypto/cmh/cmh_mgmt_test.c b/tools/testing/selftests/drivers/crypto/cmh/cmh_mgmt_test.c
new file mode 100644
index 000000000000..4514b5a1349a
--- /dev/null
+++ b/tools/testing/selftests/drivers/crypto/cmh/cmh_mgmt_test.c
@@ -0,0 +1,183 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Kselftest for /dev/cmh_mgmt ioctl interface.
+ *
+ * Tests basic ioctl operations on the CRI CryptoManager Hub management
+ * device.  Requires the cmh module loaded on real or emulated hardware.
+ *
+ * Run:  ./cmh_mgmt_test
+ * Output: TAP format (compatible with kselftest harness)
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+
+#include "kselftest_harness.h"
+#include <linux/cmh_mgmt_ioctl.h>
+
+#define CMH_DEV "/dev/cmh_mgmt"
+
+FIXTURE(cmh_mgmt)
+{
+       int fd; /* descriptor for CMH_DEV: opened in FIXTURE_SETUP, closed in FIXTURE_TEARDOWN when >= 0 */
+};
+
+/*
+ * Open the management device read/write before each test.  A missing
+ * device node or a permission failure skips the test; any other open
+ * failure trips the assertion.
+ */
+FIXTURE_SETUP(cmh_mgmt)
+{
+       self->fd = open(CMH_DEV, O_RDWR);
+       if (self->fd < 0) {
+               if (errno == ENOENT)
+                       SKIP(return, "Device " CMH_DEV " not present (module not loaded?)");
+               if (errno == EACCES)
+                       SKIP(return, "Permission denied -- run as root or with CAP_SYS_ADMIN");
+       }
+       ASSERT_GE(self->fd, 0);
+}
+
+/* Close the device node, but only if setup actually opened it. */
+FIXTURE_TEARDOWN(cmh_mgmt)
+{
+       if (self->fd < 0)
+               return;
+
+       close(self->fd);
+}
+
+/*
+ * Test 1: open and close succeed.
+ * If we get here, FIXTURE_SETUP already validated the open; the
+ * assertion below only re-checks the descriptor.  The matching close()
+ * is issued by FIXTURE_TEARDOWN.
+ */
+TEST_F(cmh_mgmt, open_close)
+{
+       ASSERT_GE(self->fd, 0);
+}
+
+/*
+ * Test 2: a bogus ioctl number is rejected.
+ * ioctl() must fail with -1 and leave errno set to ENOTTY.
+ */
+TEST_F(cmh_mgmt, invalid_ioctl)
+{
+       const unsigned long bogus_cmd = _IOC(_IOC_READ, 'J', 0xFF, 4);
+       int ret = ioctl(self->fd, bogus_cmd, NULL);
+
+       ASSERT_EQ(ret, -1);
+       ASSERT_EQ(errno, ENOTTY);
+}
+
+/*
+ * Test 3: KEY_NEW is refused when the version field is invalid.
+ * Every other field is populated, so the zero version is the only
+ * thing deliberately marked invalid; the call must fail with EINVAL.
+ */
+TEST_F(cmh_mgmt, bad_version)
+{
+       struct cmh_ioctl_key_new req;
+       int ret;
+
+       memset(&req, 0, sizeof(req));
+       req.cid = 0xDEAD;
+       req.flags = CMH_FLAG_PT;
+       req.len = 32;
+       req.ds_type = CMH_DS_AES_KEY;
+       req.version = 0; /* invalid */
+
+       ret = ioctl(self->fd, CMH_IOCTL_KEY_NEW, &req);
+       ASSERT_EQ(ret, -1);
+       ASSERT_EQ(errno, EINVAL);
+}
+
+/*
+ * Test 4: key lifecycle -- KEY_NEW must succeed and hand back a
+ * non-zero reference, and KEY_DELETE on that reference must succeed.
+ */
+TEST_F(cmh_mgmt, key_new_delete)
+{
+       struct cmh_ioctl_key_grant drop;
+       struct cmh_ioctl_key_new new_req;
+       int ret;
+
+       /* Step 1: create the key */
+       memset(&new_req, 0, sizeof(new_req));
+       new_req.cid = 0x5E1F7E57ULL; /* "SELFTEST" */
+       new_req.flags = CMH_FLAG_PT;
+       new_req.len = 32;
+       new_req.ds_type = CMH_DS_AES_KEY;
+       new_req.version = CMH_MGMT_V1;
+
+       ret = ioctl(self->fd, CMH_IOCTL_KEY_NEW, &new_req);
+       ASSERT_EQ(ret, 0);
+       ASSERT_NE(new_req.ref, (uint64_t)0);
+
+       /* Step 2: delete it through the reference returned above */
+       memset(&drop, 0, sizeof(drop));
+       drop.ref = new_req.ref;
+       drop.version = CMH_MGMT_V1;
+
+       ret = ioctl(self->fd, CMH_IOCTL_KEY_DELETE, &drop);
+       ASSERT_EQ(ret, 0);
+}
+
+/*
+ * Test 5: KIC HKDF1 derivation from hardware base key 1 (KIC_KEY1).
+ * An EIO failure is taken to mean the base key is not provisioned and
+ * skips the test; otherwise the call must succeed and return a
+ * non-zero reference.
+ */
+TEST_F(cmh_mgmt, kic_hkdf1)
+{
+       static const char label[] = "kselftest-label";
+       struct cmh_ioctl_kic_hkdf1 req;
+       int ret;
+
+       memset(&req, 0, sizeof(req));
+       req.flags = CMH_KIC_FLAG_TEMP;
+       /* The label is passed without its terminating NUL */
+       req.label_len = sizeof(label) - 1;
+       req.label = (uint64_t)(uintptr_t)label;
+       req.cid = 0x4B534C46ULL; /* "KSLF" */
+       req.base_key = CMH_KIC_KEY1;
+       req.key_len = 32;
+       req.version = CMH_MGMT_V1;
+
+       ret = ioctl(self->fd, CMH_IOCTL_KIC_HKDF1, &req);
+       if (ret < 0 && errno == EIO)
+               SKIP(return, "KIC base key 1 not provisioned on this device");
+       ASSERT_EQ(ret, 0);
+       ASSERT_NE(req.ref, (uint64_t)0);
+}
+
+/*
+ * Test 6: ML-KEM-768 key generation seeded by the hardware RNG.
+ * ENODEV is taken to mean the QSE core is absent and skips the test.
+ * On success, at least one of the first 64 bytes of the encapsulation
+ * key must be non-zero.
+ */
+TEST_F(cmh_mgmt, ml_kem_keygen)
+{
+       /* ML-KEM-768: ek = 384*3+32 = 1184, dk = 768*3+96 = 2400 */
+       uint8_t ek[1184];
+       uint8_t dk[2400];
+       struct cmh_ioctl_ml_kem_keygen req;
+       int i = 0, nonzero = 0;
+       int ret;
+
+       /* Start from all-zero outputs so the final check is meaningful */
+       memset(dk, 0, sizeof(dk));
+       memset(ek, 0, sizeof(ek));
+
+       memset(&req, 0, sizeof(req));
+       req.version = CMH_MGMT_V1;
+       req.k = 3; /* ML-KEM-768 */
+       req.flags = CMH_QSE_FLAG_HW_RNG;
+       req.seed = 0; /* HW RNG */
+       req.z = 0;    /* HW RNG */
+       req.dk_ref = 0;
+       req.dk_cid = 0;
+       req.dk = (uint64_t)(uintptr_t)dk;
+       req.ek = (uint64_t)(uintptr_t)ek;
+
+       ret = ioctl(self->fd, CMH_IOCTL_ML_KEM_KEYGEN, &req);
+       if (ret < 0 && errno == ENODEV)
+               SKIP(return, "QSE core not available on this hardware");
+       ASSERT_EQ(ret, 0);
+
+       /* Verify output is non-zero (extremely unlikely for random keys) */
+       while (i < 64) {
+               if (ek[i] != 0)
+                       nonzero++;
+               i++;
+       }
+       ASSERT_GT(nonzero, 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/drivers/crypto/cmh/config b/tools/testing/selftests/drivers/crypto/cmh/config
new file mode 100644
index 000000000000..063c1dd0e23b
--- /dev/null
+++ b/tools/testing/selftests/drivers/crypto/cmh/config
@@ -0,0 +1 @@
+CONFIG_CRYPTO_DEV_CMH=m
--
2.43.7


** This message and any attachments are for the sole use of the intended recipient(s). It may contain information that is confidential and privileged. If you are not the intended recipient of this message, you are prohibited from printing, copying, forwarding or saving it. Please delete the message and attachments and notify the sender immediately. **

Rambus Inc.<http://www.rambus.com>

^ permalink raw reply related

* [PATCH 14/19] crypto: cmh - add ECDH/X25519 kpp
From: Saravanakrishnan Krishnamoorthy @ 2026-06-25 17:33 UTC (permalink / raw)
  To: Albert Ou, Alex Ousherovitch, Conor Dooley, David S. Miller,
	Herbert Xu, Jonathan Corbet, Krzysztof Kozlowski, Palmer Dabbelt,
	Paul Walmsley, Rob Herring, Saravanakrishnan Krishnamoorthy,
	Shuah Khan
  Cc: Alexandre Ghiti, devicetree, Joel Wittenauer, linux-api,
	linux-crypto, linux-doc, linux-kernel, linux-kselftest,
	linux-riscv, Shuah Khan, sipsupport, Thi Nguyen
In-Reply-To: <20260625173328.1140487-1-skrishnamoorthy@rambus.com>

From: Alex Ousherovitch <aousherovitch@rambus.com>

Register ECDH and X25519 kpp algorithms using the CMH PKE core.
Supports P-256, P-384, and Curve25519 for key agreement.

Co-developed-by: Saravanakrishnan Krishnamoorthy <skrishnamoorthy@rambus.com>
Signed-off-by: Saravanakrishnan Krishnamoorthy <skrishnamoorthy@rambus.com>
Signed-off-by: Alex Ousherovitch <aousherovitch@rambus.com>
Reviewed-by: Joel Wittenauer <Joel.Wittenauer@cryptography.com>
Reviewed-by: Thi Nguyen <thin@rambus.com>
---
 drivers/crypto/cmh/Makefile       |   3 +-
 drivers/crypto/cmh/cmh_main.c     |   8 +
 drivers/crypto/cmh/cmh_pke_ecdh.c | 698 ++++++++++++++++++++++++++++++
 3 files changed, 708 insertions(+), 1 deletion(-)
 create mode 100644 drivers/crypto/cmh/cmh_pke_ecdh.c

diff --git a/drivers/crypto/cmh/Makefile b/drivers/crypto/cmh/Makefile
index fdbf66b13628..a4cea0a56fc1 100644
--- a/drivers/crypto/cmh/Makefile
+++ b/drivers/crypto/cmh/Makefile
@@ -32,7 +32,8 @@ cmh-y := \
        cmh_rng.o \
        cmh_pke_common.o \
        cmh_pke_rsa.o \
-       cmh_pke_ecdsa.o
+       cmh_pke_ecdsa.o \
+       cmh_pke_ecdh.o

 # Management ioctl device (/dev/cmh_mgmt): key lifecycle, PKE, PQC ioctls.
 cmh-$(CONFIG_CRYPTO_DEV_CMH_MGMT) += \
diff --git a/drivers/crypto/cmh/cmh_main.c b/drivers/crypto/cmh/cmh_main.c
index 939ff5007755..ea0f32b941f5 100644
--- a/drivers/crypto/cmh/cmh_main.c
+++ b/drivers/crypto/cmh/cmh_main.c
@@ -286,6 +286,11 @@ static int cmh_probe(struct platform_device *pdev)
        if (ret)
                goto err_pke_ecdsa_register;

+       /* Register PKE ECDH/X25519 kpp */
+       ret = cmh_pke_ecdh_register();
+       if (ret)
+               goto err_pke_ecdh_register;
+
        /* Register key management device (/dev/cmh_mgmt) */
        ret = cmh_mgmt_register();
        if (ret)
@@ -298,6 +303,8 @@ static int cmh_probe(struct platform_device *pdev)
        return 0;

 err_mgmt_register:
+       cmh_pke_ecdh_unregister();
+err_pke_ecdh_register:
        cmh_pke_ecdsa_unregister();
 err_pke_ecdsa_register:
        cmh_pke_rsa_unregister();
@@ -358,6 +365,7 @@ static void cmh_remove(struct platform_device *pdev)
        cfg = &dev->config;

        cmh_mgmt_unregister();
+       cmh_pke_ecdh_unregister();
        cmh_pke_ecdsa_unregister();
        cmh_pke_rsa_unregister();
        cmh_ccp_poly_unregister();
diff --git a/drivers/crypto/cmh/cmh_pke_ecdh.c b/drivers/crypto/cmh/cmh_pke_ecdh.c
new file mode 100644
index 000000000000..d8b821cc4217
--- /dev/null
+++ b/drivers/crypto/cmh/cmh_pke_ecdh.c
@@ -0,0 +1,698 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2026 Cryptography Research, Inc. (CRI).
+ * CMH LKM -- ECDH / X25519 kpp Driver
+ *
+ * Registers "ecdh-nist-p256", "ecdh-nist-p384", and "curve25519"
+ * kpp algorithms with priority 300.
+ *
+ * - set_secret: decodes private key from kpp_secret + ecdh struct
+ *   (NIST curves) or raw 32-byte scalar (Curve25519).
+ *   Stores in cmh_key_ctx: raw keys written via SYS_REF_TEMP.
+ *   Datastore-referenced keys are only reachable through the ioctl
+ *   path (cmh_mgmt.c).
+ *
+ * - generate_public_key: Curve25519 uses PKE_CMD_ECDH_KEYGEN, whose
+ *   output is the full public key (Edwards/Montgomery).  For NIST
+ *   Weierstrass curves ECDH_KEYGEN outputs only the X coordinate, so
+ *   we call ECDSA_PUBGEN instead to obtain X||Y, matching the kernel
+ *   ecdh.c pattern that outputs uncompressed X||Y.
+ *
+ * - compute_shared_secret: PKE_CMD_ECDH -> shared secret X coordinate.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/scatterlist.h>
+#include <crypto/kpp.h>
+#include <crypto/ecdh.h>
+#include <crypto/internal/kpp.h>
+#include <crypto/internal/ecc.h>
+
+#include "cmh_pke.h"
+#include "cmh_sys.h"
+#include "cmh_sys_abi.h"
+#include "cmh_txn.h"
+#include "cmh_dma.h"
+#include "cmh_key.h"
+
+/*
+ * ECDH key format: kpp_secret header + key_size(u16) + key data.
+ * We decode this inline to avoid depending on CONFIG_CRYPTO_ECDH.
+ */
+#define ECDH_KPP_SECRET_MIN_SIZE (sizeof(struct kpp_secret) + sizeof(unsigned short))
+
+struct cmh_ecdh_tfm_ctx {
+       struct cmh_key_ctx key; /* private key; set_secret fills it via cmh_key_setkey_raw() */
+       u32 curve;              /* PKE_CURVE_* */
+       u32 clen;               /* coordinate length in bytes */
+};
+
+/* Fetch the driver's per-transform context from a kpp transform. */
+static inline struct cmh_ecdh_tfm_ctx *cmh_ecdh_ctx(struct crypto_kpp *tfm)
+{
+       struct cmh_ecdh_tfm_ctx *ctx = kpp_tfm_ctx(tfm);
+
+       return ctx;
+}
+
+/*
+ * Per-request context for ECDH/X25519 operations.
+ *
+ * generate_public_key: single-phase async VCQ.
+ * compute_shared_secret: 2-phase async VCQ with callback chaining.
+ *   Phase 1: sys_write(sk) + sys_new(ref) + ecdh(peer) + pflush
+ *            -> phase1 callback reads ref, submits Phase 2.
+ *   Phase 2: sys_data(ref, ss_dma) + sys_flush
+ *            -> phase2 callback extracts shared secret, completes req.
+ *
+ * Both phases target the same mbx_idx so the DS reference remains
+ * valid, since DS objects are MBX-scoped.
+ *
+ * The completion callbacks own the cleanup: they unmap each DMA handle
+ * that is not a mapping error, free the buffers and clear the buffer
+ * pointers they release.
+ */
+struct cmh_ecdh_reqctx {
+       /* Buffers */
+       u8 *pk_buf;             /* keygen: output public key */
+       u8 *sk_buf;             /* private key copy */
+       u8 *peer_buf;           /* compute: peer public key */
+       u8 *ss_buf;             /* compute: shared secret output */
+       u64 *ref_buf;           /* compute: DS ref from Phase 1 */
+       /* DMA handles; cleanup skips any for which cmh_dma_map_error() is true */
+       dma_addr_t pk_dma;
+       dma_addr_t sk_dma;
+       dma_addr_t peer_dma;
+       dma_addr_t ss_dma;
+       dma_addr_t ref_dma;
+       /* Sizes and params for Phase 2 re-submit */
+       u32 out_len;            /* keygen: public key size */
+       u32 clen;               /* compute: bytes of ss_buf unmapped and copied to dst */
+       u32 peer_len;           /* compute: length used to unmap peer_dma */
+       u32 sk_len;             /* length used to unmap sk_dma */
+       u32 dma_swap;           /* NOTE(review): presumably swap flags kept for Phase 2 -- confirm */
+       int mbx_idx;            /* pinned MBX for Phase 2 */
+};
+
+/*
+ * Generate a random private key for the transform's NIST curve and
+ * install it as the raw key.
+ *
+ * Uses the kernel ECC library (FIPS 186-5 A.2.2) so that the scalar is
+ * in the valid range [2, n-3] for the curve.  The VLI result is
+ * converted to big-endian bytes before it is handed to
+ * cmh_key_setkey_raw().  Both the on-stack VLI and the temporary byte
+ * buffer are wiped on every path that filled them.
+ *
+ * Returns -EINVAL for an oversized or non-limb-aligned clen or a curve
+ * other than P-256/P-384, -ENOMEM on allocation failure, otherwise the
+ * result of ecc_gen_privkey() / cmh_key_setkey_raw().
+ */
+static int cmh_ecdh_gen_secret_nist(struct cmh_ecdh_tfm_ctx *ctx)
+{
+       unsigned int ndigits = ctx->clen / sizeof(u64);
+       u64 priv[ECC_MAX_DIGITS];
+       unsigned int curve_id;
+       u8 *be_key;
+       int ret;
+
+       /* The modulo test rejects clen values that ndigits would truncate */
+       if (ndigits > ECC_MAX_DIGITS || ctx->clen % sizeof(u64))
+               return -EINVAL;
+
+       if (ctx->curve == PKE_CURVE_P256)
+               curve_id = ECC_CURVE_NIST_P256;
+       else if (ctx->curve == PKE_CURVE_P384)
+               curve_id = ECC_CURVE_NIST_P384;
+       else
+               return -EINVAL;
+
+       ret = ecc_gen_privkey(curve_id, ndigits, priv);
+       if (ret)
+               goto out_wipe;
+
+       be_key = kmalloc(ctx->clen, GFP_KERNEL);
+       if (!be_key) {
+               ret = -ENOMEM;
+               goto out_wipe;
+       }
+
+       /* Convert VLI (native LE-digit-order) to big-endian bytes */
+       ecc_swap_digits(priv, (u64 *)be_key, ndigits);
+       memzero_explicit(priv, sizeof(priv));
+
+       ret = cmh_key_setkey_raw(&ctx->key, be_key, ctx->clen, CORE_ID_PKE);
+       kfree_sensitive(be_key);
+       return ret;
+
+out_wipe:
+       memzero_explicit(priv, sizeof(priv));
+       return ret;
+}
+
+/*
+ * set_secret for NIST curves.
+ *
+ * @buf holds a kpp_secret header, a u16 key_size and then key_size
+ * bytes of private scalar.  The header type must be
+ * CRYPTO_KPP_SECRET_TYPE_ECDH and its len must match the layout
+ * exactly.  key_size == 0 requests a freshly generated random key;
+ * otherwise key_size must equal the curve's coordinate length.
+ * Curve25519 does not use this format -- it takes a raw 32-byte scalar.
+ *
+ * NOTE(review): a caller-supplied scalar is only length-checked here;
+ * confirm that range validation happens elsewhere.
+ */
+static int cmh_ecdh_set_secret_nist(struct crypto_kpp *tfm,
+                                   const void *buf, unsigned int len)
+{
+       struct cmh_ecdh_tfm_ctx *ctx = cmh_ecdh_ctx(tfm);
+       const u8 *cursor = buf;
+       unsigned short key_size;
+       struct kpp_secret hdr;
+
+       if (!buf || len < ECDH_KPP_SECRET_MIN_SIZE)
+               return -EINVAL;
+
+       /* Copy the fields out; @buf need not be suitably aligned */
+       memcpy(&hdr, cursor, sizeof(hdr));
+       cursor += sizeof(hdr);
+
+       if (hdr.type != CRYPTO_KPP_SECRET_TYPE_ECDH || len < hdr.len)
+               return -EINVAL;
+
+       memcpy(&key_size, cursor, sizeof(key_size));
+       cursor += sizeof(key_size);
+
+       if (!key_size) {
+               if (hdr.len != ECDH_KPP_SECRET_MIN_SIZE)
+                       return -EINVAL;
+               return cmh_ecdh_gen_secret_nist(ctx);
+       }
+
+       if (key_size != ctx->clen ||
+           hdr.len != ECDH_KPP_SECRET_MIN_SIZE + key_size)
+               return -EINVAL;
+
+       return cmh_key_setkey_raw(&ctx->key, cursor, key_size, CORE_ID_PKE);
+}
+
+/*
+ * set_secret for Curve25519: @buf is the raw private scalar and must
+ * be exactly pke_curve_clen(PKE_CURVE_25519) bytes long, else -EINVAL.
+ */
+static int cmh_ecdh_set_secret_x25519(struct crypto_kpp *tfm,
+                                     const void *buf, unsigned int len)
+{
+       struct cmh_ecdh_tfm_ctx *ctx = cmh_ecdh_ctx(tfm);
+
+       if (len == pke_curve_clen(PKE_CURVE_25519))
+               return cmh_key_setkey_raw(&ctx->key, buf, len, CORE_ID_PKE);
+
+       return -EINVAL;
+}
+
+/*
+ * Completion callback for generate_public_key.
+ *
+ * -EINPROGRESS is forwarded without releasing anything (presumably the
+ * backlog notification -- TODO confirm).  For any other status both
+ * DMA mappings are released; on success out_len bytes of the public
+ * key are copied into req->dst and dst_len is updated, with a failed
+ * or short copy turning the result into -EINVAL.  The private-key copy
+ * is freed with kfree_sensitive(), the public-key buffer with kfree(),
+ * and the request is completed with the final status.
+ */
+static void cmh_ecdh_keygen_complete(void *data, int error)
+{
+       struct kpp_request *req = data;
+       struct cmh_ecdh_reqctx *rctx = kpp_request_ctx(req);
+       int nents;
+
+       if (error == -EINPROGRESS) {
+               cmh_complete(&req->base, error);
+               return;
+       }
+
+       if (!cmh_dma_map_error(rctx->sk_dma))
+               cmh_dma_unmap_single(rctx->sk_dma, rctx->sk_len,
+                                    DMA_TO_DEVICE);
+       if (!cmh_dma_map_error(rctx->pk_dma))
+               cmh_dma_unmap_single(rctx->pk_dma, rctx->out_len,
+                                    DMA_FROM_DEVICE);
+
+       if (error)
+               goto out_release;
+
+       nents = sg_nents_for_len(req->dst, rctx->out_len);
+       if (nents >= 0 &&
+           sg_copy_from_buffer(req->dst, nents, rctx->pk_buf,
+                               rctx->out_len) == rctx->out_len)
+               req->dst_len = rctx->out_len;
+       else
+               error = -EINVAL;
+
+out_release:
+       kfree_sensitive(rctx->sk_buf);
+       rctx->sk_buf = NULL;
+       kfree(rctx->pk_buf);
+       rctx->pk_buf = NULL;
+       cmh_complete(&req->base, error);
+}
+
+/*
+ * generate_public_key: derive the public key for the stored private key.
+ *
+ * NIST curves use ECDSA_PUBGEN, which gives us (X,Y): ECDH_KEYGEN
+ * would output only the public key X-coordinate, but the kernel kpp
+ * interface expects uncompressed X||Y.  For Curve25519, ECDH_KEYGEN
+ * gives us the Montgomery u-coordinate which is the full public key.
+ *
+ * Requires a raw key (CMH_KEY_RAW) and a destination of at least
+ * out_len bytes, else -EINVAL.  An output buffer and a copy of the
+ * private key are allocated and DMA-mapped, then a VCQ of
+ * sys_write(SYS_REF_TEMP) + keygen/pubgen + flush is submitted
+ * asynchronously with cmh_ecdh_keygen_complete() as the callback.
+ *
+ * Returns -EINPROGRESS when the submit returns 0 and -EBUSY when it
+ * returns -EBUSY; in both cases nothing is released here and cleanup
+ * is left to the callback.  Any other outcome unmaps and frees
+ * everything here and returns the error (-ENOMEM for allocation or
+ * mapping failures).
+ *
+ * NOTE(review): -EBUSY is returned without cleanup even when
+ * CRYPTO_TFM_REQ_MAY_BACKLOG is not set -- confirm that
+ * cmh_tm_submit_async() only returns -EBUSY for a queued request.
+ */
+static int cmh_ecdh_generate_public_key(struct kpp_request *req)
+{
+       struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
+       struct cmh_ecdh_tfm_ctx *ctx = cmh_ecdh_ctx(tfm);
+       struct cmh_ecdh_reqctx *rctx = kpp_request_ctx(req);
+       u32 clen = ctx->clen;
+       bool is_25519 = (ctx->curve == PKE_CURVE_25519);
+       u32 out_len = is_25519 ? clen : 2 * clen; /* single coordinate vs. X||Y */
+       struct vcq_cmd vcq[PKE_VCQ_CMDS_MAX];
+       struct core_dispatch dd;
+       u32 swap, dma_swap;
+       int ret, idx;
+       gfp_t gfp;
+
+       if (ctx->key.mode != CMH_KEY_RAW)
+               return -EINVAL;
+       if (req->dst_len < out_len)
+               return -EINVAL;
+
+       gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
+             GFP_KERNEL : GFP_ATOMIC;
+
+       memset(rctx, 0, sizeof(*rctx));
+       rctx->out_len = out_len;
+       rctx->sk_len = ctx->key.raw.len;
+       rctx->pk_dma = DMA_MAPPING_ERROR; /* lets cleanup tell mapped from unmapped */
+       rctx->sk_dma = DMA_MAPPING_ERROR;
+
+       rctx->pk_buf = kzalloc(out_len, gfp);
+       if (!rctx->pk_buf)
+               return -ENOMEM;
+
+       rctx->pk_dma = cmh_dma_map_single(rctx->pk_buf, out_len,
+                                         DMA_FROM_DEVICE);
+       if (cmh_dma_map_error(rctx->pk_dma)) {
+               ret = -ENOMEM;
+               goto out_free;
+       }
+
+       swap = PKE_SWAP_FLAGS;
+       dma_swap = pke_swap_flags(ctx->curve);
+
+       dd = cmh_core_select_instance(CMH_CORE_PKE);
+
+       rctx->sk_buf = kmemdup(ctx->key.raw.data, ctx->key.raw.len, gfp);
+       if (!rctx->sk_buf) {
+               ret = -ENOMEM;
+               goto out_unmap;
+       }
+       rctx->sk_dma = cmh_dma_map_single(rctx->sk_buf, ctx->key.raw.len,
+                                         DMA_TO_DEVICE);
+       if (cmh_dma_map_error(rctx->sk_dma)) {
+               ret = -ENOMEM;
+               goto out_unmap;
+       }
+
+       vcq_set_header(&vcq[0], PKE_VCQ_CMDS_MAX);
+       idx = 1; /* slot 0 holds the header */
+       vcq_add_sys_write(&vcq[idx], SYS_REF_TEMP, rctx->sk_dma,
+                         SYS_REF_NONE, ctx->key.raw.len,
+                         ctx->key.raw.sys_type);
+       vcq[idx].id |= dma_swap;
+       idx++;
+       if (is_25519)
+               vcq_add_pke_ecdh_keygen(&vcq[idx++], dd.core_id, ctx->curve,
+                                       clen, rctx->pk_dma, SYS_REF_TEMP,
+                                       swap);
+       else
+               vcq_add_pke_ecdsa_pubgen(&vcq[idx++], dd.core_id,
+                                        ctx->curve, clen, rctx->pk_dma,
+                                        SYS_REF_TEMP, swap);
+       vcq_add_pke_flush(&vcq[idx++], dd.core_id);
+
+       ret = cmh_tm_submit_async(vcq, PKE_VCQ_CMDS_MAX, 1, dd.mbx_idx,
+                                 cmh_ecdh_keygen_complete, req,
+                                 !!(req->base.flags &
+                                    CRYPTO_TFM_REQ_MAY_BACKLOG), 0);
+       if (ret == -EBUSY)
+               return -EBUSY;
+       if (!ret)
+               return -EINPROGRESS;
+
+out_unmap:
+       if (!cmh_dma_map_error(rctx->sk_dma))
+               cmh_dma_unmap_single(rctx->sk_dma, ctx->key.raw.len,
+                                    DMA_TO_DEVICE);
+       if (!cmh_dma_map_error(rctx->pk_dma))
+               cmh_dma_unmap_single(rctx->pk_dma, out_len,
+                                    DMA_FROM_DEVICE);
+
+out_free:
+       kfree_sensitive(rctx->sk_buf); /* sk_buf is still NULL when pk mapping failed */
+       kfree(rctx->pk_buf);
+       return ret;
+}
+
+/*
+ * Completion callback for Phase 2 of compute_shared_secret.
+ *
+ * -EINPROGRESS is forwarded without releasing anything.  For any other
+ * status the shared-secret mapping is released; on success clen bytes
+ * of the secret are copied into req->dst and dst_len is updated, with
+ * a failed or short copy turning the result into -EINVAL.  The DS
+ * reference buffer is freed with kfree(), the secret buffer with
+ * kfree_sensitive(), and the request is completed with the final
+ * status.
+ */
+static void cmh_ecdh_ss_phase2_complete(void *data, int error)
+{
+       struct kpp_request *req = data;
+       struct cmh_ecdh_reqctx *rctx = kpp_request_ctx(req);
+       int nents;
+
+       if (error == -EINPROGRESS) {
+               cmh_complete(&req->base, error);
+               return;
+       }
+
+       if (!cmh_dma_map_error(rctx->ss_dma))
+               cmh_dma_unmap_single(rctx->ss_dma, rctx->clen,
+                                    DMA_FROM_DEVICE);
+
+       if (error)
+               goto out_release;
+
+       nents = sg_nents_for_len(req->dst, rctx->clen);
+       if (nents >= 0 &&
+           sg_copy_from_buffer(req->dst, nents, rctx->ss_buf,
+                               rctx->clen) == rctx->clen)
+               req->dst_len = rctx->clen;
+       else
+               error = -EINVAL;
+
+out_release:
+       kfree(rctx->ref_buf);
+       rctx->ref_buf = NULL;
+       kfree_sensitive(rctx->ss_buf);
+       rctx->ss_buf = NULL;
+       cmh_complete(&req->base, error);
+}
+
+/*
+ * Phase 1 completion callback: the ECDH command has finished and the
+ * reference of the DS object holding the shared secret has been written
+ * to rctx->ref_buf.
+ *
+ * @data:  the struct kpp_request handed to cmh_tm_submit_async()
+ * @error: 0 on success, negative errno on failure, or -EINPROGRESS,
+ *         which is forwarded to the caller unchanged
+ *
+ * Releases the private-key and peer-key buffers, then submits phase 2
+ * (SYS data read of the DS object into rctx->ss_buf).  On any failure the
+ * remaining buffers are released here and the request is completed with
+ * the error.
+ */
+static void cmh_ecdh_ss_phase1_complete(void *data, int error)
+{
+       struct kpp_request *req = data;
+       struct cmh_ecdh_reqctx *rctx = kpp_request_ctx(req);
+       struct vcq_cmd vcq[3];
+       int ret;
+
+       if (error == -EINPROGRESS) {
+               cmh_complete(&req->base, error);
+               return;
+       }
+
+       /* Phase 1-only resources: sk, peer -- always clean up */
+       if (!cmh_dma_map_error(rctx->sk_dma))
+               cmh_dma_unmap_single(rctx->sk_dma, rctx->sk_len,
+                                    DMA_TO_DEVICE);
+       kfree_sensitive(rctx->sk_buf);
+       rctx->sk_buf = NULL;
+
+       if (!cmh_dma_map_error(rctx->peer_dma))
+               cmh_dma_unmap_single(rctx->peer_dma, rctx->peer_len,
+                                    DMA_TO_DEVICE);
+       kfree(rctx->peer_buf);
+       rctx->peer_buf = NULL;
+
+       if (error)
+               goto out_cleanup;
+
+       /* Read the DS reference written by Phase 1 */
+       cmh_dma_sync_for_cpu(rctx->ref_dma, sizeof(u64), DMA_FROM_DEVICE);
+       cmh_dma_unmap_single(rctx->ref_dma, sizeof(u64), DMA_FROM_DEVICE);
+       /* Mark as unmapped so out_cleanup does not unmap it a second time */
+       rctx->ref_dma = DMA_MAPPING_ERROR;
+
+       /* Phase 2: extract shared secret from DS */
+       vcq_set_header(&vcq[0], 3);
+       vcq_add_sys_data(&vcq[1], *rctx->ref_buf, rctx->ss_dma,
+                        rctx->clen);
+       vcq[1].id |= rctx->dma_swap;
+       vcq_add_sys_flush(&vcq[2]);
+
+       /*
+        * Backlogging is requested unconditionally here, independent of
+        * the caller's CRYPTO_TFM_REQ_MAY_BACKLOG flag.
+        */
+       ret = cmh_tm_submit_async(vcq, 3, 1, rctx->mbx_idx,
+                                 cmh_ecdh_ss_phase2_complete, req,
+                                 true, 0);
+       /* Accepted or backlogged: phase 2 completion owns ss/ref buffers */
+       if (ret == -EBUSY || !ret)
+               return;
+
+       error = ret;
+
+out_cleanup:
+       if (!cmh_dma_map_error(rctx->ref_dma))
+               cmh_dma_unmap_single(rctx->ref_dma, sizeof(u64),
+                                    DMA_FROM_DEVICE);
+       if (!cmh_dma_map_error(rctx->ss_dma))
+               cmh_dma_unmap_single(rctx->ss_dma, rctx->clen,
+                                    DMA_FROM_DEVICE);
+       kfree(rctx->ref_buf);
+       rctx->ref_buf = NULL;
+       kfree_sensitive(rctx->ss_buf);
+       rctx->ss_buf = NULL;
+       cmh_complete(&req->base, error);
+}
+
+/*
+ * compute_shared_secret: PKE_CMD_ECDH.
+ *
+ * req->src = peer public key (X||Y for NIST, raw 32B for Curve25519).
+ * Output = shared secret X coordinate (clen bytes).
+ *
+ * The CMH ECDH command stores the shared secret in a DS object,
+ * not directly to DMA.  We create a DS slot with SYS_CMD_NEW,
+ * reference it via SYS_REF_LAST, then extract the result with a
+ * second VCQ submission using SYS_CMD_DATA with the actual ref.
+ *
+ * Return: -EINPROGRESS when phase 1 was submitted, -EBUSY when
+ * cmh_tm_submit_async() reported -EBUSY (buffers stay owned by the
+ * completion path in both cases), -EINVAL when the key is not a raw key,
+ * the source/destination lengths are too small or the peer key cannot be
+ * copied, -ENOMEM on allocation or DMA-mapping failure, or another
+ * negative errno returned by the submission.
+ */
+static int cmh_ecdh_compute_shared_secret(struct kpp_request *req)
+{
+       struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
+       struct cmh_ecdh_tfm_ctx *ctx = cmh_ecdh_ctx(tfm);
+       struct cmh_ecdh_reqctx *rctx = kpp_request_ctx(req);
+       u32 clen = ctx->clen;
+       bool is_25519 = (ctx->curve == PKE_CURVE_25519);
+       u32 peer_len = is_25519 ? clen : 2 * clen;
+       u32 ss_type = SYS_TYPE_SET(SYS_TYPE_FLAG_PT, CORE_ID_PKE);
+       struct vcq_cmd vcq[5];
+       struct core_dispatch dd;
+       u32 swap, dma_swap;
+       int ret, idx, nents;
+       gfp_t gfp;
+
+       if (ctx->key.mode != CMH_KEY_RAW)
+               return -EINVAL;
+       if (req->src_len < peer_len || req->dst_len < clen)
+               return -EINVAL;
+
+       gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
+             GFP_KERNEL : GFP_ATOMIC;
+
+       /*
+        * Start from a clean request context with every DMA handle marked
+        * invalid, so the unwind paths can tell what was actually mapped.
+        */
+       memset(rctx, 0, sizeof(*rctx));
+       rctx->clen = clen;
+       rctx->peer_len = peer_len;
+       rctx->sk_len = ctx->key.raw.len;
+       rctx->pk_dma = DMA_MAPPING_ERROR;
+       rctx->sk_dma = DMA_MAPPING_ERROR;
+       rctx->peer_dma = DMA_MAPPING_ERROR;
+       rctx->ss_dma = DMA_MAPPING_ERROR;
+       rctx->ref_dma = DMA_MAPPING_ERROR;
+
+       rctx->peer_buf = kmalloc(peer_len, gfp);
+       rctx->ss_buf = kzalloc(clen, gfp);
+       rctx->ref_buf = kzalloc_obj(u64, gfp);
+       if (!rctx->peer_buf || !rctx->ss_buf || !rctx->ref_buf) {
+               ret = -ENOMEM;
+               goto out_free;
+       }
+
+       /* Linearise the peer public key from the source scatterlist */
+       nents = sg_nents_for_len(req->src, peer_len);
+       if (nents < 0 ||
+           sg_pcopy_to_buffer(req->src, nents, rctx->peer_buf,
+                              peer_len, 0) != peer_len) {
+               ret = -EINVAL;
+               goto out_free;
+       }
+
+       rctx->peer_dma = cmh_dma_map_single(rctx->peer_buf, peer_len,
+                                           DMA_TO_DEVICE);
+       rctx->ss_dma = cmh_dma_map_single(rctx->ss_buf, clen,
+                                         DMA_FROM_DEVICE);
+       rctx->ref_dma = cmh_dma_map_single(rctx->ref_buf, sizeof(u64),
+                                          DMA_FROM_DEVICE);
+
+       if (cmh_dma_map_error(rctx->peer_dma) ||
+           cmh_dma_map_error(rctx->ss_dma) ||
+           cmh_dma_map_error(rctx->ref_dma)) {
+               ret = -ENOMEM;
+               goto out_unmap;
+       }
+
+       swap = PKE_SWAP_FLAGS;
+       dma_swap = pke_swap_flags(ctx->curve);
+       /* Saved for the phase 2 submission built in the completion path */
+       rctx->dma_swap = dma_swap;
+
+       dd = cmh_core_select_instance(CMH_CORE_PKE);
+       /* Phase 2 is submitted to the same mailbox as phase 1 */
+       rctx->mbx_idx = dd.mbx_idx;
+
+       /* Private copy of the raw key for the duration of the request */
+       rctx->sk_buf = kmemdup(ctx->key.raw.data, ctx->key.raw.len, gfp);
+       if (!rctx->sk_buf) {
+               ret = -ENOMEM;
+               goto out_unmap;
+       }
+       rctx->sk_dma = cmh_dma_map_single(rctx->sk_buf, ctx->key.raw.len,
+                                         DMA_TO_DEVICE);
+       if (cmh_dma_map_error(rctx->sk_dma)) {
+               ret = -ENOMEM;
+               goto out_unmap;
+       }
+
+       /* header + key write + DS slot creation + ECDH + flush = 5 slots */
+       vcq_set_header(&vcq[0], 5);
+       idx = 1;
+       vcq_add_sys_write(&vcq[idx], SYS_REF_TEMP, rctx->sk_dma,
+                         SYS_REF_NONE, ctx->key.raw.len,
+                         ctx->key.raw.sys_type);
+       vcq[idx].id |= dma_swap;
+       idx++;
+       vcq_add_sys_new(&vcq[idx++], 0, rctx->ref_dma, clen);
+       vcq_add_pke_ecdh(&vcq[idx++], dd.core_id, ctx->curve, clen,
+                        clen, ss_type, rctx->peer_dma,
+                        SYS_REF_TEMP, SYS_REF_LAST, swap);
+       vcq_add_pke_flush(&vcq[idx++], dd.core_id);
+
+       ret = cmh_tm_submit_async(vcq, 5, 1, dd.mbx_idx,
+                                 cmh_ecdh_ss_phase1_complete, req,
+                                 !!(req->base.flags &
+                                    CRYPTO_TFM_REQ_MAY_BACKLOG), 0);
+       /* In both cases below the completion callback owns the buffers */
+       if (ret == -EBUSY)
+               return -EBUSY;
+       if (!ret)
+               return -EINPROGRESS;
+
+       /* Submission failed outright: fall through and unwind everything */
+out_unmap:
+       if (!cmh_dma_map_error(rctx->sk_dma))
+               cmh_dma_unmap_single(rctx->sk_dma, rctx->sk_len,
+                                    DMA_TO_DEVICE);
+       if (!cmh_dma_map_error(rctx->ss_dma))
+               cmh_dma_unmap_single(rctx->ss_dma, clen,
+                                    DMA_FROM_DEVICE);
+       if (!cmh_dma_map_error(rctx->ref_dma))
+               cmh_dma_unmap_single(rctx->ref_dma, sizeof(u64),
+                                    DMA_FROM_DEVICE);
+       if (!cmh_dma_map_error(rctx->peer_dma))
+               cmh_dma_unmap_single(rctx->peer_dma, peer_len,
+                                    DMA_TO_DEVICE);
+
+out_free:
+       kfree_sensitive(rctx->sk_buf);
+       kfree(rctx->ref_buf);
+       kfree_sensitive(rctx->ss_buf);
+       kfree(rctx->peer_buf);
+       return ret;
+}
+
+/* kpp .max_size for the NIST curves: room for one X||Y public key. */
+static unsigned int cmh_ecdh_max_size(struct crypto_kpp *tfm)
+{
+       const struct cmh_ecdh_tfm_ctx *tctx = cmh_ecdh_ctx(tfm);
+       unsigned int coord_len = tctx->clen;
+
+       /* generate_public_key emits two coordinates, the largest output */
+       return coord_len + coord_len;
+}
+
+/* kpp .max_size for Curve25519: every output is a single coordinate. */
+static unsigned int cmh_x25519_max_size(struct crypto_kpp *tfm)
+{
+       unsigned int coord_len = pke_curve_clen(PKE_CURVE_25519);
+
+       return coord_len;
+}
+
+/*
+ * Common tfm initialisation shared by all curves: clear the transform
+ * context, record the curve and its coordinate length, and declare the
+ * per-request context size to the kpp core.
+ *
+ * Return: always 0.
+ */
+static int cmh_ecdh_init_curve(struct crypto_kpp *tfm, u32 curve)
+{
+       struct cmh_ecdh_tfm_ctx *ctx = cmh_ecdh_ctx(tfm);
+
+       memset(ctx, 0, sizeof(*ctx));
+       ctx->curve = curve;
+       ctx->clen = pke_curve_clen(curve);
+       kpp_set_reqsize(tfm, sizeof(struct cmh_ecdh_reqctx));
+       return 0;
+}
+
+/* kpp .init for ecdh-nist-p256 */
+static int cmh_ecdh_p256_init(struct crypto_kpp *tfm)
+{
+       return cmh_ecdh_init_curve(tfm, PKE_CURVE_P256);
+}
+
+/* kpp .init for ecdh-nist-p384 */
+static int cmh_ecdh_p384_init(struct crypto_kpp *tfm)
+{
+       return cmh_ecdh_init_curve(tfm, PKE_CURVE_P384);
+}
+
+/* kpp .init for curve25519 */
+static int cmh_x25519_init(struct crypto_kpp *tfm)
+{
+       return cmh_ecdh_init_curve(tfm, PKE_CURVE_25519);
+}
+
+/* kpp .exit: release the key material held by the transform context. */
+static void cmh_ecdh_exit(struct crypto_kpp *tfm)
+{
+       cmh_key_destroy(&cmh_ecdh_ctx(tfm)->key);
+}
+
+/*
+ * Flags shared by every ECDH kpp algorithm below:
+ *   ASYNC            - requests complete from a callback
+ *   KERN_DRIVER_ONLY - backed by the CMH hardware, not by CPU code
+ *   ALLOCATES_MEMORY - the request path allocates bounce buffers, so
+ *                      users that cannot tolerate allocation failure
+ *                      must be able to filter these implementations out
+ */
+#define CMH_ECDH_CRA_FLAGS     (CRYPTO_ALG_ASYNC | \
+                                CRYPTO_ALG_KERN_DRIVER_ONLY | \
+                                CRYPTO_ALG_ALLOCATES_MEMORY)
+
+/* kpp algorithms registered by cmh_pke_ecdh_register() */
+static struct kpp_alg cmh_ecdh_algs[] = {
+       {
+               .set_secret             = cmh_ecdh_set_secret_nist,
+               .generate_public_key    = cmh_ecdh_generate_public_key,
+               .compute_shared_secret  = cmh_ecdh_compute_shared_secret,
+               .max_size               = cmh_ecdh_max_size,
+               .init                   = cmh_ecdh_p256_init,
+               .exit                   = cmh_ecdh_exit,
+               .base = {
+                       .cra_name         = "ecdh-nist-p256",
+                       .cra_driver_name  = "cri-cmh-ecdh-nist-p256",
+                       .cra_priority     = 300,
+                       .cra_flags        = CMH_ECDH_CRA_FLAGS,
+                       .cra_module       = THIS_MODULE,
+                       .cra_ctxsize      = sizeof(struct cmh_ecdh_tfm_ctx),
+               },
+       },
+       {
+               .set_secret             = cmh_ecdh_set_secret_nist,
+               .generate_public_key    = cmh_ecdh_generate_public_key,
+               .compute_shared_secret  = cmh_ecdh_compute_shared_secret,
+               .max_size               = cmh_ecdh_max_size,
+               .init                   = cmh_ecdh_p384_init,
+               .exit                   = cmh_ecdh_exit,
+               .base = {
+                       .cra_name         = "ecdh-nist-p384",
+                       .cra_driver_name  = "cri-cmh-ecdh-nist-p384",
+                       .cra_priority     = 300,
+                       .cra_flags        = CMH_ECDH_CRA_FLAGS,
+                       .cra_module       = THIS_MODULE,
+                       .cra_ctxsize      = sizeof(struct cmh_ecdh_tfm_ctx),
+               },
+       },
+       {
+               .set_secret             = cmh_ecdh_set_secret_x25519,
+               .generate_public_key    = cmh_ecdh_generate_public_key,
+               .compute_shared_secret  = cmh_ecdh_compute_shared_secret,
+               .max_size               = cmh_x25519_max_size,
+               .init                   = cmh_x25519_init,
+               .exit                   = cmh_ecdh_exit,
+               .base = {
+                       .cra_name         = "curve25519",
+                       .cra_driver_name  = "cri-cmh-curve25519",
+                       .cra_priority     = 300,
+                       .cra_flags        = CMH_ECDH_CRA_FLAGS,
+                       .cra_module       = THIS_MODULE,
+                       .cra_ctxsize      = sizeof(struct cmh_ecdh_tfm_ctx),
+               },
+       },
+};
+
+/**
+ * cmh_pke_ecdh_register() - Register ECDH kpp algorithms with the crypto framework
+ *
+ * Registration is all-or-nothing: when one algorithm fails to register,
+ * those registered before it are unregistered again in reverse order.
+ *
+ * Return: 0 on success, negative errno on failure.
+ */
+int cmh_pke_ecdh_register(void)
+{
+       int registered;
+       int err;
+
+       for (registered = 0; registered < ARRAY_SIZE(cmh_ecdh_algs);
+            registered++) {
+               struct kpp_alg *alg = &cmh_ecdh_algs[registered];
+
+               err = crypto_register_kpp(alg);
+               if (!err)
+                       continue;
+
+               dev_err(cmh_dev(), "cmh: failed to register %s (%d)\n",
+                       alg->base.cra_name, err);
+
+               /* Roll back everything registered so far, newest first */
+               while (registered--)
+                       crypto_unregister_kpp(&cmh_ecdh_algs[registered]);
+               return err;
+       }
+
+       return 0;
+}
+
+/**
+ * cmh_pke_ecdh_unregister() - Unregister ECDH kpp algorithms from the crypto framework
+ *
+ * Algorithms are removed in the reverse of their registration order.
+ */
+void cmh_pke_ecdh_unregister(void)
+{
+       int i;
+
+       for (i = ARRAY_SIZE(cmh_ecdh_algs) - 1; i >= 0; i--)
+               crypto_unregister_kpp(&cmh_ecdh_algs[i]);
+}
--
2.43.7


** This message and any attachments are for the sole use of the intended recipient(s). It may contain information that is confidential and privileged. If you are not the intended recipient of this message, you are prohibited from printing, copying, forwarding or saving it. Please delete the message and attachments and notify the sender immediately. **

Rambus Inc.<http://www.rambus.com>

^ permalink raw reply related

* [PATCH 19/19] MAINTAINERS: add Rambus CryptoManager Hub (CMH)
From: Saravanakrishnan Krishnamoorthy @ 2026-06-25 17:33 UTC (permalink / raw)
  To: Albert Ou, Alex Ousherovitch, Conor Dooley, David S. Miller,
	Herbert Xu, Jonathan Corbet, Krzysztof Kozlowski, Palmer Dabbelt,
	Paul Walmsley, Rob Herring, Saravanakrishnan Krishnamoorthy,
	Shuah Khan
  Cc: Alexandre Ghiti, devicetree, Joel Wittenauer, linux-api,
	linux-crypto, linux-doc, linux-kernel, linux-kselftest,
	linux-riscv, Shuah Khan, sipsupport, Thi Nguyen
In-Reply-To: <20260625173328.1140487-1-skrishnamoorthy@rambus.com>

From: Alex Ousherovitch <aousherovitch@rambus.com>

Add MAINTAINERS entry for the CRI CryptoManager Hub (CMH) hardware
crypto accelerator driver under drivers/crypto/cmh/.

Co-developed-by: Saravanakrishnan Krishnamoorthy <skrishnamoorthy@rambus.com>
Signed-off-by: Saravanakrishnan Krishnamoorthy <skrishnamoorthy@rambus.com>
Signed-off-by: Alex Ousherovitch <aousherovitch@rambus.com>
Reviewed-by: Joel Wittenauer <Joel.Wittenauer@cryptography.com>
Reviewed-by: Thi Nguyen <thin@rambus.com>
---
 MAINTAINERS | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 90034eb7874e..ecb389795e3d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6797,6 +6797,25 @@ F:       kernel/cred.c
 F:     rust/kernel/cred.rs
 F:     Documentation/security/credentials.rst

+CRI CRYPTOMANAGER HUB (CMH) HARDWARE CRYPTO ACCELERATOR
+M:     Alex Ousherovitch <aousherovitch@rambus.com>
+M:     Saravanakrishnan Krishnamoorthy <skrishnamoorthy@rambus.com>
+R:     Joel Wittenauer <Joel.Wittenauer@cryptography.com>
+R:     Thi Nguyen <thin@rambus.com>
+L:     linux-crypto@vger.kernel.org
+L:     sipsupport@rambus.com (moderated for non-subscribers)
+S:     Maintained
+T:     git https://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git
+F:     Documentation/ABI/testing/cmh-mgmt
+F:     Documentation/ABI/testing/debugfs-driver-cmh
+F:     Documentation/ABI/testing/sysfs-driver-cmh
+F:     Documentation/crypto/device_drivers/cmh.rst
+F:     Documentation/devicetree/bindings/crypto/cri,cmh.yaml
+F:     Documentation/userspace-api/ioctl/cmh_mgmt.rst
+F:     drivers/crypto/cmh/
+F:     include/uapi/linux/cmh_mgmt_ioctl.h
+F:     tools/testing/selftests/drivers/crypto/cmh/
+
 INTEL CRPS COMMON REDUNDANT PSU DRIVER
 M:     Ninad Palsule <ninad@linux.ibm.com>
 L:     linux-hwmon@vger.kernel.org
--
2.43.7


** This message and any attachments are for the sole use of the intended recipient(s). It may contain information that is confidential and privileged. If you are not the intended recipient of this message, you are prohibited from printing, copying, forwarding or saving it. Please delete the message and attachments and notify the sender immediately. **

Rambus Inc.<http://www.rambus.com>

^ permalink raw reply related

* [PATCH 08/19] crypto: cmh - add AES skcipher/aead/cmac
From: Saravanakrishnan Krishnamoorthy @ 2026-06-25 17:33 UTC (permalink / raw)
  To: Albert Ou, Alex Ousherovitch, Conor Dooley, David S. Miller,
	Herbert Xu, Jonathan Corbet, Krzysztof Kozlowski, Palmer Dabbelt,
	Paul Walmsley, Rob Herring, Saravanakrishnan Krishnamoorthy,
	Shuah Khan
  Cc: Alexandre Ghiti, devicetree, Joel Wittenauer, linux-api,
	linux-crypto, linux-doc, linux-kernel, linux-kselftest,
	linux-riscv, Shuah Khan, sipsupport, Thi Nguyen
In-Reply-To: <20260625173328.1140487-1-skrishnamoorthy@rambus.com>

From: Alex Ousherovitch <aousherovitch@rambus.com>

Register AES algorithms using the CMH AES core (core ID 0x03):
- skcipher: AES-ECB, AES-CBC, AES-CTR, AES-XTS, AES-CFB
- aead: AES-GCM, AES-CCM
- ahash: AES-CMAC

Supports 128, 192, and 256-bit keys.  AEAD algorithms handle
associated data, payload, and authentication tag with correct
encrypt/decrypt separation.

Co-developed-by: Saravanakrishnan Krishnamoorthy <skrishnamoorthy@rambus.com>
Signed-off-by: Saravanakrishnan Krishnamoorthy <skrishnamoorthy@rambus.com>
Signed-off-by: Alex Ousherovitch <aousherovitch@rambus.com>
Reviewed-by: Joel Wittenauer <Joel.Wittenauer@cryptography.com>
Reviewed-by: Thi Nguyen <thin@rambus.com>
---
 drivers/crypto/cmh/Makefile          |   5 +-
 drivers/crypto/cmh/cmh_aes.c         | 736 ++++++++++++++++++++
 drivers/crypto/cmh/cmh_aes_aead.c    | 987 +++++++++++++++++++++++++++
 drivers/crypto/cmh/cmh_aes_cmac.c    | 537 +++++++++++++++
 drivers/crypto/cmh/cmh_main.c        |  25 +
 drivers/crypto/cmh/include/cmh_aes.h |  24 +
 6 files changed, 2313 insertions(+), 1 deletion(-)
 create mode 100644 drivers/crypto/cmh/cmh_aes.c
 create mode 100644 drivers/crypto/cmh/cmh_aes_aead.c
 create mode 100644 drivers/crypto/cmh/cmh_aes_cmac.c
 create mode 100644 drivers/crypto/cmh/include/cmh_aes.h

diff --git a/drivers/crypto/cmh/Makefile b/drivers/crypto/cmh/Makefile
index b3018fbcf211..ced8d1748e6c 100644
--- a/drivers/crypto/cmh/Makefile
+++ b/drivers/crypto/cmh/Makefile
@@ -19,7 +19,10 @@ cmh-y := \
        cmh_hmac.o \
        cmh_cshake.o \
        cmh_kmac.o \
-       cmh_sm3.o
+       cmh_sm3.o \
+       cmh_aes.o \
+       cmh_aes_aead.o \
+       cmh_aes_cmac.o

 # Management ioctl device (/dev/cmh_mgmt): key lifecycle, PKE, PQC ioctls.
 cmh-$(CONFIG_CRYPTO_DEV_CMH_MGMT) += \
diff --git a/drivers/crypto/cmh/cmh_aes.c b/drivers/crypto/cmh/cmh_aes.c
new file mode 100644
index 000000000000..b36295763e33
--- /dev/null
+++ b/drivers/crypto/cmh/cmh_aes.c
@@ -0,0 +1,736 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2026 Cryptography Research, Inc. (CRI).
+ * CMH LKM -- Kernel Crypto API AES (skcipher) Driver
+ *
+ * Registers skcipher algorithms with the Linux crypto subsystem:
+ *   ecb(aes), cbc(aes), ctr(aes), cfb(aes), xts(aes)
+ *
+ * Uses the CMH AES Core via VCQ commands:
+ *   [SYS_CMD_WRITE] + AES_CMD_INIT + [AES_CMD_UPDATE] + AES_CMD_FINAL
+ *   + VCQ_CMD_FLUSH
+ *
+ * The AES core requires bidirectional DMA -- both input and output
+ * buffers are mapped and passed in a single AES_CMD_FINAL command.
+ *
+ * Raw-key atomicity: SYS_CMD_WRITE to SYS_REF_TEMP is packed into
+ * the same VCQ as AES commands (see cmh_key.h for details).
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/crypto.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/aes.h>
+#include <crypto/algapi.h>
+#include <crypto/xts.h>
+#include <crypto/scatterwalk.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/unaligned.h>
+
+#include "cmh_aes.h"
+#include "cmh_vcq.h"
+#include "cmh_aes_abi.h"
+#include "cmh_sys_abi.h"
+#include "cmh_sys.h"
+#include "cmh_txn.h"
+#include "cmh_dma.h"
+#include "cmh_key.h"
+
+/* Algorithm Table */
+
+/*
+ * Static description of one AES skcipher mode offered by this driver.
+ * The entries live in aes_algs[]; cmh_aes_get_info() recovers the entry
+ * that belongs to a transform.
+ */
+struct cmh_aes_alg_info {
+       u32         aes_mode;   /* AES_MODE_* */
+       u32         ivsize;             /* bytes (0 for ECB) */
+       u32         min_keysize;        /* minimum key bytes */
+       u32         max_keysize;        /* maximum key bytes */
+       const char *alg_name;   /* Linux crypto name: "ecb(aes)" */
+       const char *drv_name;   /* driver name: "cri-cmh-ecb-aes" */
+};
+
+/* One entry per supported mode; XTS takes a double-length key. */
+static const struct cmh_aes_alg_info aes_algs[] = {
+       {
+               .aes_mode       = AES_MODE_ECB,
+               .ivsize         = 0,
+               .min_keysize    = AES_KEYSIZE_128,
+               .max_keysize    = AES_KEYSIZE_256,
+               .alg_name       = "ecb(aes)",
+               .drv_name       = "cri-cmh-ecb-aes",
+       },
+       {
+               .aes_mode       = AES_MODE_CBC,
+               .ivsize         = CMH_AES_IV_SIZE,
+               .min_keysize    = AES_KEYSIZE_128,
+               .max_keysize    = AES_KEYSIZE_256,
+               .alg_name       = "cbc(aes)",
+               .drv_name       = "cri-cmh-cbc-aes",
+       },
+       {
+               .aes_mode       = AES_MODE_CTR,
+               .ivsize         = CMH_AES_IV_SIZE,
+               .min_keysize    = AES_KEYSIZE_128,
+               .max_keysize    = AES_KEYSIZE_256,
+               .alg_name       = "ctr(aes)",
+               .drv_name       = "cri-cmh-ctr-aes",
+       },
+       {
+               .aes_mode       = AES_MODE_CFB,
+               .ivsize         = CMH_AES_IV_SIZE,
+               .min_keysize    = AES_KEYSIZE_128,
+               .max_keysize    = AES_KEYSIZE_256,
+               .alg_name       = "cfb(aes)",
+               .drv_name       = "cri-cmh-cfb-aes",
+       },
+       {
+               .aes_mode       = AES_MODE_XTS,
+               .ivsize         = CMH_AES_IV_SIZE,
+               .min_keysize    = 2 * AES_KEYSIZE_128,
+               .max_keysize    = 2 * AES_KEYSIZE_256,
+               .alg_name       = "xts(aes)",
+               .drv_name       = "cri-cmh-xts-aes",
+       },
+};
+
+/* Per-transform context (allocated by crypto framework) */
+
+struct cmh_aes_tfm_ctx {
+       /* Key state: filled by cmh_aes_setkey(), released in cmh_aes_exit_tfm() */
+       struct cmh_key_ctx key;
+};
+
+/* Per-request context (lives in skcipher_request::__ctx) */
+
+/*
+ * Maximum payload commands:
+ *   [SYS_CMD_WRITE] + AES_CMD_INIT + [AES_CMD_UPDATE] + AES_CMD_FINAL
+ *   + VCQ_CMD_FLUSH = 5
+ * UPDATE is used for XTS data > 2 blocks (see cmh_aes_crypt).
+ */
+#define CMH_AES_MAX_PAYLOAD    5
+#define CMH_AES_MAX_PACKED     (CMH_AES_MAX_PAYLOAD * 2)
+
+/*
+ * State carried from submission to completion of one skcipher request.
+ * The *_dma handles are the device addresses of the matching *_buf
+ * buffers; key_dma is the device address the raw key is written from.
+ */
+struct cmh_aes_reqctx {
+       dma_addr_t in_dma;      /* input data, mapped DMA_TO_DEVICE */
+       dma_addr_t out_dma;     /* output data, mapped DMA_FROM_DEVICE */
+       dma_addr_t iv_dma;      /* IV, mapped only when ivsize > 0 */
+       dma_addr_t iv2_dma;     /* IV for the second CTR chunk */
+       dma_addr_t key_dma;     /* source of the SYS_CMD_WRITE key load */
+       u8 *in_buf;
+       u8 *out_buf;
+       u8 *iv_buf;
+       u8 *iv2_buf;            /* non-NULL only when iv2_dma is mapped */
+       u32 cryptlen;           /* length of in_buf and out_buf in bytes */
+       u32 ivsize;
+       u32 keylen;             /* NOTE(review): presumably key bytes; not read in the code shown here */
+       u32 aes_mode;           /* AES_MODE_* */
+       u32 aes_op;             /* AES_OP_* */
+       /* CTR counter-wrap split state */
+       u32 ctr_chunk1_len;     /* non-zero while the second chunk is still pending */
+       u32 core_id;            /* core addressed by the VCQ commands */
+       s32 target_mbx;         /* mailbox passed to the async submission */
+       u64 key_ref;            /* NOTE(review): presumably the key reference for INIT; confirm in cmh_aes_crypt */
+       struct vcq_cmd packed[CMH_AES_MAX_PACKED];      /* packing scratch for submission */
+};
+
+/* VCQ Builders -- AES-specific */
+
+/*
+ * Fill @slot with an AES_CMD_INIT command for @core_id.
+ *
+ * @key_ref: key reference placed in the command
+ * @iv_dma:  device address of the IV
+ * @keylen:  key length in bytes
+ * @ivlen:   IV length in bytes
+ * @mode:    AES_MODE_*
+ * @op:      AES_OP_*
+ * @iolen:   value for the command's iolen field
+ *
+ * The slot is zeroed first; aadlen and taglen are always 0 because the
+ * skcipher modes carry neither associated data nor a tag.
+ */
+static void vcq_add_aes_init(struct vcq_cmd *slot, u32 core_id, u64 key_ref, u64 iv_dma,
+                            u32 keylen, u32 ivlen, u32 mode, u32 op,
+                            u32 iolen)
+{
+       memset(slot, 0, sizeof(*slot));
+       slot->magic = VCQ_CMD_MAGIC;
+       slot->id = VCQ_CMD_ID(core_id, 0, 1, AES_CMD_INIT);
+       slot->hwc.aes.cmd_init.key = key_ref;
+       slot->hwc.aes.cmd_init.iv = iv_dma;
+       slot->hwc.aes.cmd_init.keylen = keylen;
+       slot->hwc.aes.cmd_init.ivlen = ivlen;
+       slot->hwc.aes.cmd_init.mode = mode;
+       slot->hwc.aes.cmd_init.op = op;
+       slot->hwc.aes.cmd_init.aadlen = 0;
+       slot->hwc.aes.cmd_init.iolen = iolen;
+       slot->hwc.aes.cmd_init.taglen = 0;
+}
+
+/*
+ * Fill @slot with an AES_CMD_UPDATE command for @core_id that processes
+ * @iolen bytes from @input_dma into @output_dma.  The slot is zeroed
+ * first.
+ */
+static void vcq_add_aes_update(struct vcq_cmd *slot, u32 core_id, u64 input_dma,
+                              u64 output_dma, u32 iolen)
+{
+       memset(slot, 0, sizeof(*slot));
+       slot->magic = VCQ_CMD_MAGIC;
+       slot->id = VCQ_CMD_ID(core_id, 0, 1, AES_CMD_UPDATE);
+       slot->hwc.aes.cmd_update.input = input_dma;
+       slot->hwc.aes.cmd_update.output = output_dma;
+       slot->hwc.aes.cmd_update.iolen = iolen;
+}
+
+/*
+ * Fill @slot with an AES_CMD_FINAL command for @core_id that processes
+ * the last @iolen bytes from @input_dma into @output_dma.  The slot is
+ * zeroed first; the tag fields stay 0 because skcipher modes have no tag.
+ */
+static void vcq_add_aes_final(struct vcq_cmd *slot, u32 core_id, u64 input_dma,
+                             u64 output_dma, u32 iolen)
+{
+       memset(slot, 0, sizeof(*slot));
+       slot->magic = VCQ_CMD_MAGIC;
+       slot->id = VCQ_CMD_ID(core_id, 0, 1, AES_CMD_FINAL);
+       slot->hwc.aes.cmd_final.input = input_dma;
+       slot->hwc.aes.cmd_final.output = output_dma;
+       slot->hwc.aes.cmd_final.iolen = iolen;
+       slot->hwc.aes.cmd_final.tag = 0;
+       slot->hwc.aes.cmd_final.taglen = 0;
+}
+
+/*
+ * We wrap each skcipher_alg with its info pointer in a compound struct,
+ * then use container_of() in cmh_aes_get_info() to recover it.
+ * This is the same pattern used by hash, hmac, cshake, kmac.
+ */
+struct cmh_aes_alg_drv {
+       /* Embedded algorithm; container_of() on it yields this wrapper */
+       struct skcipher_alg             alg;
+       /* Mode description that belongs to this algorithm */
+       const struct cmh_aes_alg_info  *info;
+};
+
+/* True for the modes handled as stream modes: CTR and CFB. */
+static bool aes_is_stream_mode(u32 mode)
+{
+       switch (mode) {
+       case AES_MODE_CTR:
+       case AES_MODE_CFB:
+               return true;
+       default:
+               return false;
+       }
+}
+
+/*
+ * Update req->iv after a successful encrypt/decrypt.
+ *
+ * The Linux skcipher API contract requires that req->iv is updated to
+ * reflect the state needed to continue processing in a chained call:
+ *   CBC encrypt: IV <- last ciphertext block
+ *   CBC decrypt: IV <- last ciphertext block of the *input*
+ *   CTR:         IV <- counter incremented by ceil(cryptlen / blocksize)
+ *   CFB:         IV <- last ciphertext block
+ * Any other mode leaves req->iv untouched.
+ *
+ * @req:     request whose iv and cryptlen are used
+ * @mode:    AES_MODE_*
+ * @op:      AES_OP_ENCRYPT selects @out_buf as the ciphertext, any other
+ *           value selects @in_buf
+ * @in_buf:  linear copy of the request input (cryptlen bytes)
+ * @out_buf: linear copy of the request output (cryptlen bytes)
+ */
+static void cmh_aes_update_iv(struct skcipher_request *req, u32 mode,
+                             u32 op, const u8 *in_buf, const u8 *out_buf)
+{
+       const u32 bs = CMH_AES_BLOCK_SIZE;
+       const u8 *ciphertext;
+       u8 *iv = req->iv;
+       u32 nblocks;
+       int i;
+
+       switch (mode) {
+       case AES_MODE_CBC:
+       case AES_MODE_CFB:
+               /*
+                * Both modes chain on the last ciphertext block.
+                *
+                * For sub-block requests (cryptlen < 16) there is no
+                * complete ciphertext block to chain, so the IV is left
+                * unchanged -- CFB-128 has no defined chaining semantic
+                * for partial blocks (shift-register CFB-n is a different
+                * mode).  Without this guard the pointer arithmetic
+                * underflows and reads before the buffer; it is applied
+                * to CBC as well so that this helper stays safe even if
+                * a short CBC request were ever to reach it.
+                */
+               if (req->cryptlen < bs)
+                       break;
+               ciphertext = (op == AES_OP_ENCRYPT) ? out_buf : in_buf;
+               memcpy(iv, ciphertext + req->cryptlen - bs, bs);
+               break;
+       case AES_MODE_CTR:
+               /*
+                * Arithmetic big-endian 128-bit counter increment.
+                * Process from the least-significant byte (index 15)
+                * upward, carrying as needed; the loop stops early once
+                * nothing is left to add.
+                */
+               nblocks = DIV_ROUND_UP(req->cryptlen, bs);
+               for (i = bs - 1; i >= 0 && nblocks; i--) {
+                       u32 sum = (u32)iv[i] + (nblocks & 0xff);
+
+                       iv[i] = (u8)sum;
+                       /* remaining addend plus the carry out of this byte */
+                       nblocks = (nblocks >> 8) + (sum >> 8);
+               }
+               break;
+       default:
+               break;
+       }
+}
+
+/* skcipher Operations */
+
+/* Map a transform back to the cmh_aes_alg_info of its algorithm. */
+static const struct cmh_aes_alg_info *
+cmh_aes_get_info(struct crypto_skcipher *tfm)
+{
+       struct cmh_aes_alg_drv *drv;
+
+       /* The skcipher_alg is embedded in the driver wrapper */
+       drv = container_of(crypto_skcipher_alg(tfm), struct cmh_aes_alg_drv,
+                          alg);
+       return drv->info;
+}
+
+/*
+ * skcipher .setkey: validate the key length for the transform's mode and
+ * store the key as a raw key for the AES core.
+ *
+ * Return: 0 on success, -EINVAL for an unsupported key length, or the
+ * error returned by xts_verify_key() / cmh_key_setkey_raw().
+ */
+static int cmh_aes_setkey(struct crypto_skcipher *tfm, const u8 *key,
+                         unsigned int keylen)
+{
+       const struct cmh_aes_alg_info *alg_info = cmh_aes_get_info(tfm);
+       struct cmh_aes_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
+       int rc;
+
+       if (alg_info->aes_mode != AES_MODE_XTS) {
+               /* Standard modes: a single 16, 24 or 32 byte key */
+               switch (keylen) {
+               case AES_KEYSIZE_128:
+               case AES_KEYSIZE_192:
+               case AES_KEYSIZE_256:
+                       break;
+               default:
+                       return -EINVAL;
+               }
+       } else {
+               /* XTS: two concatenated keys, 32, 48 or 64 bytes in total */
+               switch (keylen) {
+               case 2 * AES_KEYSIZE_128:
+               case 2 * AES_KEYSIZE_192:
+               case 2 * AES_KEYSIZE_256:
+                       break;
+               default:
+                       return -EINVAL;
+               }
+
+               rc = xts_verify_key(tfm, key, keylen);
+               if (rc)
+                       return rc;
+       }
+
+       return cmh_key_setkey_raw(&ctx->key, key, keylen, CORE_ID_AES);
+}
+
+/* skcipher .init: size the request context and start with a clean tfm context. */
+static int cmh_aes_init_tfm(struct crypto_skcipher *tfm)
+{
+       struct cmh_aes_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+       crypto_skcipher_set_reqsize(tfm, sizeof(struct cmh_aes_reqctx));
+       memset(ctx, 0, sizeof(*ctx));
+
+       return 0;
+}
+
+/* skcipher .exit: release the key held by the transform context. */
+static void cmh_aes_exit_tfm(struct crypto_skcipher *tfm)
+{
+       struct cmh_aes_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
+       struct cmh_key_ctx *key = &ctx->key;
+
+       cmh_key_destroy(key);
+}
+
+#define CMH_AES_MAX_CRYPTLEN   SZ_32M
+
+/*
+ * DMA unmap helper: release the data and IV mappings of a request.
+ *
+ * The second IV is unmapped only when iv2_buf is set, the primary IV only
+ * when the mode has one (ivsize > 0); the input and output mappings are
+ * always released.
+ *
+ * NOTE(review): no cmh_dma_map_error() checks are made here, so this must
+ * only be called once every mapping has succeeded -- confirm against
+ * cmh_aes_crypt().  key_dma is not released here either; presumably it is
+ * owned elsewhere -- confirm.
+ */
+static void cmh_aes_unmap_dma(struct cmh_aes_reqctx *rctx)
+{
+       if (rctx->iv2_buf)
+               cmh_dma_unmap_single(rctx->iv2_dma, rctx->ivsize,
+                                    DMA_TO_DEVICE);
+       if (rctx->ivsize > 0)
+               cmh_dma_unmap_single(rctx->iv_dma, rctx->ivsize,
+                                    DMA_TO_DEVICE);
+       cmh_dma_unmap_single(rctx->out_dma, rctx->cryptlen, DMA_FROM_DEVICE);
+       cmh_dma_unmap_single(rctx->in_dma, rctx->cryptlen, DMA_TO_DEVICE);
+}
+
+/*
+ * Free the bounce buffers of a request and clear the pointers.  The data
+ * buffers are wiped before being freed; the IV copies are not.
+ */
+static void cmh_aes_free_bufs(struct cmh_aes_reqctx *rctx)
+{
+       /* Data buffers: zeroised on free */
+       kfree_sensitive(rctx->in_buf);
+       kfree_sensitive(rctx->out_buf);
+
+       /* IV copies */
+       kfree(rctx->iv_buf);
+       kfree(rctx->iv2_buf);
+
+       rctx->in_buf = NULL;
+       rctx->out_buf = NULL;
+       rctx->iv_buf = NULL;
+       rctx->iv2_buf = NULL;
+}
+
+/* Forward declaration: chunk 2 completes through the same callback. */
+static void cmh_aes_complete(void *data, int error);
+
+/*
+ * Submit the second CTR chunk after the first completes.
+ * Called from cmh_aes_complete when ctr_chunk1_len > 0.
+ *
+ * The second chunk covers bytes [ctr_chunk1_len, cryptlen) of the already
+ * mapped input/output buffers and starts from the IV at iv2_dma.  The raw
+ * key is written to SYS_REF_TEMP again as part of this VCQ.
+ *
+ * Return: the result of cmh_vcq_pack_and_submit_async().
+ */
+static int cmh_aes_ctr_submit_chunk2(struct skcipher_request *req)
+{
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct cmh_aes_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
+       struct cmh_aes_reqctx *rctx = skcipher_request_ctx(req);
+       struct vcq_cmd cmds[CMH_AES_MAX_PAYLOAD];
+       u32 chunk1 = rctx->ctr_chunk1_len;
+       u32 chunk2 = rctx->cryptlen - chunk1;
+       u64 key_ref;
+       u32 keylen;
+       u32 idx = 0;
+
+       /* Clear split flag so next completion is final */
+       rctx->ctr_chunk1_len = 0;
+
+       /* NOTE(review): reads the raw key fields of the tfm -- assumes a raw-mode key; confirm */
+       vcq_add_sys_write(&cmds[idx++], SYS_REF_TEMP,
+                         (u64)rctx->key_dma, SYS_REF_NONE,
+                         tctx->key.raw.len,
+                         tctx->key.raw.sys_type);
+       key_ref = SYS_REF_TEMP;
+       keylen = tctx->key.raw.len;
+
+       vcq_add_aes_init(&cmds[idx++], rctx->core_id, key_ref,
+                        (u64)rctx->iv2_dma, keylen, rctx->ivsize,
+                        rctx->aes_mode, rctx->aes_op, 0);
+       /* Offset into the existing mappings; they cover the full buffer */
+       vcq_add_aes_final(&cmds[idx++], rctx->core_id,
+                         (u64)(rctx->in_dma + chunk1),
+                         (u64)(rctx->out_dma + chunk1), chunk2);
+       vcq_add_flush(&cmds[idx++], rctx->core_id);
+
+       return cmh_vcq_pack_and_submit_async(cmds, idx, rctx->packed,
+                                            CMH_AES_MAX_PACKED,
+                                            rctx->target_mbx,
+                                            cmh_aes_complete, req,
+                                            !!(req->base.flags &
+                                               CRYPTO_TFM_REQ_MAY_BACKLOG),
+                                            cmh_tm_async_timeout_jiffies());
+}
+
+/*
+ * Async completion callback -- fires from RH threaded IRQ context.
+ *
+ * Unmaps DMA buffers, copies output to req->dst scatterlist,
+ * updates the IV state, frees temporaries, and completes the request.
+ *
+ * For CTR counter-wrap splits, the first chunk completion chains
+ * into a second VCQ submission rather than finalizing immediately.
+ *
+ * @data:  the struct skcipher_request passed at submission time
+ * @error: 0 on success, negative errno on failure, or -EINPROGRESS,
+ *         which is forwarded to the caller without touching any state
+ *
+ * NOTE(review): the -EINPROGRESS forwarding does not distinguish the
+ * original submission from the chunk 2 resubmission, so a backlogged
+ * chunk 2 would presumably notify the caller a second time -- confirm
+ * against the cmh_vcq_pack_and_submit_async() backlog semantics.
+ */
+static void cmh_aes_complete(void *data, int error)
+{
+       struct skcipher_request *req = data;
+       struct cmh_aes_reqctx *rctx = skcipher_request_ctx(req);
+
+       if (error == -EINPROGRESS) {
+               cmh_complete(&req->base, error);
+               return;
+       }
+
+       /*
+        * CTR counter-wrap: first chunk completed, submit second.
+        * DMA mappings remain valid (they cover the full buffer).
+        *
+        * Recursion depth bounded: chunk2 clears ctr_chunk1_len before
+        * submission, so the second cmh_aes_complete invocation sees 0
+        * and finalizes (max depth = 2).
+        */
+       if (rctx->ctr_chunk1_len && !error) {
+               int ret;
+
+               ret = cmh_aes_ctr_submit_chunk2(req);
+
+               /* Accepted or backlogged: the next completion finishes up */
+               if (!ret || ret == -EBUSY)
+                       return;
+               /* Submission failed; clean up below */
+               error = ret;
+       }
+
+       cmh_aes_unmap_dma(rctx);
+
+       if (!error) {
+               /* Final argument 1: copy from out_buf into req->dst */
+               scatterwalk_map_and_copy(rctx->out_buf, req->dst,
+                                        0, rctx->cryptlen, 1);
+               cmh_aes_update_iv(req, rctx->aes_mode, rctx->aes_op,
+                                 rctx->in_buf, rctx->out_buf);
+       }
+
+       cmh_aes_free_bufs(rctx);
+       cmh_complete(&req->base, error);
+}
+
+/*
+ * Core encrypt/decrypt -- builds a VCQ transaction and submits async.
+ *
+ * @req:    skcipher request.  req->src is linearised into a bounce
+ *          buffer here; the result is copied back to req->dst by
+ *          cmh_aes_complete().
+ * @aes_op: AES_OP_ENCRYPT or AES_OP_DECRYPT (see the two thin wrappers).
+ *
+ * Checks done before anything is allocated: a key must have been set
+ * (-ENOKEY), the length may not exceed CMH_AES_MAX_CRYPTLEN, XTS needs
+ * at least one block, CTR and CFB accept any length, and every other
+ * mode needs a whole number of blocks (-EINVAL otherwise).
+ *
+ * Command sequence: SYS write of the raw key into SYS_REF_TEMP, AES
+ * INIT, then either UPDATE + FINAL (XTS longer than two blocks, so the
+ * last full block plus any partial tail go into FINAL), or a single
+ * FINAL, followed by FLUSH.  For CTR, if the low 64 bits of the counter
+ * would wrap inside this request, only the blocks up to the wrap are
+ * submitted here and the remainder is chained from the completion
+ * callback using a second IV.
+ *
+ * Returns -EINPROGRESS on successful submission (completion callback
+ * will fire later).  Returns 0 for trivial cases (zero-length).
+ * Returns -EBUSY unchanged when the submit helper reports it; the
+ * buffers are left in place on that path (presumably the request was
+ * backlogged and will still complete -- confirm against
+ * cmh_vcq_pack_and_submit_async()).
+ * Returns negative errno on pre-submission errors.
+ */
+static int cmh_aes_crypt(struct skcipher_request *req, u32 aes_op)
+{
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct cmh_aes_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
+       const struct cmh_aes_alg_info *info = cmh_aes_get_info(tfm);
+       struct cmh_aes_reqctx *rctx = skcipher_request_ctx(req);
+       struct vcq_cmd cmds[CMH_AES_MAX_PAYLOAD];
+       u64 key_ref;
+       u32 keylen;
+       struct core_dispatch d;
+       s32 target_mbx;
+       u32 core_id;
+       u32 idx;
+       int ret;
+       gfp_t gfp;
+
+       if (tctx->key.mode == CMH_KEY_NONE)
+               return -ENOKEY;
+
+       if (!req->cryptlen)
+               return 0;
+
+       if (req->cryptlen > CMH_AES_MAX_CRYPTLEN)
+               return -EINVAL;
+
+       switch (info->aes_mode) {
+       case AES_MODE_CTR:
+       case AES_MODE_CFB:
+               break;
+       case AES_MODE_XTS:
+               if (req->cryptlen < CMH_AES_BLOCK_SIZE)
+                       return -EINVAL;
+               break;
+       default:
+               if (req->cryptlen & (CMH_AES_BLOCK_SIZE - 1))
+                       return -EINVAL;
+               break;
+       }
+
+       gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
+             GFP_KERNEL : GFP_ATOMIC;
+
+       /* Initialise reqctx */
+       memset(rctx, 0, sizeof(*rctx));
+       rctx->cryptlen = req->cryptlen;
+       rctx->ivsize = info->ivsize;
+       rctx->aes_mode = info->aes_mode;
+       rctx->aes_op = aes_op;
+       rctx->iv2_buf = NULL;
+
+       /* Linearise input from scatterlist */
+       rctx->in_buf = kmalloc(req->cryptlen, gfp);
+       if (!rctx->in_buf)
+               return -ENOMEM;
+
+       scatterwalk_map_and_copy(rctx->in_buf, req->src, 0, req->cryptlen, 0);
+
+       rctx->in_dma = cmh_dma_map_single(rctx->in_buf, req->cryptlen,
+                                         DMA_TO_DEVICE);
+       if (cmh_dma_map_error(rctx->in_dma)) {
+               ret = -ENOMEM;
+               goto out_free_in;
+       }
+
+       /* Allocate and map output buffer */
+       rctx->out_buf = kmalloc(req->cryptlen, gfp);
+       if (!rctx->out_buf) {
+               ret = -ENOMEM;
+               goto out_unmap_in;
+       }
+
+       rctx->out_dma = cmh_dma_map_single(rctx->out_buf, req->cryptlen,
+                                          DMA_FROM_DEVICE);
+       if (cmh_dma_map_error(rctx->out_dma)) {
+               ret = -ENOMEM;
+               goto out_free_out;
+       }
+
+       /* Map IV if required */
+       if (info->ivsize > 0) {
+               rctx->iv_buf = kmemdup(req->iv, info->ivsize, gfp);
+               if (!rctx->iv_buf) {
+                       ret = -ENOMEM;
+                       goto out_unmap_out;
+               }
+               rctx->iv_dma = cmh_dma_map_single(rctx->iv_buf, info->ivsize,
+                                                 DMA_TO_DEVICE);
+               if (cmh_dma_map_error(rctx->iv_dma)) {
+                       ret = -ENOMEM;
+                       goto out_free_iv;
+               }
+       }
+
+       /* Resolve key reference */
+       idx = 0;
+
+       rctx->key_dma = tctx->key.raw.dma;
+       rctx->keylen = tctx->key.raw.len;
+       vcq_add_sys_write(&cmds[idx++], SYS_REF_TEMP,
+                         (u64)rctx->key_dma, SYS_REF_NONE,
+                         tctx->key.raw.len,
+                         tctx->key.raw.sys_type);
+       key_ref = SYS_REF_TEMP;
+       keylen = tctx->key.raw.len;
+       d = cmh_core_select_instance(CMH_CORE_AES);
+       target_mbx = d.mbx_idx;
+       core_id = d.core_id;
+
+       /*
+        * iolen in INIT: XTS needs total length upfront for tweak
+        * computation; all other modes use 0 (streaming).
+        */
+       vcq_add_aes_init(&cmds[idx++], core_id, key_ref, (u64)rctx->iv_dma,
+                        keylen, info->ivsize, info->aes_mode, aes_op,
+                        info->aes_mode == AES_MODE_XTS ?
+                        req->cryptlen : 0);
+
+       if (info->aes_mode == AES_MODE_XTS &&
+           req->cryptlen > 2 * CMH_AES_BLOCK_SIZE) {
+               u32 final_len, update_len;
+
+               if (req->cryptlen & (CMH_AES_BLOCK_SIZE - 1))
+                       final_len = CMH_AES_BLOCK_SIZE +
+                                   (req->cryptlen & (CMH_AES_BLOCK_SIZE - 1));
+               else
+                       final_len = 2 * CMH_AES_BLOCK_SIZE;
+
+               update_len = req->cryptlen - final_len;
+
+               vcq_add_aes_update(&cmds[idx++], core_id,
+                                  (u64)rctx->in_dma,
+                                  (u64)rctx->out_dma, update_len);
+               vcq_add_aes_final(&cmds[idx++], core_id,
+                                 (u64)(rctx->in_dma + update_len),
+                                 (u64)(rctx->out_dma + update_len),
+                                 final_len);
+       } else if (info->aes_mode == AES_MODE_CTR) {
+               /*
+                * CTR counter-wrap workaround:
+                * The AES-SCA hardware uses a 64-bit block counter.
+                * If the lower 64 bits of the IV would wrap during
+                * this operation, split into two separate VCQ
+                * transactions -- the completion callback for the
+                * first chunk submits the second.
+                */
+               u64 lower64 = get_unaligned_be64(rctx->iv_buf + 8);
+               u32 nblocks = DIV_ROUND_UP(req->cryptlen,
+                                         CMH_AES_BLOCK_SIZE);
+               u64 bwrap = lower64 ? (~lower64 + 1ULL) : U64_MAX;
+
+               if (nblocks > bwrap) {
+                       u32 chunk1 = (u32)bwrap * CMH_AES_BLOCK_SIZE;
+                       u64 upper64;
+
+                       /* Prepare second IV for chained submission */
+                       rctx->iv2_buf = kmalloc(info->ivsize, gfp);
+                       if (!rctx->iv2_buf) {
+                               ret = -ENOMEM;
+                               goto out_unmap_iv;
+                       }
+                       upper64 = get_unaligned_be64(rctx->iv_buf);
+                       put_unaligned_be64(upper64 + 1, rctx->iv2_buf);
+                       put_unaligned_be64(0, rctx->iv2_buf + 8);
+
+                       rctx->iv2_dma =
+                               cmh_dma_map_single(rctx->iv2_buf,
+                                                  info->ivsize,
+                                                  DMA_TO_DEVICE);
+                       if (cmh_dma_map_error(rctx->iv2_dma)) {
+                               ret = -ENOMEM;
+                               goto out_free_iv2;
+                       }
+
+                       /* Store state for the chained second submission */
+                       rctx->ctr_chunk1_len = chunk1;
+                       rctx->core_id = core_id;
+                       rctx->target_mbx = target_mbx;
+                       rctx->key_ref = key_ref;
+
+                       /* First transaction: only chunk1 */
+                       vcq_add_aes_final(&cmds[idx++], core_id,
+                                         (u64)rctx->in_dma,
+                                         (u64)rctx->out_dma, chunk1);
+               } else {
+                       /* No wrap: single FINAL with all data */
+                       vcq_add_aes_final(&cmds[idx++], core_id,
+                                         (u64)rctx->in_dma,
+                                         (u64)rctx->out_dma,
+                                         req->cryptlen);
+               }
+       } else {
+               vcq_add_aes_final(&cmds[idx++], core_id,
+                                 (u64)rctx->in_dma,
+                                 (u64)rctx->out_dma, req->cryptlen);
+       }
+
+       vcq_add_flush(&cmds[idx++], core_id);
+
+       ret = cmh_vcq_pack_and_submit_async(cmds, idx, rctx->packed,
+                                           CMH_AES_MAX_PACKED, target_mbx,
+                                           cmh_aes_complete, req,
+                                           !!(req->base.flags &
+                                              CRYPTO_TFM_REQ_MAY_BACKLOG),
+                                           cmh_tm_async_timeout_jiffies());
+       if (ret == -EBUSY)
+               return -EBUSY;
+       if (ret)
+               goto out_cleanup_all;
+
+       return -EINPROGRESS;
+
+out_cleanup_all:
+       if (rctx->iv2_buf) {
+               cmh_dma_unmap_single(rctx->iv2_dma, info->ivsize,
+                                    DMA_TO_DEVICE);
+       }
+out_free_iv2:
+       kfree(rctx->iv2_buf);
+out_unmap_iv:
+       if (info->ivsize > 0)
+               cmh_dma_unmap_single(rctx->iv_dma, info->ivsize,
+                                    DMA_TO_DEVICE);
+out_free_iv:
+       kfree(rctx->iv_buf);
+out_unmap_out:
+       cmh_dma_unmap_single(rctx->out_dma, req->cryptlen, DMA_FROM_DEVICE);
+out_free_out:
+       kfree_sensitive(rctx->out_buf);
+out_unmap_in:
+       cmh_dma_unmap_single(rctx->in_dma, req->cryptlen, DMA_TO_DEVICE);
+out_free_in:
+       kfree_sensitive(rctx->in_buf);
+       return ret;
+}
+
+/* skcipher ->encrypt hook: run the shared AES path in the encrypt direction */
+static int cmh_aes_encrypt(struct skcipher_request *req)
+{
+       const u32 direction = AES_OP_ENCRYPT;
+
+       return cmh_aes_crypt(req, direction);
+}
+
+/* skcipher ->decrypt hook: run the shared AES path in the decrypt direction */
+static int cmh_aes_decrypt(struct skcipher_request *req)
+{
+       const u32 direction = AES_OP_DECRYPT;
+
+       return cmh_aes_crypt(req, direction);
+}
+
+/* Registration: aes_drv_algs[i] holds the skcipher_alg built from aes_algs[i] */
+
+static struct cmh_aes_alg_drv aes_drv_algs[ARRAY_SIZE(aes_algs)];
+
+/**
+ * cmh_aes_register() - Register the AES skcipher algorithms with the crypto framework
+ *
+ * Builds one skcipher_alg for every entry of aes_algs[] and registers it.
+ *
+ * Every algorithm is flagged CRYPTO_ALG_ALLOCATES_MEMORY because the
+ * request path allocates bounce buffers (and IV copies) per request, so
+ * users that cannot tolerate allocation failures must be able to filter
+ * these implementations out.
+ *
+ * chunksize is set to one AES block for all modes: stream modes advertise
+ * cra_blocksize == 1, and without an explicit chunksize the framework
+ * would default it to 1 as well, allowing callers to split a stream at
+ * arbitrary byte offsets between requests.
+ *
+ * If any registration fails, the algorithms registered so far are
+ * unregistered again before returning.
+ *
+ * Return: 0 on success, negative errno on failure.
+ */
+int cmh_aes_register(void)
+{
+       unsigned int i;
+       int ret;
+
+       for (i = 0; i < ARRAY_SIZE(aes_algs); i++) {
+               const struct cmh_aes_alg_info *info = &aes_algs[i];
+               struct cmh_aes_alg_drv *drv = &aes_drv_algs[i];
+               struct skcipher_alg *alg = &drv->alg;
+
+               /* Back-pointer used to recover the static descriptor */
+               drv->info = info;
+
+               memset(alg, 0, sizeof(*alg));
+
+               alg->setkey      = cmh_aes_setkey;
+               alg->encrypt     = cmh_aes_encrypt;
+               alg->decrypt     = cmh_aes_decrypt;
+               alg->init        = cmh_aes_init_tfm;
+               alg->exit        = cmh_aes_exit_tfm;
+               alg->min_keysize = info->min_keysize;
+               alg->max_keysize = info->max_keysize;
+               alg->ivsize      = info->ivsize;
+               /* Underlying block size, also for byte-granular stream modes */
+               alg->chunksize   = CMH_AES_BLOCK_SIZE;
+
+               strscpy(alg->base.cra_name, info->alg_name,
+                       CRYPTO_MAX_ALG_NAME);
+               strscpy(alg->base.cra_driver_name, info->drv_name,
+                       CRYPTO_MAX_ALG_NAME);
+               alg->base.cra_priority  = 300;
+               /* The encrypt/decrypt path kmallocs, so say so */
+               alg->base.cra_flags     = CRYPTO_ALG_KERN_DRIVER_ONLY |
+                                         CRYPTO_ALG_ASYNC |
+                                         CRYPTO_ALG_ALLOCATES_MEMORY;
+               alg->base.cra_blocksize = aes_is_stream_mode(info->aes_mode)
+                                         ? 1 : CMH_AES_BLOCK_SIZE;
+               alg->base.cra_ctxsize  = sizeof(struct cmh_aes_tfm_ctx);
+               alg->base.cra_module   = THIS_MODULE;
+
+               ret = crypto_register_skcipher(alg);
+               if (ret) {
+                       dev_err(cmh_dev(), "cmh_aes: failed to register %s (rc=%d)\n",
+                               info->alg_name, ret);
+                       goto err_unregister;
+               }
+
+               dev_dbg(cmh_dev(), "cmh_aes: registered %s\n", info->alg_name);
+       }
+
+       return 0;
+
+err_unregister:
+       /* i indexes the entry that failed; undo only the ones before it */
+       while (i--)
+               crypto_unregister_skcipher(&aes_drv_algs[i].alg);
+       return ret;
+}
+
+/**
+ * cmh_aes_unregister() - Remove every AES skcipher set up by cmh_aes_register()
+ */
+void cmh_aes_unregister(void)
+{
+       unsigned int n = 0;
+
+       /* Same order as registration: first table entry first */
+       while (n < ARRAY_SIZE(aes_algs)) {
+               struct skcipher_alg *alg = &aes_drv_algs[n].alg;
+
+               crypto_unregister_skcipher(alg);
+               dev_dbg(cmh_dev(), "cmh_aes: unregistered %s\n", aes_algs[n].alg_name);
+               n++;
+       }
+}
diff --git a/drivers/crypto/cmh/cmh_aes_aead.c b/drivers/crypto/cmh/cmh_aes_aead.c
new file mode 100644
index 000000000000..0b59c5f7d474
--- /dev/null
+++ b/drivers/crypto/cmh/cmh_aes_aead.c
@@ -0,0 +1,987 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2026 Cryptography Research, Inc. (CRI).
+ * CMH LKM -- Kernel Crypto API AES AEAD Driver (GCM/CCM)
+ *
+ * Registers AEAD algorithms with the Linux crypto subsystem:
+ *   gcm(aes), ccm(aes)
+ *
+ * GCM: AES_CMD_INIT(mode=GCM) + [AAD_FINAL] + AES_CMD_FINAL + FLUSH
+ *   - Standard 12-byte IV (nonce); tag of 4, 8 or 12--16 bytes (16 by default)
+ *   - AES_CMD_INIT carries aadlen/iolen/taglen
+ *   - AES_CMD_FINAL carries tag DMA for encrypt (produce) / decrypt (verify)
+ *
+ * CCM: AES_CMD_CCM_INIT + [AAD_FINAL] + AES_CMD_FINAL + FLUSH
+ *   - Variable nonce (7--13 bytes), variable tag (4--16 bytes)
+ *   - Uses AES_CMD_CCM_INIT (0x0A) with aes_cmd_init struct
+ *   - Nonce passed via IV field, taglen in init
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/crypto.h>
+#include <crypto/internal/aead.h>
+#include <crypto/internal/cipher.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/utils.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+
+#include "cmh_aes.h"
+#include "cmh_vcq.h"
+#include "cmh_aes_abi.h"
+#include "cmh_sys_abi.h"
+#include "cmh_sys.h"
+#include "cmh_txn.h"
+#include "cmh_dma.h"
+#include "cmh_key.h"
+
+/*
+ * GCM IV contract:
+ *
+ * The AES core requires exactly 16 bytes loaded into its IV register.
+ * For standard 96-bit nonce GCM, the driver passes:
+ *
+ *   IV[0..11]  = user-supplied 12-byte nonce
+ *   IV[12..15] = 0x00000000
+ *
+ * The hardware internally sets the last 32 bits to the big-endian
+ * counter value 1 (forming J0 = nonce || 0x00000001) before
+ * processing AAD.  The driver must NOT pre-set the counter.
+ *
+ * If the IV format is incorrect, GCM authentication will fail
+ * (encrypt produces wrong ciphertext/tag, decrypt rejects).
+ */
+#define AES_GCM_IV_SIZE                12U     /* GCM nonce size (standard) */
+#define AES_GCM_HW_IV_SIZE     16U     /* HW requires 16-byte IV buffer */
+#define AES_GCM_TAG_SIZE       16U     /* full-length tag; maxauthsize of both table entries */
+
+/* CCM: callers pass a 16-byte IV in RFC 3610 format:
+ * iv[0] = L-1 (L = size in bytes of the message-length field),
+ * iv[1..14-iv[0]] = nonce, rest = counter (zeroed).
+ * Nonce length = 14 - iv[0], range 7..13.
+ */
+#define AES_CCM_IV_SIZE        16U
+
+enum cmh_aes_aead_type {
+       CMH_AES_AEAD_GCM,       /* gcm(aes) entry of aes_aead_algs[] */
+       CMH_AES_AEAD_CCM,       /* ccm(aes) entry of aes_aead_algs[] */
+};
+
+struct cmh_aes_aead_info {
+       enum cmh_aes_aead_type type;    /* selects GCM- vs CCM-specific handling */
+       u32         aes_mode;   /* AES_MODE_GCM or AES_MODE_CCM */
+       u32         ivsize;     /* IV size in bytes: 12 for GCM, 16 for CCM */
+       u32         maxauthsize;        /* largest tag; also the authsize a new tfm starts with */
+       const char *alg_name;   /* algorithm name, e.g. "gcm(aes)" */
+       const char *drv_name;   /* driver-specific name, e.g. "cri-cmh-gcm-aes" */
+};
+
+/* Static descriptors for the AEAD algorithms provided by this file */
+static const struct cmh_aes_aead_info aes_aead_algs[] = {
+       {
+               .type        = CMH_AES_AEAD_GCM,
+               .aes_mode    = AES_MODE_GCM,
+               .ivsize      = AES_GCM_IV_SIZE,
+               .maxauthsize = AES_GCM_TAG_SIZE,
+               .alg_name    = "gcm(aes)",
+               .drv_name    = "cri-cmh-gcm-aes",
+       },
+       {
+               .type        = CMH_AES_AEAD_CCM,
+               .aes_mode    = AES_MODE_CCM,
+               .ivsize      = AES_CCM_IV_SIZE,
+               /* CCM shares the 16-byte maximum tag length with GCM */
+               .maxauthsize = AES_GCM_TAG_SIZE,
+               .alg_name    = "ccm(aes)",
+               .drv_name    = "cri-cmh-ccm-aes",
+       },
+};
+
+struct cmh_aes_aead_tfm_ctx {
+       struct cmh_key_ctx key; /* key state filled in by cmh_key_setkey_raw() */
+       u32 authsize;           /* tag length set by setauthsize; starts at maxauthsize */
+       struct crypto_cipher *sw_cipher;        /* CCM empty-input fallback; NULL for GCM */
+       struct crypto_aead *fallback;   /* CCM authsize=10 fallback; NULL for GCM */
+};
+
+/* Per-request context (lives in aead_request::__ctx) */
+
+/*
+ * Maximum payload commands:
+ *   [SYS_CMD_WRITE] + AES_CMD_INIT + AAD_FINAL + AES_CMD_FINAL + FLUSH = 5
+ *
+ * CMH_AES_AEAD_MAX_PACKED sizes the per-request packed[] array that is
+ * handed to cmh_vcq_pack_and_submit_async(); it is twice the payload
+ * count (NOTE(review): presumably headroom for framing added by the
+ * packer -- confirm against cmh_vcq).
+ */
+#define CMH_AES_AEAD_MAX_PAYLOAD       5
+#define CMH_AES_AEAD_MAX_PACKED                (CMH_AES_AEAD_MAX_PAYLOAD * 2)
+
+struct cmh_aes_aead_reqctx {
+       dma_addr_t in_dma;      /* mapping of in_buf (only when cryptlen > 0) */
+       dma_addr_t out_dma;     /* mapping of out_buf (only when cryptlen > 0) */
+       dma_addr_t iv_dma;      /* mapping of iv_buf, iv_map_len bytes */
+       dma_addr_t key_dma;     /* copy of tctx->key.raw.dma taken at submit time */
+       dma_addr_t aad_dma;     /* mapping of aad_buf (only when assoclen > 0) */
+       dma_addr_t tag_dma;     /* mapping of tag_buf */
+       u8 *in_buf;             /* bounce input; expected tag in the empty-GCM decrypt path */
+       u8 *out_buf;            /* bounce output, copied to req->dst on success */
+       u8 *iv_buf;             /* IV as presented to the hardware */
+       u8 *aad_buf;            /* bounce copy of the associated data */
+       u8 *tag_buf;            /* tag; in the empty-GCM path a full 16-byte E(K, J0) block */
+       u32 cryptlen;           /* payload bytes, tag excluded */
+       u32 assoclen;
+       u32 authsize;           /* tag bytes copied or compared for this request */
+       u32 iv_map_len;         /* length iv_dma was mapped with */
+       u32 keylen;
+       bool encrypting;        /* true for encrypt, false for decrypt */
+       bool empty_gcm_fallback;        /* request went through cmh_aes_gcm_empty() */
+       struct vcq_cmd packed[CMH_AES_AEAD_MAX_PACKED]; /* storage for the packed commands */
+};
+
+struct cmh_aes_aead_drv {
+       struct aead_alg                  alg;   /* embedded so container_of() can get back to info */
+       const struct cmh_aes_aead_info  *info;  /* descriptor this algorithm was built from */
+};
+
+/* Map a transform back to the descriptor stored beside its aead_alg */
+static const struct cmh_aes_aead_info *
+cmh_aes_aead_get_info(struct crypto_aead *tfm)
+{
+       struct cmh_aes_aead_drv *drv;
+
+       drv = container_of(crypto_aead_alg(tfm), struct cmh_aes_aead_drv, alg);
+
+       return drv->info;
+}
+
+/* VCQ Builders -- AEAD-specific */
+
+/*
+ * Fill @slot with an AES_CMD_INIT command carrying the AEAD length
+ * fields (aadlen/iolen/taglen) in addition to key, IV, mode and op.
+ */
+static void vcq_add_aes_aead_init(struct vcq_cmd *slot, u32 core_id, u64 key_ref,
+                                 u64 iv_dma, u32 keylen, u32 ivlen,
+                                 u32 mode, u32 op, u32 aadlen, u32 iolen,
+                                 u32 taglen)
+{
+       /* Start from an all-zero slot so fields not set below are zero */
+       memset(slot, 0, sizeof(*slot));
+
+       /* Command header */
+       slot->id = VCQ_CMD_ID(core_id, 0, 1, AES_CMD_INIT);
+       slot->magic = VCQ_CMD_MAGIC;
+
+       /* Cipher selection */
+       slot->hwc.aes.cmd_init.mode = mode;
+       slot->hwc.aes.cmd_init.op = op;
+
+       /* Key and IV */
+       slot->hwc.aes.cmd_init.key = key_ref;
+       slot->hwc.aes.cmd_init.keylen = keylen;
+       slot->hwc.aes.cmd_init.iv = iv_dma;
+       slot->hwc.aes.cmd_init.ivlen = ivlen;
+
+       /* AEAD lengths */
+       slot->hwc.aes.cmd_init.aadlen = aadlen;
+       slot->hwc.aes.cmd_init.iolen = iolen;
+       slot->hwc.aes.cmd_init.taglen = taglen;
+}
+
+/*
+ * Fill @slot with an AES_CMD_CCM_INIT command.  The payload layout is the
+ * same cmd_init structure as a plain INIT with mode fixed to AES_MODE_CCM,
+ * so build that first and then switch the command id.
+ */
+static void vcq_add_aes_ccm_init(struct vcq_cmd *slot, u32 core_id, u64 key_ref,
+                                u64 nonce_dma, u32 keylen, u32 noncelen,
+                                u32 op, u32 aadlen, u32 iolen, u32 taglen)
+{
+       vcq_add_aes_aead_init(slot, core_id, key_ref, nonce_dma, keylen,
+                             noncelen, AES_MODE_CCM, op, aadlen, iolen,
+                             taglen);
+
+       /* Only the opcode differs from the generic INIT */
+       slot->id = VCQ_CMD_ID(core_id, 0, 1, AES_CMD_CCM_INIT);
+}
+
+/* Fill @slot with an AES_CMD_AAD_FINAL command covering @aadlen bytes at @aad_dma */
+static void vcq_add_aes_aad_final(struct vcq_cmd *slot, u32 core_id, u64 aad_dma,
+                                 u32 aadlen)
+{
+       /* Zero everything first; only header and AAD fields are set below */
+       memset(slot, 0, sizeof(*slot));
+
+       slot->id = VCQ_CMD_ID(core_id, 0, 1, AES_CMD_AAD_FINAL);
+       slot->magic = VCQ_CMD_MAGIC;
+
+       slot->hwc.aes.cmd_aad_final.datalen = aadlen;
+       slot->hwc.aes.cmd_aad_final.data = aad_dma;
+}
+
+/*
+ * Fill @slot with an AES_CMD_FINAL command that carries a tag address and
+ * tag length next to the usual input/output/iolen fields.
+ */
+static void vcq_add_aes_aead_final(struct vcq_cmd *slot, u32 core_id, u64 input_dma,
+                                  u64 output_dma, u64 tag_dma,
+                                  u32 iolen, u32 taglen)
+{
+       /* Zero everything first; only header and FINAL fields are set below */
+       memset(slot, 0, sizeof(*slot));
+
+       slot->id = VCQ_CMD_ID(core_id, 0, 1, AES_CMD_FINAL);
+       slot->magic = VCQ_CMD_MAGIC;
+
+       /* Payload */
+       slot->hwc.aes.cmd_final.input = input_dma;
+       slot->hwc.aes.cmd_final.output = output_dma;
+       slot->hwc.aes.cmd_final.iolen = iolen;
+
+       /* Tag */
+       slot->hwc.aes.cmd_final.tag = tag_dma;
+       slot->hwc.aes.cmd_final.taglen = taglen;
+}
+
+/* setkey */
+/*
+ * AEAD ->setkey: validate the AES key length, key the software helpers
+ * (present for CCM transforms only) and finally the CMH key context.
+ * The first failure is returned and later steps are not attempted.
+ */
+static int cmh_aes_aead_setkey(struct crypto_aead *tfm, const u8 *key,
+                              unsigned int keylen)
+{
+       struct cmh_aes_aead_tfm_ctx *tctx = crypto_aead_ctx(tfm);
+       int err;
+
+       /* AES-128, AES-192 and AES-256 only */
+       switch (keylen) {
+       case 16:
+       case 24:
+       case 32:
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       /* Single-block cipher used for the CCM empty-input case */
+       if (tctx->sw_cipher) {
+               err = crypto_cipher_setkey(tctx->sw_cipher, key, keylen);
+               if (err)
+                       return err;
+       }
+
+       /* Software CCM used where the hardware cannot serve the request */
+       if (tctx->fallback) {
+               err = crypto_aead_setkey(tctx->fallback, key, keylen);
+               if (err)
+                       return err;
+       }
+
+       return cmh_key_setkey_raw(&tctx->key, key, keylen, CORE_ID_AES);
+}
+
+/*
+ * AEAD ->setauthsize: accept only the tag lengths valid for the mode and
+ * remember the choice in the transform context.
+ *
+ *   GCM: 4, 8, 12, 13, 14, 15, 16
+ *   CCM: 4, 6, 8, 10, 12, 14, 16
+ *
+ * For CCM every accepted value is also pushed to the software fallback
+ * (when one exists) so that it stays usable for whichever requests are
+ * routed to it.
+ */
+static int cmh_aes_aead_setauthsize(struct crypto_aead *tfm,
+                                   unsigned int authsize)
+{
+       const struct cmh_aes_aead_info *info = cmh_aes_aead_get_info(tfm);
+       struct cmh_aes_aead_tfm_ctx *tctx = crypto_aead_ctx(tfm);
+       int err;
+
+       if (info->type == CMH_AES_AEAD_GCM) {
+               switch (authsize) {
+               case 4:
+               case 8:
+               case 12:
+               case 13:
+               case 14:
+               case 15:
+               case 16:
+                       break;
+               default:
+                       return -EINVAL;
+               }
+
+               tctx->authsize = authsize;
+               return 0;
+       }
+
+       /* CCM */
+       switch (authsize) {
+       case 4:
+       case 6:
+       case 8:
+       case 10:
+       case 12:
+       case 14:
+       case 16:
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       if (tctx->fallback) {
+               err = crypto_aead_setauthsize(tctx->fallback, authsize);
+               if (err)
+                       return err;
+       }
+
+       tctx->authsize = authsize;
+       return 0;
+}
+
+/*
+ * AEAD ->init: reset the transform context, default the tag length to the
+ * algorithm maximum and size the per-request context.
+ *
+ * CCM transforms additionally allocate a single-block "aes" cipher and a
+ * "ccm(aes)" AEAD as software helpers, and reserve room behind the driver
+ * request context for a sub-request to that AEAD.
+ *
+ * NOTE(review): the fallback is looked up with CRYPTO_ALG_NEED_FALLBACK in
+ * the mask; that only keeps this driver from selecting itself if its own
+ * registration sets that flag -- confirm at the registration site.
+ */
+static int cmh_aes_aead_init_tfm(struct crypto_aead *tfm)
+{
+       const struct cmh_aes_aead_info *info = cmh_aes_aead_get_info(tfm);
+       struct cmh_aes_aead_tfm_ctx *tctx = crypto_aead_ctx(tfm);
+       struct crypto_cipher *ci;
+       struct crypto_aead *fb;
+
+       memset(tctx, 0, sizeof(*tctx));
+       tctx->authsize = info->maxauthsize;
+
+       /* GCM needs no software helpers */
+       if (info->type != CMH_AES_AEAD_CCM) {
+               crypto_aead_set_reqsize(tfm,
+                                       sizeof(struct cmh_aes_aead_reqctx));
+               return 0;
+       }
+
+       ci = crypto_alloc_cipher("aes", 0, 0);
+       if (IS_ERR(ci))
+               return PTR_ERR(ci);
+
+       fb = crypto_alloc_aead("ccm(aes)", 0, CRYPTO_ALG_NEED_FALLBACK);
+       if (IS_ERR(fb)) {
+               /* tctx still holds no helper pointers at this point */
+               crypto_free_cipher(ci);
+               return PTR_ERR(fb);
+       }
+
+       tctx->sw_cipher = ci;
+       tctx->fallback = fb;
+
+       /*
+        * Subreq lives at (rctx + 1).  Alignment is guaranteed
+        * by the crypto framework's __ctx ALIGN mechanism.
+        */
+       crypto_aead_set_reqsize(tfm,
+                               sizeof(struct cmh_aes_aead_reqctx) +
+                               sizeof(struct aead_request) +
+                               crypto_aead_reqsize(fb));
+       return 0;
+}
+
+/* AEAD ->exit: release the software helpers (if any) and the key context */
+static void cmh_aes_aead_exit_tfm(struct crypto_aead *tfm)
+{
+       struct cmh_aes_aead_tfm_ctx *ctx = crypto_aead_ctx(tfm);
+       struct crypto_aead *fb = ctx->fallback;
+       struct crypto_cipher *ci = ctx->sw_cipher;
+
+       /* Both helpers are only allocated for CCM transforms */
+       if (fb)
+               crypto_free_aead(fb);
+       if (ci)
+               crypto_free_cipher(ci);
+
+       cmh_key_destroy(&ctx->key);
+}
+
+/* DMA unmap helper */
+/*
+ * Undo the DMA mappings of a request: IV, tag, then payload in/out (only
+ * when there was payload) and AAD (only when there was AAD).
+ */
+static void cmh_aes_aead_unmap_dma(struct cmh_aes_aead_reqctx *rctx)
+{
+       enum dma_data_direction tag_dir;
+       u32 tag_len;
+
+       if (rctx->empty_gcm_fallback) {
+               /*
+                * The empty-GCM path maps a whole AES block as the ECB
+                * output, whatever the authsize, and always device-to-CPU.
+                */
+               tag_len = AES_GCM_HW_IV_SIZE;
+               tag_dir = DMA_FROM_DEVICE;
+       } else {
+               /* Tag is produced on encrypt and consumed on decrypt */
+               tag_len = rctx->authsize;
+               tag_dir = rctx->encrypting ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
+       }
+
+       cmh_dma_unmap_single(rctx->iv_dma, rctx->iv_map_len, DMA_TO_DEVICE);
+       cmh_dma_unmap_single(rctx->tag_dma, tag_len, tag_dir);
+
+       if (rctx->cryptlen > 0) {
+               cmh_dma_unmap_single(rctx->out_dma, rctx->cryptlen,
+                                    DMA_FROM_DEVICE);
+               cmh_dma_unmap_single(rctx->in_dma, rctx->cryptlen,
+                                    DMA_TO_DEVICE);
+       }
+
+       if (rctx->assoclen > 0) {
+               cmh_dma_unmap_single(rctx->aad_dma, rctx->assoclen,
+                                    DMA_TO_DEVICE);
+       }
+}
+
+/*
+ * Free every bounce buffer of a request and clear the pointers, so that
+ * calling this again on the same context is harmless.
+ */
+static void cmh_aes_aead_free_bufs(struct cmh_aes_aead_reqctx *rctx)
+{
+       /* Payload buffers are wiped before being released */
+       kfree_sensitive(rctx->in_buf);
+       kfree_sensitive(rctx->out_buf);
+       rctx->in_buf = NULL;
+       rctx->out_buf = NULL;
+
+       /* IV, tag and AAD copies are released without wiping */
+       kfree(rctx->iv_buf);
+       kfree(rctx->tag_buf);
+       kfree(rctx->aad_buf);
+       rctx->iv_buf = NULL;
+       rctx->tag_buf = NULL;
+       rctx->aad_buf = NULL;
+}
+
+/*
+ * Async completion callback for AEAD requests.
+ *
+ * @data:  the struct aead_request given to the submit helper
+ * @error: transaction status; -EINPROGRESS is passed straight on to the
+ *         requester without touching any buffers
+ *
+ * On success the payload (if any) is copied to req->dst behind the AAD
+ * and, when encrypting, the tag is appended.  Buffers are always unmapped
+ * and freed before the request is completed.
+ */
+static void cmh_aes_aead_complete(void *data, int error)
+{
+       struct aead_request *req = data;
+       struct cmh_aes_aead_reqctx *rctx = aead_request_ctx(req);
+       bool decrypting;
+
+       if (error == -EINPROGRESS) {
+               cmh_complete(&req->base, error);
+               return;
+       }
+
+       cmh_aes_aead_unmap_dma(rctx);
+       decrypting = !rctx->encrypting;
+
+       /*
+        * The eSW AES core reports authentication failures and other core
+        * errors (e.g. DMA timeout) with the same -EIO, so the two cannot
+        * be told apart from the MBX_STATUS.  For a well-formed decrypt
+        * the only expected error is a tag mismatch, and the kernel crypto
+        * API requires -EBADMSG for that, so translate -EIO on decrypt.
+        */
+       if (decrypting && error == -EIO)
+               error = -EBADMSG;
+       if (error)
+               goto out;
+
+       /* GCM empty-input decrypt: compare computed tag with expected */
+       if (decrypting && rctx->empty_gcm_fallback &&
+           crypto_memneq(rctx->tag_buf, rctx->in_buf, rctx->authsize)) {
+               error = -EBADMSG;
+               goto out;
+       }
+
+       if (rctx->cryptlen > 0)
+               scatterwalk_map_and_copy(rctx->out_buf, req->dst,
+                                        req->assoclen, rctx->cryptlen, 1);
+
+       /* The tag is only written out when it was produced here */
+       if (!decrypting)
+               scatterwalk_map_and_copy(rctx->tag_buf, req->dst,
+                                        req->assoclen + rctx->cryptlen,
+                                        rctx->authsize, 1);
+
+out:
+       cmh_aes_aead_free_bufs(rctx);
+       cmh_complete(&req->base, error);
+}
+
+/*
+ * GCM empty-input fallback.
+ *
+ * When both AAD and plaintext are empty, GCM reduces to:
+ *   tag = E(K, J0) where J0 = nonce || 0x00000001
+ *
+ * The eSW GCM engine rejects this degenerate case, so we compute it
+ * via a single ECB block encryption of J0.
+ *
+ * VCQ: [SYS_CMD_WRITE] + AES_CMD_INIT(ECB) + AES_CMD_FINAL + FLUSH
+ *
+ * @req:    AEAD request; only req->iv and, for decrypt, the tag at the
+ *          start of req->src are read here
+ * @aes_op: AES_OP_ENCRYPT produces the tag, any other value verifies it;
+ *          the hardware operation itself is always an ECB encrypt
+ *
+ * The expected tag is read from offset 0 of req->src, i.e. this assumes
+ * the caller only takes this path with req->assoclen == 0 -- TODO confirm
+ * at the call site.  The tag comparison (decrypt) and the tag copy-out
+ * (encrypt) happen in cmh_aes_aead_complete(), keyed off
+ * rctx->empty_gcm_fallback.
+ *
+ * Returns -EINPROGRESS once submitted, -EBUSY unchanged when the submit
+ * helper reports it (buffers are left in place on that path), or a
+ * negative errno after releasing everything allocated here.
+ */
+static int cmh_aes_gcm_empty(struct aead_request *req, u32 aes_op)
+{
+       struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+       struct cmh_aes_aead_tfm_ctx *tctx = crypto_aead_ctx(tfm);
+       struct cmh_aes_aead_reqctx *rctx = aead_request_ctx(req);
+       struct vcq_cmd cmds[CMH_AES_AEAD_MAX_PAYLOAD];
+       u64 key_ref;
+       u32 keylen, authsize;
+       struct core_dispatch d;
+       s32 target_mbx;
+       u32 core_id;
+       u32 idx;
+       int ret;
+       gfp_t gfp;
+
+       authsize = tctx->authsize;
+
+       gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
+             GFP_KERNEL : GFP_ATOMIC;
+
+       memset(rctx, 0, sizeof(*rctx));
+       rctx->cryptlen = 0;
+       rctx->assoclen = 0;
+       rctx->authsize = authsize;
+       rctx->encrypting = (aes_op == AES_OP_ENCRYPT);
+       rctx->empty_gcm_fallback = true;
+
+       /* Build J0 = nonce || 0x00000001 in iv_buf */
+       rctx->iv_buf = kzalloc(AES_GCM_HW_IV_SIZE, gfp);
+       if (!rctx->iv_buf)
+               return -ENOMEM;
+       memcpy(rctx->iv_buf, req->iv, AES_GCM_IV_SIZE);
+       rctx->iv_buf[15] = 0x01; /* big-endian counter = 1 */
+       rctx->iv_map_len = AES_GCM_HW_IV_SIZE;
+
+       rctx->iv_dma = cmh_dma_map_single(rctx->iv_buf, AES_GCM_HW_IV_SIZE,
+                                         DMA_TO_DEVICE);
+       if (cmh_dma_map_error(rctx->iv_dma)) {
+               ret = -ENOMEM;
+               goto out_free_iv;
+       }
+
+       /* Tag buffer -- receives E(K, J0) output */
+       rctx->tag_buf = kzalloc(AES_GCM_HW_IV_SIZE, gfp);
+       if (!rctx->tag_buf) {
+               ret = -ENOMEM;
+               goto out_unmap_iv;
+       }
+       rctx->tag_dma = cmh_dma_map_single(rctx->tag_buf, AES_GCM_HW_IV_SIZE,
+                                          DMA_FROM_DEVICE);
+       if (cmh_dma_map_error(rctx->tag_dma)) {
+               ret = -ENOMEM;
+               goto out_free_tag;
+       }
+
+       /* For decrypt: read expected tag from request for later comparison */
+       if (!rctx->encrypting) {
+               rctx->in_buf = kmalloc(authsize, gfp);
+               if (!rctx->in_buf) {
+                       ret = -ENOMEM;
+                       goto out_unmap_tag;
+               }
+               scatterwalk_map_and_copy(rctx->in_buf, req->src, 0,
+                                        authsize, 0);
+       }
+
+       /* Resolve key */
+       idx = 0;
+       rctx->key_dma = tctx->key.raw.dma;
+       vcq_add_sys_write(&cmds[idx++], SYS_REF_TEMP,
+                         (u64)rctx->key_dma, SYS_REF_NONE,
+                         tctx->key.raw.len,
+                         tctx->key.raw.sys_type);
+       key_ref = SYS_REF_TEMP;
+       keylen = tctx->key.raw.len;
+       d = cmh_core_select_instance(CMH_CORE_AES);
+       target_mbx = d.mbx_idx;
+       core_id = d.core_id;
+
+       /* ECB INIT: single block encryption of J0 */
+       vcq_add_aes_aead_init(&cmds[idx++], core_id, key_ref,
+                             0, keylen, 0, AES_MODE_ECB, AES_OP_ENCRYPT,
+                             0, AES_GCM_HW_IV_SIZE, 0);
+
+       /* FINAL: J0 in, E(K,J0) out */
+       vcq_add_aes_aead_final(&cmds[idx++], core_id,
+                              (u64)rctx->iv_dma, (u64)rctx->tag_dma,
+                              0, AES_GCM_HW_IV_SIZE, 0);
+
+       vcq_add_flush(&cmds[idx++], core_id);
+
+       ret = cmh_vcq_pack_and_submit_async(cmds, idx, rctx->packed,
+                                           CMH_AES_AEAD_MAX_PACKED,
+                                           target_mbx,
+                                           cmh_aes_aead_complete, req,
+                                           !!(req->base.flags &
+                                              CRYPTO_TFM_REQ_MAY_BACKLOG),
+                                           cmh_tm_async_timeout_jiffies());
+       if (ret == -EBUSY)
+               return -EBUSY;
+       if (ret)
+               goto out_free_in;
+
+       return -EINPROGRESS;
+
+out_free_in:
+       kfree_sensitive(rctx->in_buf);
+out_unmap_tag:
+       cmh_dma_unmap_single(rctx->tag_dma, AES_GCM_HW_IV_SIZE,
+                            DMA_FROM_DEVICE);
+out_free_tag:
+       kfree(rctx->tag_buf);
+out_unmap_iv:
+       cmh_dma_unmap_single(rctx->iv_dma, AES_GCM_HW_IV_SIZE, DMA_TO_DEVICE);
+out_free_iv:
+       kfree(rctx->iv_buf);
+       return ret;
+}
+
+/*
+ * CCM empty-input fallback.
+ *
+ * When both AAD and plaintext are empty, CCM reduces to:
+ *   T  = E(K, B0)    -- CBC-MAC of the single formatting block
+ *   S0 = E(K, A0)    -- CTR block zero
+ *   tag = (T XOR S0)[0..authsize-1]
+ *
+ * The eSW rejects this degenerate case, so the driver computes it
+ * synchronously via two crypto_cipher single-block encryptions.
+ *
+ * @req:    AEAD request; only req->iv and the authsize tag bytes at
+ *          offset req->assoclen of req->dst (encrypt) or req->src
+ *          (decrypt) are accessed.
+ * @aes_op: AES_OP_ENCRYPT writes the tag; any other value verifies it.
+ *
+ * Return: 0 on success, -EINVAL if iv[0] is outside [1,7], -EOPNOTSUPP
+ * if the key is not a raw key, -EBADMSG on tag mismatch.
+ */
+static int cmh_aes_ccm_empty(struct aead_request *req, u32 aes_op)
+{
+       struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+       struct cmh_aes_aead_tfm_ctx *tctx = crypto_aead_ctx(tfm);
+       u32 authsize = tctx->authsize;
+       u8 b0[CMH_AES_BLOCK_SIZE], a0[CMH_AES_BLOCK_SIZE];
+       u8 t[CMH_AES_BLOCK_SIZE], s0[CMH_AES_BLOCK_SIZE];
+       u8 tag[CMH_AES_BLOCK_SIZE];
+       int ret = 0;
+       u8 L;
+       u32 i;
+
+       /* Defense-in-depth: iv[0] = L-1, valid L is 2..8 per RFC 3610 S2.1 */
+       if (WARN_ON_ONCE(req->iv[0] < 1 || req->iv[0] > 7))
+               return -EINVAL;
+
+       L = req->iv[0] + 1;
+
+       /* The software cipher can only be used with a raw key */
+       if (tctx->key.mode != CMH_KEY_RAW)
+               return -EOPNOTSUPP;
+
+       /* B0: flags || nonce || Q(=0).  Adata=0, t=authsize, q=L. */
+       memset(b0, 0, CMH_AES_BLOCK_SIZE);
+       b0[0] = (u8)(8 * ((authsize - 2) / 2) + (L - 1));
+       memcpy(&b0[1], &req->iv[1], 15 - L);
+
+       /* A0: (L-1) || nonce || counter(=0) */
+       memset(a0, 0, CMH_AES_BLOCK_SIZE);
+       a0[0] = (u8)(L - 1);
+       memcpy(&a0[1], &req->iv[1], 15 - L);
+
+       crypto_cipher_encrypt_one(tctx->sw_cipher, t, b0);
+       crypto_cipher_encrypt_one(tctx->sw_cipher, s0, a0);
+
+       for (i = 0; i < authsize; i++)
+               tag[i] = t[i] ^ s0[i];
+
+       if (aes_op == AES_OP_ENCRYPT) {
+               scatterwalk_map_and_copy(tag, req->dst,
+                                        req->assoclen, authsize, 1);
+       } else {
+               u8 expected[CMH_AES_BLOCK_SIZE];
+
+               scatterwalk_map_and_copy(expected, req->src,
+                                        req->assoclen, authsize, 0);
+               /* Constant-time comparison of computed vs. received tag */
+               if (crypto_memneq(tag, expected, authsize))
+                       ret = -EBADMSG;
+       }
+
+       /*
+        * T, S0 and the computed tag are key-dependent values; do not
+        * leave them behind on the stack (on decrypt the correct tag
+        * must never be recoverable after a failed verification).
+        */
+       memzero_explicit(t, sizeof(t));
+       memzero_explicit(s0, sizeof(s0));
+       memzero_explicit(tag, sizeof(tag));
+
+       return ret;
+}
+
+/*
+ * CCM authsize=10 fallback.
+ *
+ * The eSW AES CCM core does not support authsize=10 (valid per RFC 3610).
+ * Forward the entire request to the generic CCM implementation.
+ */
+/* Completion callback of the fallback sub-request: complete the parent. */
+static void cmh_aes_ccm_fb_done(void *data, int err)
+{
+       struct aead_request *parent = data;
+
+       cmh_complete(&parent->base, err);
+}
+
+/*
+ * Hand @req over to the fallback transform (tctx->fallback).
+ *
+ * The sub-request is placed in the request context directly behind the
+ * driver's own per-request state and completes the parent through
+ * cmh_aes_ccm_fb_done().  Returns whatever the fallback's encrypt or
+ * decrypt call returns.
+ */
+static int cmh_aes_ccm_fallback(struct aead_request *req, u32 aes_op)
+{
+       struct cmh_aes_aead_reqctx *rctx = aead_request_ctx(req);
+       struct cmh_aes_aead_tfm_ctx *tctx =
+               crypto_aead_ctx(crypto_aead_reqtfm(req));
+       struct aead_request *fb_req = (void *)(rctx + 1);
+
+       aead_request_set_tfm(fb_req, tctx->fallback);
+       aead_request_set_callback(fb_req, req->base.flags,
+                                 cmh_aes_ccm_fb_done, req);
+       aead_request_set_ad(fb_req, req->assoclen);
+       aead_request_set_crypt(fb_req, req->src, req->dst,
+                              req->cryptlen, req->iv);
+
+       if (aes_op == AES_OP_ENCRYPT)
+               return crypto_aead_encrypt(fb_req);
+
+       return crypto_aead_decrypt(fb_req);
+}
+
+/*
+ * Core AEAD encrypt/decrypt -- async path.
+ *
+ * Encrypt: plaintext -> ciphertext + tag appended
+ * Decrypt: ciphertext + tag -> plaintext (tag verified by eSW)
+ *
+ * VCQ: [SYS_CMD_WRITE] + INIT/CCM_INIT + [AAD_FINAL] + FINAL + FLUSH
+ *
+ * @req:    AEAD request to process.
+ * @aes_op: AES_OP_ENCRYPT or AES_OP_DECRYPT.
+ *
+ * AAD, input, output, tag and IV/nonce are each copied into (or allocated
+ * as) a separate contiguous buffer and DMA-mapped before submission.
+ *
+ * Return: -EINPROGRESS once the VCQ has been submitted; -EBUSY if the
+ * submit call returned -EBUSY (buffers are not released here);
+ * -ENOKEY, -EINVAL or -ENOMEM on local failures (everything acquired
+ * so far is released); or the result of the synchronous empty-input /
+ * fallback helpers.
+ */
+static int cmh_aes_aead_crypt(struct aead_request *req, u32 aes_op)
+{
+       struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+       struct cmh_aes_aead_tfm_ctx *tctx = crypto_aead_ctx(tfm);
+       const struct cmh_aes_aead_info *info = cmh_aes_aead_get_info(tfm);
+       struct cmh_aes_aead_reqctx *rctx = aead_request_ctx(req);
+       struct vcq_cmd cmds[CMH_AES_AEAD_MAX_PAYLOAD];
+       u64 key_ref;
+       u32 keylen, authsize, cryptlen;
+       struct core_dispatch d;
+       s32 target_mbx;
+       u32 core_id;
+       u32 idx;
+       int ret;
+       gfp_t gfp;
+
+       if (tctx->key.mode == CMH_KEY_NONE)
+               return -ENOKEY;
+
+       authsize = tctx->authsize;
+
+       /* On decrypt req->cryptlen includes the tag; strip it off */
+       if (aes_op == AES_OP_ENCRYPT) {
+               cryptlen = req->cryptlen;
+       } else {
+               if (req->cryptlen < authsize)
+                       return -EINVAL;
+               cryptlen = req->cryptlen - authsize;
+       }
+
+       /*
+        * Validate CCM IV format early -- the empty-input fallback and
+        * nonce extraction both depend on iv[0] being in range [1,7].
+        */
+       if (info->type == CMH_AES_AEAD_CCM) {
+               if (req->iv[0] < 1 || req->iv[0] > 7)
+                       return -EINVAL;
+       }
+
+       /*
+        * The CMH eSW rejects GCM/CCM when both aadlen and iolen are zero.
+        * For GCM, the tag is simply E(K, J0) -- handle with ECB fallback.
+        * For CCM, compute tag = E(K,B0) XOR E(K,A0) in software.
+        */
+       if (cryptlen == 0 && req->assoclen == 0) {
+               if (info->type == CMH_AES_AEAD_GCM)
+                       return cmh_aes_gcm_empty(req, aes_op);
+               return cmh_aes_ccm_empty(req, aes_op);
+       }
+
+       /*
+        * HW does not support authsize=10 for CCM.  Forward the entire
+        * request to the generic CCM implementation.
+        */
+       if (info->type == CMH_AES_AEAD_CCM && authsize == 10)
+               return cmh_aes_ccm_fallback(req, aes_op);
+
+       /*
+        * HW uses a proprietary LLI scatter-gather format that is
+        * incompatible with struct scatterlist, so the payload is
+        * linearised into contiguous buffers for DMA.  Cap total
+        * size to prevent excessive memory consumption.
+        */
+       if ((u64)cryptlen + req->assoclen > SZ_1M)
+               return -EINVAL;
+
+       /* Only sleep in allocations when the caller allows it */
+       gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
+             GFP_KERNEL : GFP_ATOMIC;
+
+       memset(rctx, 0, sizeof(*rctx));
+       rctx->cryptlen = cryptlen;
+       rctx->assoclen = req->assoclen;
+       rctx->authsize = authsize;
+       rctx->encrypting = (aes_op == AES_OP_ENCRYPT);
+
+       /* Linearise AAD */
+       if (req->assoclen > 0) {
+               rctx->aad_buf = kmalloc(req->assoclen, gfp);
+               if (!rctx->aad_buf)
+                       return -ENOMEM;
+               scatterwalk_map_and_copy(rctx->aad_buf, req->src,
+                                        0, req->assoclen, 0);
+               rctx->aad_dma = cmh_dma_map_single(rctx->aad_buf,
+                                                  req->assoclen,
+                                                   DMA_TO_DEVICE);
+               if (cmh_dma_map_error(rctx->aad_dma)) {
+                       ret = -ENOMEM;
+                       goto out_free_aad;
+               }
+       }
+
+       /* Linearise input */
+       if (cryptlen > 0) {
+               rctx->in_buf = kmalloc(cryptlen, gfp);
+               if (!rctx->in_buf) {
+                       ret = -ENOMEM;
+                       goto out_unmap_aad;
+               }
+               scatterwalk_map_and_copy(rctx->in_buf, req->src,
+                                        req->assoclen, cryptlen, 0);
+               rctx->in_dma = cmh_dma_map_single(rctx->in_buf, cryptlen,
+                                                 DMA_TO_DEVICE);
+               if (cmh_dma_map_error(rctx->in_dma)) {
+                       ret = -ENOMEM;
+                       goto out_free_in;
+               }
+       }
+
+       /* Allocate output buffer */
+       if (cryptlen > 0) {
+               rctx->out_buf = kmalloc(cryptlen, gfp);
+               if (!rctx->out_buf) {
+                       ret = -ENOMEM;
+                       goto out_unmap_in;
+               }
+               rctx->out_dma = cmh_dma_map_single(rctx->out_buf, cryptlen,
+                                                  DMA_FROM_DEVICE);
+               if (cmh_dma_map_error(rctx->out_dma)) {
+                       ret = -ENOMEM;
+                       goto out_free_out;
+               }
+       }
+
+       /* Tag buffer */
+       rctx->tag_buf = kmalloc(authsize, gfp);
+       if (!rctx->tag_buf) {
+               ret = -ENOMEM;
+               goto out_unmap_out;
+       }
+
+       /*
+        * Decrypt: load the received tag (it follows the ciphertext in
+        * req->src).  Encrypt: start from a zeroed buffer.
+        */
+       if (!rctx->encrypting) {
+               scatterwalk_map_and_copy(rctx->tag_buf, req->src,
+                                        req->assoclen + cryptlen,
+                                       authsize, 0);
+       } else {
+               memset(rctx->tag_buf, 0, authsize);
+       }
+
+       /* The tag is device output on encrypt and device input on decrypt */
+       rctx->tag_dma = cmh_dma_map_single(rctx->tag_buf, authsize,
+                                          rctx->encrypting ?
+                                           DMA_FROM_DEVICE : DMA_TO_DEVICE);
+       if (cmh_dma_map_error(rctx->tag_dma)) {
+               ret = -ENOMEM;
+               goto out_free_tag;
+       }
+
+       /* Map IV/nonce */
+       if (info->type == CMH_AES_AEAD_GCM) {
+               /*
+                * AES_GCM_IV_SIZE bytes of IV copied into a zeroed
+                * AES_GCM_HW_IV_SIZE buffer.
+                */
+               rctx->iv_buf = kzalloc(AES_GCM_HW_IV_SIZE, gfp);
+               if (!rctx->iv_buf) {
+                       ret = -ENOMEM;
+                       goto out_unmap_tag;
+               }
+               memcpy(rctx->iv_buf, req->iv, AES_GCM_IV_SIZE);
+               rctx->iv_map_len = AES_GCM_HW_IV_SIZE;
+               rctx->iv_dma = cmh_dma_map_single(rctx->iv_buf,
+                                                 rctx->iv_map_len,
+                                                  DMA_TO_DEVICE);
+       } else {
+               u32 noncelen;
+
+               /*
+                * NOTE(review): iv[0] was already range-checked for CCM
+                * near the top of this function; this second check looks
+                * redundant.
+                */
+               if (req->iv[0] < 1 || req->iv[0] > 7) {
+                       ret = -EINVAL;
+                       goto out_unmap_tag;
+               }
+               /* iv[0] = L-1, so the nonce is 15 - L = 14 - iv[0] bytes */
+               noncelen = 14 - req->iv[0];
+
+               rctx->iv_buf = kmemdup(req->iv + 1, noncelen, gfp);
+               if (!rctx->iv_buf) {
+                       ret = -ENOMEM;
+                       goto out_unmap_tag;
+               }
+               rctx->iv_map_len = noncelen;
+               rctx->iv_dma = cmh_dma_map_single(rctx->iv_buf,
+                                                 rctx->iv_map_len,
+                                                  DMA_TO_DEVICE);
+       }
+       /* Common mapping check for both the GCM and CCM branch */
+       if (cmh_dma_map_error(rctx->iv_dma)) {
+               ret = -ENOMEM;
+               goto out_free_iv;
+       }
+
+       /* Resolve key reference */
+       idx = 0;
+
+       /*
+        * NOTE(review): key.raw is used unconditionally; only
+        * CMH_KEY_NONE is rejected above -- confirm no other key mode
+        * can reach this point.
+        */
+       rctx->key_dma = tctx->key.raw.dma;
+       rctx->keylen = tctx->key.raw.len;
+       vcq_add_sys_write(&cmds[idx++], SYS_REF_TEMP,
+                         (u64)rctx->key_dma, SYS_REF_NONE,
+                         tctx->key.raw.len,
+                         tctx->key.raw.sys_type);
+       key_ref = SYS_REF_TEMP;
+       keylen = tctx->key.raw.len;
+       d = cmh_core_select_instance(CMH_CORE_AES);
+       target_mbx = d.mbx_idx;
+       core_id = d.core_id;
+
+       /* Build INIT command */
+       if (info->type == CMH_AES_AEAD_CCM) {
+               vcq_add_aes_ccm_init(&cmds[idx++], core_id, key_ref,
+                                    (u64)rctx->iv_dma, keylen,
+                                    rctx->iv_map_len, aes_op,
+                                    req->assoclen, cryptlen, authsize);
+       } else {
+               vcq_add_aes_aead_init(&cmds[idx++], core_id, key_ref,
+                                     (u64)rctx->iv_dma, keylen,
+                                     AES_GCM_HW_IV_SIZE, info->aes_mode,
+                                     aes_op, req->assoclen, cryptlen,
+                                     authsize);
+       }
+
+       /* AAD command is only emitted when there is AAD */
+       if (req->assoclen > 0)
+               vcq_add_aes_aad_final(&cmds[idx++], core_id,
+                                     (u64)rctx->aad_dma, req->assoclen);
+
+       /* With no payload the in/out addresses are passed as 0 */
+       vcq_add_aes_aead_final(&cmds[idx++], core_id,
+                              cryptlen > 0 ? (u64)rctx->in_dma : 0,
+                              cryptlen > 0 ? (u64)rctx->out_dma : 0,
+                              (u64)rctx->tag_dma, cryptlen, authsize);
+
+       vcq_add_flush(&cmds[idx++], core_id);
+
+       ret = cmh_vcq_pack_and_submit_async(cmds, idx, rctx->packed,
+                                           CMH_AES_AEAD_MAX_PACKED,
+                                           target_mbx,
+                                           cmh_aes_aead_complete, req,
+                                           !!(req->base.flags &
+                                              CRYPTO_TFM_REQ_MAY_BACKLOG),
+                                           cmh_tm_async_timeout_jiffies());
+       /*
+        * NOTE(review): -EBUSY is returned without cleanup; presumably
+        * the request was backlogged and cmh_aes_aead_complete() now
+        * owns the buffers -- confirm against the submit helper.
+        */
+       if (ret == -EBUSY)
+               return -EBUSY;
+       if (ret)
+               goto out_cleanup_all;
+
+       return -EINPROGRESS;
+
+       /* Unwind in reverse order of acquisition */
+out_cleanup_all:
+       cmh_dma_unmap_single(rctx->iv_dma, rctx->iv_map_len, DMA_TO_DEVICE);
+out_free_iv:
+       kfree(rctx->iv_buf);
+out_unmap_tag:
+       cmh_dma_unmap_single(rctx->tag_dma, authsize,
+                            rctx->encrypting ? DMA_FROM_DEVICE :
+                                              DMA_TO_DEVICE);
+out_free_tag:
+       kfree(rctx->tag_buf);
+out_unmap_out:
+       if (cryptlen > 0)
+               cmh_dma_unmap_single(rctx->out_dma, cryptlen, DMA_FROM_DEVICE);
+out_free_out:
+       kfree_sensitive(rctx->out_buf);
+out_unmap_in:
+       if (cryptlen > 0)
+               cmh_dma_unmap_single(rctx->in_dma, cryptlen, DMA_TO_DEVICE);
+out_free_in:
+       kfree_sensitive(rctx->in_buf);
+out_unmap_aad:
+       if (req->assoclen > 0)
+               cmh_dma_unmap_single(rctx->aad_dma, req->assoclen,
+                                    DMA_TO_DEVICE);
+out_free_aad:
+       kfree(rctx->aad_buf);
+       return ret;
+}
+
+/* aead_alg .encrypt hook: run the common path in the encrypt direction. */
+static int cmh_aes_aead_encrypt(struct aead_request *req)
+{
+       const u32 op = AES_OP_ENCRYPT;
+
+       return cmh_aes_aead_crypt(req, op);
+}
+
+/* aead_alg .decrypt hook: run the common path in the decrypt direction. */
+static int cmh_aes_aead_decrypt(struct aead_request *req)
+{
+       const u32 op = AES_OP_DECRYPT;
+
+       return cmh_aes_aead_crypt(req, op);
+}
+
+/* Registration */
+
+/*
+ * One aead_alg wrapper per aes_aead_algs[] entry (same index); the
+ * entries are filled in by cmh_aes_aead_register().
+ */
+static struct cmh_aes_aead_drv aes_aead_drv_algs[ARRAY_SIZE(aes_aead_algs)];
+
+/**
+ * cmh_aes_aead_register() - Register AES-GCM/CCM AEAD algorithms with the crypto framework
+ *
+ * Fills in one aead_alg per aes_aead_algs[] entry and registers it.  If a
+ * registration fails, the algorithms registered so far are unregistered
+ * again before the error is returned.
+ *
+ * Return: 0 on success, negative errno on failure.
+ */
+int cmh_aes_aead_register(void)
+{
+       unsigned int n;
+       int ret;
+
+       for (n = 0; n < ARRAY_SIZE(aes_aead_algs); n++) {
+               const struct cmh_aes_aead_info *info = &aes_aead_algs[n];
+               struct cmh_aes_aead_drv *drv = &aes_aead_drv_algs[n];
+               struct aead_alg *alg = &drv->alg;
+               bool is_ccm = info->type == CMH_AES_AEAD_CCM;
+
+               drv->info = info;
+               memset(alg, 0, sizeof(*alg));
+
+               /* Identity and generic properties */
+               strscpy(alg->base.cra_name, info->alg_name,
+                       CRYPTO_MAX_ALG_NAME);
+               strscpy(alg->base.cra_driver_name, info->drv_name,
+                       CRYPTO_MAX_ALG_NAME);
+               alg->base.cra_module    = THIS_MODULE;
+               alg->base.cra_blocksize = 1;
+               alg->base.cra_ctxsize   = sizeof(struct cmh_aes_aead_tfm_ctx);
+               alg->base.cra_flags     = CRYPTO_ALG_KERN_DRIVER_ONLY |
+                                         CRYPTO_ALG_ASYNC;
+               if (is_ccm)
+                       alg->base.cra_flags |= CRYPTO_ALG_NEED_FALLBACK;
+
+               /*
+                * CCM is bumped above 300 so we beat the generic
+                * ccm_base template instance.  That template inherits
+                * priority (ctr + cbcmac) / 2 = 300 when both
+                * constituents are at 300, and list ordering would
+                * otherwise let it shadow our driver.
+                */
+               alg->base.cra_priority = is_ccm ? 301 : 300;
+
+               /* AEAD parameters and operations */
+               alg->ivsize      = info->ivsize;
+               alg->maxauthsize = info->maxauthsize;
+               alg->init        = cmh_aes_aead_init_tfm;
+               alg->exit        = cmh_aes_aead_exit_tfm;
+               alg->setkey      = cmh_aes_aead_setkey;
+               alg->setauthsize = cmh_aes_aead_setauthsize;
+               alg->encrypt     = cmh_aes_aead_encrypt;
+               alg->decrypt     = cmh_aes_aead_decrypt;
+
+               ret = crypto_register_aead(alg);
+               if (ret) {
+                       dev_err(cmh_dev(), "cmh_aes_aead: failed to register %s (rc=%d)\n",
+                               info->alg_name, ret);
+                       goto err_unregister;
+               }
+
+               dev_dbg(cmh_dev(), "cmh_aes_aead: registered %s\n", info->alg_name);
+       }
+
+       return 0;
+
+err_unregister:
+       /* Roll back entries [0, n) in reverse order */
+       for (; n > 0; n--)
+               crypto_unregister_aead(&aes_aead_drv_algs[n - 1].alg);
+       return ret;
+}
+
+/**
+ * cmh_aes_aead_unregister() - Unregister AES AEAD algorithms from the crypto framework
+ *
+ * Unregisters every aes_aead_drv_algs[] entry, in ascending index order.
+ */
+void cmh_aes_aead_unregister(void)
+{
+       unsigned int n = 0;
+
+       while (n < ARRAY_SIZE(aes_aead_algs)) {
+               struct cmh_aes_aead_drv *drv = &aes_aead_drv_algs[n];
+
+               crypto_unregister_aead(&drv->alg);
+               dev_dbg(cmh_dev(), "cmh_aes_aead: unregistered %s\n",
+                       aes_aead_algs[n].alg_name);
+               n++;
+       }
+}
diff --git a/drivers/crypto/cmh/cmh_aes_cmac.c b/drivers/crypto/cmh/cmh_aes_cmac.c
new file mode 100644
index 000000000000..a711c575398d
--- /dev/null
+++ b/drivers/crypto/cmh/cmh_aes_cmac.c
@@ -0,0 +1,537 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2026 Cryptography Research, Inc. (CRI).
+ * CMH LKM -- Kernel Crypto API AES-CMAC (ahash) Driver
+ *
+ * Registers cmac(aes) as an ahash algorithm.
+ *
+ * CMAC produces a 16-byte tag (MAC) from a key and message.
+ * VCQ sequence: [SYS_CMD_WRITE] + AES_CMD_INIT(CMAC) +
+ *               AES_CMD_AAD_FINAL_AUTH + FLUSH
+ *
+ * The ahash interface accumulates data in a kernel buffer via .update(),
+ * then .final() builds and submits the VCQ asynchronously.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/crypto.h>
+#include <crypto/internal/hash.h>
+#include <crypto/scatterwalk.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+
+#include "cmh_aes.h"
+#include "cmh_vcq.h"
+#include "cmh_aes_abi.h"
+#include "cmh_sys_abi.h"
+#include "cmh_sys.h"
+#include "cmh_txn.h"
+#include "cmh_dma.h"
+#include "cmh_key.h"
+
+/* CMAC tag (MAC) length in bytes */
+#define AES_CMAC_DIGEST_SIZE   16U
+/* AES block size in bytes */
+#define AES_CMAC_BLOCK_SIZE    16U
+
+/*
+ * Maximum accumulated data for CMAC (64 KiB) -- driver-imposed, not HW.
+ *
+ * The AES core does not expose external save/restore VCQ commands,
+ * so the driver must accumulate all data in kernel memory via
+ * .update() and submit it atomically in .final().  This cap limits
+ * the per-request kernel allocation; .update() fails with -EINVAL
+ * once the running total would exceed it.
+ */
+#define AES_CMAC_MAX_DATA      (64 * 1024)
+
+/*
+ * Per-transform context.
+ *
+ * all_chunks links the chunks of the requests on this tfm through
+ * cmh_aes_cmac_chunk::tfm_node; chunk_lock is taken around every
+ * addition to or removal from that list.
+ */
+struct cmh_aes_cmac_tfm_ctx {
+       struct cmh_key_ctx key;         /* key state, set by .setkey() */
+       spinlock_t         chunk_lock;  /* protects all_chunks */
+       struct list_head   all_chunks;  /* orphan-safe chunk tracking */
+};
+
+/* One chunk per .update() call -- data is embedded via flexible array */
+struct cmh_aes_cmac_chunk {
+       struct list_head list;     /* node in cmh_aes_cmac_reqctx::chunks */
+       struct list_head tfm_node; /* per-tfm orphan tracking */
+       u32 len;                   /* number of valid bytes in data[] */
+       u8 data[];                 /* copy of the input passed to .update() */
+};
+
+/*
+ * Maximum payload commands:
+ *   [SYS_CMD_WRITE] + AES_CMD_INIT + AES_CMD_AAD_FINAL_AUTH + FLUSH = 4
+ */
+#define CMH_AES_CMAC_MAX_PAYLOAD       4
+/* Number of entries in the packed[] array handed to the submit helper */
+#define CMH_AES_CMAC_MAX_PACKED                (CMH_AES_CMAC_MAX_PAYLOAD * 2)
+
+/* Per-request context (lives in ahash_request::__ctx) */
+struct cmh_aes_cmac_reqctx {
+       struct list_head chunks;        /* cmh_aes_cmac_chunk::list, in arrival order */
+       u32  total_len;                 /* sum of all chunk lengths */
+       u8  *buf;       /* linearised in final() for DMA */
+       /* DMA state for async final */
+       dma_addr_t key_dma;             /* copy of tctx->key.raw.dma */
+       dma_addr_t in_dma;              /* mapping of buf (only if total_len > 0) */
+       dma_addr_t tag_dma;             /* mapping of tag_buf */
+       u8 *tag_buf;                    /* AES_CMAC_DIGEST_SIZE bytes, device output */
+       u32 keylen;                     /* copy of tctx->key.raw.len */
+       struct vcq_cmd packed[CMH_AES_CMAC_MAX_PACKED];
+};
+
+/* Flat state for export/import -- holds accumulated input data only */
+struct cmh_aes_cmac_export_state {
+       u32 total_len;  /* number of valid bytes in data[] */
+       u8  data[];     /* all chunks concatenated in arrival order */
+};
+
+/*
+ * Flat state buffer for export/import.  The CMH AES core does not
+ * support save/restore of intermediate CMAC state, so this driver
+ * accumulates input in SW and serialises the buffer on export.
+ *
+ * A 4096-byte state buffer caps the exportable accumulated-data window.
+ * Full-range export is not feasible because the crypto subsystem
+ * pre-allocates statesize bytes per request.  Export returns -ENOSPC
+ * if the caller has accumulated more than CMH_AES_CMAC_EXPORT_MAX;
+ * import rejects a larger total_len with -EINVAL.
+ */
+#define CMH_AES_CMAC_STATE_SIZE 4096
+/* Payload bytes that fit behind the export-state header */
+#define CMH_AES_CMAC_EXPORT_MAX \
+       (CMH_AES_CMAC_STATE_SIZE - sizeof(struct cmh_aes_cmac_export_state))
+
+/*
+ * Export/import of hardware CMAC state: not supported.
+ *
+ * The AES core lacks external save/restore VCQ commands, so there is
+ * no way to checkpoint intermediate CMAC state to host memory.
+ * Pending an eSW ABI extension that adds save/restore for the AES core,
+ * .export()/.import() serialise only the software-accumulated input.
+ */
+
+/*
+ * ahash .setkey(): accept 16-, 24- or 32-byte keys and store them as a
+ * raw key for the AES core.  Any other length yields -EINVAL; otherwise
+ * the result of cmh_key_setkey_raw() is returned.
+ */
+static int cmh_aes_cmac_setkey(struct crypto_ahash *tfm, const u8 *key,
+                              unsigned int keylen)
+{
+       struct cmh_aes_cmac_tfm_ctx *tctx = crypto_ahash_ctx(tfm);
+
+       switch (keylen) {
+       case 16:
+       case 24:
+       case 32:
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       return cmh_key_setkey_raw(&tctx->key, key, keylen, CORE_ID_AES);
+}
+
+/*
+ * Release every chunk queued on @rctx: unlink it from both the request
+ * list and the per-tfm tracking list, then wipe and free it.  The whole
+ * drain runs under tctx->chunk_lock.  Resets rctx->total_len to 0.
+ */
+static void cmh_aes_cmac_free_chunks(struct cmh_aes_cmac_reqctx *rctx,
+                                    struct cmh_aes_cmac_tfm_ctx *tctx)
+{
+       struct cmh_aes_cmac_chunk *chunk;
+
+       spin_lock_bh(&tctx->chunk_lock);
+       while (!list_empty(&rctx->chunks)) {
+               chunk = list_first_entry(&rctx->chunks,
+                                        struct cmh_aes_cmac_chunk, list);
+               list_del(&chunk->list);
+               list_del(&chunk->tfm_node);
+               kfree_sensitive(chunk);
+       }
+       spin_unlock_bh(&tctx->chunk_lock);
+
+       rctx->total_len = 0;
+}
+
+/* ahash .init(): reset the request context to an empty chunk list. */
+static int cmh_aes_cmac_init(struct ahash_request *req)
+{
+       struct cmh_aes_cmac_reqctx *state = ahash_request_ctx(req);
+
+       memset(state, 0, sizeof(struct cmh_aes_cmac_reqctx));
+       INIT_LIST_HEAD(&state->chunks);
+
+       return 0;
+}
+
+/*
+ * ahash .update(): copy the request's input into a freshly allocated
+ * chunk and queue it on the request (and on the per-tfm tracking list).
+ * No hardware is touched here.
+ *
+ * Return: 0 on success (including zero-length input), -EINVAL if the
+ * running total would exceed AES_CMAC_MAX_DATA, -ENOMEM on allocation
+ * failure.  On error all previously queued chunks are freed.
+ */
+static int cmh_aes_cmac_update(struct ahash_request *req)
+{
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+       struct cmh_aes_cmac_tfm_ctx *tctx = crypto_ahash_ctx(tfm);
+       struct cmh_aes_cmac_reqctx *rctx = ahash_request_ctx(req);
+       struct cmh_aes_cmac_chunk *chunk;
+       gfp_t gfp;
+       int ret;
+
+       if (!req->nbytes)
+               return 0;
+
+       /* Written as a subtraction so the comparison cannot wrap */
+       if (req->nbytes > AES_CMAC_MAX_DATA - rctx->total_len) {
+               ret = -EINVAL;
+               goto err_free_chunks;
+       }
+
+       gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
+             GFP_KERNEL : GFP_ATOMIC;
+
+       chunk = kmalloc(sizeof(*chunk) + req->nbytes, gfp);
+       if (!chunk) {
+               ret = -ENOMEM;
+               goto err_free_chunks;
+       }
+
+       chunk->len = req->nbytes;
+       /* Input is either a virtual address or a scatterlist */
+       if (req->base.flags & CRYPTO_AHASH_REQ_VIRT)
+               memcpy(chunk->data, req->svirt, req->nbytes);
+       else
+               scatterwalk_map_and_copy(chunk->data, req->src,
+                                        0, req->nbytes, 0);
+
+       list_add_tail(&chunk->list, &rctx->chunks);
+       spin_lock_bh(&tctx->chunk_lock);
+       list_add_tail(&chunk->tfm_node, &tctx->all_chunks);
+       spin_unlock_bh(&tctx->chunk_lock);
+       rctx->total_len += req->nbytes;
+       return 0;
+
+err_free_chunks:
+       /*
+        * Terminal error -- free all previously accumulated chunks.
+        * Callers may not call .final() on error, so they would leak.
+        */
+       cmh_aes_cmac_free_chunks(rctx, tctx);
+       return ret;
+}
+
+/*
+ * Completion callback for the VCQ submitted by cmh_aes_cmac_final().
+ *
+ * @data:  the ahash_request passed at submit time.
+ * @error: completion status; -EINPROGRESS is forwarded to the caller
+ *         without releasing anything, any other value is final.
+ *
+ * On a final status the DMA mappings are torn down, the tag is copied
+ * to req->result on success, and all per-request buffers and chunks
+ * are freed before the request is completed.
+ */
+static void cmh_aes_cmac_complete(void *data, int error)
+{
+       struct ahash_request *req = data;
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+       struct cmh_aes_cmac_tfm_ctx *tctx = crypto_ahash_ctx(tfm);
+       struct cmh_aes_cmac_reqctx *rctx = ahash_request_ctx(req);
+
+       /* Not a final status: just pass the notification on */
+       if (error == -EINPROGRESS) {
+               cmh_complete(&req->base, error);
+               return;
+       }
+
+       /* Unmap DMA */
+       if (rctx->total_len > 0)
+               cmh_dma_unmap_single(rctx->in_dma, rctx->total_len,
+                                    DMA_TO_DEVICE);
+       cmh_dma_unmap_single(rctx->tag_dma, AES_CMAC_DIGEST_SIZE,
+                            DMA_FROM_DEVICE);
+
+       /* Read the tag only after the mapping has been released */
+       if (!error)
+               memcpy(req->result, rctx->tag_buf, AES_CMAC_DIGEST_SIZE);
+
+       kfree(rctx->tag_buf);
+       rctx->tag_buf = NULL;
+       kfree_sensitive(rctx->buf);
+       rctx->buf = NULL;
+       cmh_aes_cmac_free_chunks(rctx, tctx);
+       cmh_complete(&req->base, error);
+}
+
+/*
+ * ahash .final(): linearise the accumulated chunks, map the input and
+ * tag buffers for DMA, then build and submit the VCQ
+ * [SYS_CMD_WRITE] + AES_CMD_INIT(CMAC) + AES_CMD_AAD_FINAL_AUTH + FLUSH.
+ *
+ * Return: -EINPROGRESS once submitted (cmh_aes_cmac_complete() finishes
+ * the request), -EBUSY if the submit call returned -EBUSY, -ENOKEY if no
+ * key is set, -ENOMEM on allocation/mapping failure, or the submit error.
+ * On every error other than -EBUSY all chunks and buffers are released.
+ */
+static int cmh_aes_cmac_final(struct ahash_request *req)
+{
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+       struct cmh_aes_cmac_tfm_ctx *tctx = crypto_ahash_ctx(tfm);
+       struct cmh_aes_cmac_reqctx *rctx = ahash_request_ctx(req);
+       struct vcq_cmd cmds[CMH_AES_CMAC_MAX_PAYLOAD];
+       u64 key_ref;
+       u32 keylen;
+       struct core_dispatch d;
+       s32 target_mbx;
+       u32 core_id;
+       u32 idx;
+       int ret;
+       gfp_t gfp;
+
+       if (tctx->key.mode == CMH_KEY_NONE) {
+               ret = -ENOKEY;
+               goto out_free_buf;
+       }
+
+       gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
+             GFP_KERNEL : GFP_ATOMIC;
+
+       /* Linearise accumulated chunks into a contiguous buffer for DMA */
+       if (rctx->total_len > 0) {
+               struct cmh_aes_cmac_chunk *c;
+               u32 off = 0;
+
+               rctx->buf = kmalloc(rctx->total_len, gfp);
+               if (!rctx->buf) {
+                       ret = -ENOMEM;
+                       goto out_free_chunks;
+               }
+               list_for_each_entry(c, &rctx->chunks, list) {
+                       memcpy(rctx->buf + off, c->data, c->len);
+                       off += c->len;
+               }
+       }
+
+       /* Tag output buffer */
+       rctx->tag_buf = kzalloc(AES_CMAC_DIGEST_SIZE, gfp);
+       if (!rctx->tag_buf) {
+               ret = -ENOMEM;
+               goto out_free_buf;
+       }
+
+       rctx->tag_dma = cmh_dma_map_single(rctx->tag_buf,
+                                          AES_CMAC_DIGEST_SIZE,
+                                           DMA_FROM_DEVICE);
+       if (cmh_dma_map_error(rctx->tag_dma)) {
+               ret = -ENOMEM;
+               goto out_free_tag;
+       }
+
+       /* Map input data (may be zero-length for empty CMAC) */
+       if (rctx->total_len > 0) {
+               rctx->in_dma = cmh_dma_map_single(rctx->buf, rctx->total_len,
+                                                 DMA_TO_DEVICE);
+               if (cmh_dma_map_error(rctx->in_dma)) {
+                       ret = -ENOMEM;
+                       goto out_unmap_tag;
+               }
+       }
+
+       /* Resolve key */
+       idx = 0;
+
+       /*
+        * NOTE(review): key.raw is used unconditionally; only
+        * CMH_KEY_NONE is rejected above -- confirm no other key mode
+        * can reach this point.
+        */
+       rctx->key_dma = tctx->key.raw.dma;
+       rctx->keylen = tctx->key.raw.len;
+       vcq_add_sys_write(&cmds[idx++], SYS_REF_TEMP,
+                         (u64)rctx->key_dma, SYS_REF_NONE,
+                         tctx->key.raw.len,
+                         tctx->key.raw.sys_type);
+       key_ref = SYS_REF_TEMP;
+       keylen = tctx->key.raw.len;
+       d = cmh_core_select_instance(CMH_CORE_AES);
+       target_mbx = d.mbx_idx;
+       core_id = d.core_id;
+
+       /*
+        * INIT: mode=CMAC, op=ENCRYPT (CMAC always "encrypts")
+        * CMAC data goes through the AAD path:
+        *   aadlen = total data length, iolen = 0
+        */
+       {
+               struct vcq_cmd *slot = &cmds[idx++];
+
+               memset(slot, 0, sizeof(*slot));
+               slot->magic = VCQ_CMD_MAGIC;
+               slot->id = VCQ_CMD_ID(core_id, 0, 1, AES_CMD_INIT);
+               slot->hwc.aes.cmd_init.key = key_ref;
+               slot->hwc.aes.cmd_init.iv = 0;
+               slot->hwc.aes.cmd_init.keylen = keylen;
+               slot->hwc.aes.cmd_init.ivlen = 0;
+               slot->hwc.aes.cmd_init.mode = AES_MODE_CMAC;
+               slot->hwc.aes.cmd_init.op = AES_OP_ENCRYPT;
+               slot->hwc.aes.cmd_init.aadlen = rctx->total_len;
+               slot->hwc.aes.cmd_init.iolen = 0;
+               slot->hwc.aes.cmd_init.taglen = AES_CMAC_DIGEST_SIZE;
+       }
+
+       /* AAD_FINAL_AUTH: final AAD + tag extraction in one atomic step */
+       {
+               struct vcq_cmd *slot = &cmds[idx++];
+
+               memset(slot, 0, sizeof(*slot));
+               slot->magic = VCQ_CMD_MAGIC;
+               slot->id = VCQ_CMD_ID(core_id, 0, 1, AES_CMD_AAD_FINAL_AUTH);
+               /* Empty message: data address is passed as 0 */
+               slot->hwc.aes.cmd_aad_final_auth.data =
+                       rctx->total_len > 0 ? (u64)rctx->in_dma : 0;
+               slot->hwc.aes.cmd_aad_final_auth.datalen = rctx->total_len;
+               slot->hwc.aes.cmd_aad_final_auth.tag = (u64)rctx->tag_dma;
+               slot->hwc.aes.cmd_aad_final_auth.taglen = AES_CMAC_DIGEST_SIZE;
+       }
+
+       vcq_add_flush(&cmds[idx++], core_id);
+
+       ret = cmh_vcq_pack_and_submit_async(cmds, idx, rctx->packed,
+                                           CMH_AES_CMAC_MAX_PACKED,
+                                           target_mbx,
+                                           cmh_aes_cmac_complete, req,
+                                           !!(req->base.flags &
+                                              CRYPTO_TFM_REQ_MAY_BACKLOG),
+                                           cmh_tm_async_timeout_jiffies());
+       /* -EBUSY = backlogged; ownership transferred to callback. */
+       if (ret == -EBUSY)
+               return -EBUSY;
+       if (ret)
+               goto out_cleanup_all;
+
+       return -EINPROGRESS;
+
+       /* Unwind in reverse order of acquisition */
+out_cleanup_all:
+       if (rctx->total_len > 0 && !cmh_dma_map_error(rctx->in_dma))
+               cmh_dma_unmap_single(rctx->in_dma, rctx->total_len,
+                                    DMA_TO_DEVICE);
+out_unmap_tag:
+       cmh_dma_unmap_single(rctx->tag_dma, AES_CMAC_DIGEST_SIZE,
+                            DMA_FROM_DEVICE);
+out_free_tag:
+       /*
+        * NOTE(review): unlike rctx->buf below, rctx->tag_buf is not
+        * reset to NULL after being freed here.
+        */
+       kfree(rctx->tag_buf);
+       /* Both labels share one tail: drop the chunks and the linear copy */
+out_free_buf:
+out_free_chunks:
+       cmh_aes_cmac_free_chunks(rctx, tctx);
+       kfree_sensitive(rctx->buf);
+       rctx->buf = NULL;
+       rctx->total_len = 0;
+       return ret;
+}
+
+/*
+ * ahash .export()/.import(): serialize/deserialize the software
+ * accumulation buffer.  No HW state is involved: the AES core does
+ * not support save/restore, so only the queued input is exported.
+ */
+
+static int cmh_aes_cmac_export(struct ahash_request *req, void *out)
+{
+       struct cmh_aes_cmac_reqctx *rctx = ahash_request_ctx(req);
+       struct cmh_aes_cmac_export_state *state = out;
+       struct cmh_aes_cmac_chunk *chunk;
+       u32 offset = 0;
+
+       if (rctx->total_len > CMH_AES_CMAC_EXPORT_MAX)
+               return -ENOSPC;
+
+       state->total_len = rctx->total_len;
+       list_for_each_entry(chunk, &rctx->chunks, list) {
+               memcpy(state->data + offset, chunk->data, chunk->len);
+               offset += chunk->len;
+       }
+       return 0;
+}
+
+static int cmh_aes_cmac_import(struct ahash_request *req, const void *in)
+{
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+       struct cmh_aes_cmac_tfm_ctx *tctx = crypto_ahash_ctx(tfm);
+       struct cmh_aes_cmac_reqctx *rctx = ahash_request_ctx(req);
+       const struct cmh_aes_cmac_export_state *state = in;
+       struct cmh_aes_cmac_chunk *chunk;
+
+       /*
+        * Do NOT call free_chunks() here: the crypto API does not
+        * guarantee the request context is in a valid state before
+        * import(), so the list pointers may be stale or invalid.
+        * Re-initialize from scratch instead.  Any pre-existing chunks
+        * are tracked on tctx->all_chunks and freed in exit_tfm.
+        */
+       memset(rctx, 0, sizeof(*rctx));
+       INIT_LIST_HEAD(&rctx->chunks);
+
+       if (state->total_len > CMH_AES_CMAC_EXPORT_MAX)
+               return -EINVAL;
+
+       if (state->total_len) {
+               chunk = kmalloc(sizeof(*chunk) + state->total_len, GFP_KERNEL);
+               if (!chunk)
+                       return -ENOMEM;
+               chunk->len = state->total_len;
+               memcpy(chunk->data, state->data, state->total_len);
+               list_add_tail(&chunk->list, &rctx->chunks);
+               spin_lock_bh(&tctx->chunk_lock);
+               list_add_tail(&chunk->tfm_node, &tctx->all_chunks);
+               spin_unlock_bh(&tctx->chunk_lock);
+               rctx->total_len = state->total_len;
+       }
+       return 0;
+}
+
+static int cmh_aes_cmac_finup(struct ahash_request *req)
+{
+       int err;
+
+       err = cmh_aes_cmac_update(req);
+       if (err)
+               return err;
+       return cmh_aes_cmac_final(req);
+}
+
+static int cmh_aes_cmac_digest(struct ahash_request *req)
+{
+       int err;
+
+       err = cmh_aes_cmac_init(req);
+       if (err)
+               return err;
+       return cmh_aes_cmac_finup(req);
+}
+
+static int cmh_aes_cmac_init_tfm(struct crypto_ahash *tfm)
+{
+       struct cmh_aes_cmac_tfm_ctx *tctx = crypto_ahash_ctx(tfm);
+
+       memset(tctx, 0, sizeof(*tctx));
+       spin_lock_init(&tctx->chunk_lock);
+       INIT_LIST_HEAD(&tctx->all_chunks);
+       crypto_ahash_set_reqsize(tfm, sizeof(struct cmh_aes_cmac_reqctx));
+       return 0;
+}
+
+static void cmh_aes_cmac_exit_tfm(struct crypto_ahash *tfm)
+{
+       struct cmh_aes_cmac_tfm_ctx *tctx = crypto_ahash_ctx(tfm);
+       struct cmh_aes_cmac_chunk *c, *tmp;
+
+       /* Free any orphaned chunks (e.g. testmgr export/reimport poison) */
+       spin_lock_bh(&tctx->chunk_lock);
+       list_for_each_entry_safe(c, tmp, &tctx->all_chunks, tfm_node) {
+               list_del(&c->tfm_node);
+               kfree_sensitive(c);
+       }
+       spin_unlock_bh(&tctx->chunk_lock);
+
+       cmh_key_destroy(&tctx->key);
+}
+
+static struct ahash_alg cmh_aes_cmac_alg = {
+       .init           = cmh_aes_cmac_init,
+       .update         = cmh_aes_cmac_update,
+       .final          = cmh_aes_cmac_final,
+       .finup          = cmh_aes_cmac_finup,
+       .digest         = cmh_aes_cmac_digest,
+       .export         = cmh_aes_cmac_export,
+       .import         = cmh_aes_cmac_import,
+       .setkey         = cmh_aes_cmac_setkey,
+       .init_tfm       = cmh_aes_cmac_init_tfm,
+       .exit_tfm       = cmh_aes_cmac_exit_tfm,
+       .halg           = {
+               .digestsize     = AES_CMAC_DIGEST_SIZE,
+               .statesize      = CMH_AES_CMAC_STATE_SIZE,
+               .base           = {
+                       .cra_name        = "cmac(aes)",
+                       .cra_driver_name = "cri-cmh-cmac-aes",
+                       .cra_priority    = 300,
+                       .cra_flags       = CRYPTO_ALG_KERN_DRIVER_ONLY |
+                                          CRYPTO_ALG_NO_FALLBACK |
+                                          CRYPTO_ALG_ASYNC |
+                                          CRYPTO_ALG_REQ_VIRT,
+                       .cra_blocksize   = AES_CMAC_BLOCK_SIZE,
+                       .cra_ctxsize     = sizeof(struct cmh_aes_cmac_tfm_ctx),
+                       .cra_module      = THIS_MODULE,
+               },
+       },
+};
+
+/**
+ * cmh_aes_cmac_register() - Register AES-CMAC hash algorithm with the crypto framework
+ *
+ * Return: 0 on success, negative errno on failure.
+ */
+int cmh_aes_cmac_register(void)
+{
+       int ret;
+
+       ret = crypto_register_ahash(&cmh_aes_cmac_alg);
+       if (ret)
+               dev_err(cmh_dev(), "cmh_aes_cmac: failed to register cmac(aes) (rc=%d)\n",
+                       ret);
+       else
+               dev_dbg(cmh_dev(), "cmh_aes_cmac: registered cmac(aes)\n");
+
+       return ret;
+}
+
+/**
+ * cmh_aes_cmac_unregister() - Unregister AES-CMAC hash algorithm from the crypto framework
+ */
+void cmh_aes_cmac_unregister(void)
+{
+       crypto_unregister_ahash(&cmh_aes_cmac_alg);
+       dev_dbg(cmh_dev(), "cmh_aes_cmac: unregistered cmac(aes)\n");
+}
diff --git a/drivers/crypto/cmh/cmh_main.c b/drivers/crypto/cmh/cmh_main.c
index 56541e0d4219..1edd8d14c666 100644
--- a/drivers/crypto/cmh/cmh_main.c
+++ b/drivers/crypto/cmh/cmh_main.c
@@ -34,6 +34,7 @@
 #include "cmh_cshake.h"
 #include "cmh_kmac.h"
 #include "cmh_sm3.h"
+#include "cmh_aes.h"
 #include "cmh_mgmt.h"
 #include "cmh_registers.h"
 #include "cmh_debugfs.h"
@@ -221,6 +222,21 @@ static int cmh_probe(struct platform_device *pdev)
        if (ret)
                goto err_sm3_register;

+       /* Register AES skcipher algorithms */
+       ret = cmh_aes_register();
+       if (ret)
+               goto err_aes_register;
+
+       /* Register AES AEAD algorithms (GCM, CCM) */
+       ret = cmh_aes_aead_register();
+       if (ret)
+               goto err_aes_aead_register;
+
+       /* Register AES CMAC algorithm */
+       ret = cmh_aes_cmac_register();
+       if (ret)
+               goto err_aes_cmac_register;
+
        /* Register key management device (/dev/cmh_mgmt) */
        ret = cmh_mgmt_register();
        if (ret)
@@ -233,6 +249,12 @@ static int cmh_probe(struct platform_device *pdev)
        return 0;

 err_mgmt_register:
+       cmh_aes_cmac_unregister();
+err_aes_cmac_register:
+       cmh_aes_aead_unregister();
+err_aes_aead_register:
+       cmh_aes_unregister();
+err_aes_register:
        cmh_sm3_unregister();
 err_sm3_register:
        cmh_kmac_unregister();
@@ -269,6 +291,9 @@ static void cmh_remove(struct platform_device *pdev)
        cfg = &dev->config;

        cmh_mgmt_unregister();
+       cmh_aes_cmac_unregister();
+       cmh_aes_aead_unregister();
+       cmh_aes_unregister();
        cmh_sm3_unregister();
        cmh_kmac_unregister();
        cmh_cshake_unregister();
diff --git a/drivers/crypto/cmh/include/cmh_aes.h b/drivers/crypto/cmh/include/cmh_aes.h
new file mode 100644
index 000000000000..591afaa36f85
--- /dev/null
+++ b/drivers/crypto/cmh/include/cmh_aes.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2026 Cryptography Research, Inc. (CRI).
+ * CMH LKM -- AES Crypto API Drivers
+ *
+ * Registers AES algorithms with the Linux crypto subsystem:
+ *   skcipher: ecb/cbc/ctr/cfb/xts(aes)
+ *   aead:     gcm/ccm(aes)
+ *   ahash:    cmac(aes)
+ */
+
+#ifndef CMH_AES_H
+#define CMH_AES_H
+
+int  cmh_aes_register(void);
+void cmh_aes_unregister(void);
+
+int  cmh_aes_aead_register(void);
+void cmh_aes_aead_unregister(void);
+
+int  cmh_aes_cmac_register(void);
+void cmh_aes_cmac_unregister(void);
+
+#endif /* CMH_AES_H */
--
2.43.7


** This message and any attachments are for the sole use of the intended recipient(s). It may contain information that is confidential and privileged. If you are not the intended recipient of this message, you are prohibited from printing, copying, forwarding or saving it. Please delete the message and attachments and notify the sender immediately. **

Rambus Inc.<http://www.rambus.com>

^ permalink raw reply related

* [PATCH 01/19] dt-bindings: crypto: add Rambus CryptoManager Hub
From: Saravanakrishnan Krishnamoorthy @ 2026-06-25 17:33 UTC (permalink / raw)
  To: Albert Ou, Alex Ousherovitch, Conor Dooley, David S. Miller,
	Herbert Xu, Jonathan Corbet, Krzysztof Kozlowski, Palmer Dabbelt,
	Paul Walmsley, Rob Herring, Saravanakrishnan Krishnamoorthy,
	Shuah Khan
  Cc: Alexandre Ghiti, devicetree, Joel Wittenauer, linux-api,
	linux-crypto, linux-doc, linux-kernel, linux-kselftest,
	linux-riscv, Shuah Khan, sipsupport, Thi Nguyen
In-Reply-To: <20260625173328.1140487-1-skrishnamoorthy@rambus.com>

From: Alex Ousherovitch <aousherovitch@rambus.com>

Add device tree binding schema for the CRI CryptoManager Hub (CMH)
hardware crypto accelerator.  The binding covers the parent SoC-level
node with register region, interrupt, DMA properties, and per-core
child nodes identified by compatible string and unit address.

Register the 'cri' vendor prefix for Cryptography Research, Inc.

Co-developed-by: Saravanakrishnan Krishnamoorthy <skrishnamoorthy@rambus.com>
Signed-off-by: Saravanakrishnan Krishnamoorthy <skrishnamoorthy@rambus.com>
Signed-off-by: Alex Ousherovitch <aousherovitch@rambus.com>
Reviewed-by: Joel Wittenauer <Joel.Wittenauer@cryptography.com>
Reviewed-by: Thi Nguyen <thin@rambus.com>
---
 .../devicetree/bindings/crypto/cri,cmh.yaml   | 222 ++++++++++++++++++
 .../devicetree/bindings/vendor-prefixes.yaml  |   2 +
 2 files changed, 224 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/crypto/cri,cmh.yaml

diff --git a/Documentation/devicetree/bindings/crypto/cri,cmh.yaml b/Documentation/devicetree/bindings/crypto/cri,cmh.yaml
new file mode 100644
index 000000000000..db41132e0591
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/cri,cmh.yaml
@@ -0,0 +1,222 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/cri,cmh.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: CRI CryptoManager Hub (CMH) Hardware Crypto Accelerator
+
+maintainers:
+  - Alex Ousherovitch <aousherovitch@rambus.com>
+  - Saravanakrishnan Krishnamoorthy <skrishnamoorthy@rambus.com>
+  - Joel Wittenauer <Joel.Wittenauer@cryptography.com>
+
+description: |
+  The CRI CryptoManager Hub (CMH) is a hardware cryptographic accelerator accessed
+  via a mailbox-based VCQ (Virtual Command Queue) interface.  The host
+  writes VCQ command sequences into per-mailbox DMA queue buffers and
+  rings a doorbell; the CMH eSW processes them and signals completion
+  via interrupt.
+
+  Supported algorithm families: SHA-2, SHA-3, SM3, AES, SM4,
+  ChaCha20-Poly1305, RSA, ECDSA, EdDSA, ECDH, SM2, ML-KEM, ML-DSA,
+  SLH-DSA, LMS, XMSS, DRBG.
+
+properties:
+  compatible:
+    const: cri,cmh
+
+  reg:
+    maxItems: 1
+    description:
+      SIC (System Interface Controller) MMIO region.  Mailbox instance
+      registers are at offsets N * 0x1000 within this region.
+
+  interrupts:
+    minItems: 1
+    maxItems: 64
+    description:
+      Per-mailbox completion/error interrupts from the CryptoManager Hub,
+      matching the real CMH ch_sys_interrupt_mbx[N-1:0] topology.
+      Entry i corresponds to MBX instance i.  The driver maps each
+      configured mailbox (cri,mbx-instances) to its DT interrupt
+      index and registers a separate threaded IRQ handler per MBX.
+
+  interrupt-names:
+    minItems: 1
+    maxItems: 64
+    items:
+      pattern: '^mbx[0-9]+$'
+    description:
+      Names for each mailbox interrupt, matching the interrupts array.
+      Format is "mbxN" where N is the mailbox instance index.
+
+  cri,mbx-instances:
+    $ref: /schemas/types.yaml#/definitions/uint32-array
+    minItems: 1
+    maxItems: 64
+    description:
+      Array of 0-based mailbox instance indices to configure.
+      Each index N maps to register offset N * 0x1000 within the
+      SIC region.  If absent, defaults to instances 0 and 1.
+
+  cri,mbx-slots-log2:
+    $ref: /schemas/types.yaml#/definitions/uint32-array
+    minItems: 1
+    maxItems: 64
+    description:
+      Per-mailbox slot count as log2.  Valid range 1..15.
+      Array length must match cri,mbx-instances.
+      Default is 5 (32 slots).
+
+  cri,mbx-strides-log2:
+    $ref: /schemas/types.yaml#/definitions/uint32-array
+    minItems: 1
+    maxItems: 64
+    description:
+      Per-mailbox stride (bytes per slot) as log2.  Valid range 7..10.
+      Array length must match cri,mbx-instances.
+      Default is 7 (128 bytes per slot).
+
+  "#address-cells":
+    const: 1
+
+  "#size-cells":
+    const: 0
+
+patternProperties:
+  "^(hc|aes|sm4|sm3|hcq|qse|pke|drbg|ccp)@[0-9a-f]+$":
+    type: object
+    description:
+      Per-core-type child nodes.  Each child represents one crypto core
+      instance available in the hardware.  The driver enumerates these at
+      probe to discover which algorithm families are present.
+
+    properties:
+      reg:
+        maxItems: 1
+        description:
+          Hardware core ID of this core instance (e.g. 0x02 for HC, 0x03 for
+          AES).  Must match the CORE_ID_* values defined by the CMH hardware.
+
+      cri,mbx:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        description:
+          Pin this core instance to a specific mailbox instance index.
+          Multiple child nodes of the same core type may each specify a
+          different cri,mbx value to spread instances across mailboxes.
+          When absent, the driver auto-assigns a mailbox via round-robin
+          across the instances listed in cri,mbx-instances.
+
+    required:
+      - reg
+
+    additionalProperties: false
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - "#address-cells"
+  - "#size-cells"
+
+additionalProperties: false
+
+examples:
+  - |
+    soc {
+        #address-cells = <2>;
+        #size-cells = <2>;
+
+        crypto@a4800000 {
+            compatible = "cri,cmh";
+            reg = <0x0 0xa4800000 0x0 0x41000>;
+            interrupts = <1 2>;
+            interrupt-names = "mbx0", "mbx1";
+            cri,mbx-instances = <0 1>;
+            cri,mbx-slots-log2 = <5 5>;
+            cri,mbx-strides-log2 = <7 7>;
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            hc@2 {
+                reg = <0x02>;
+            };
+
+            aes@3 {
+                reg = <0x03>;
+            };
+
+            sm4@4 {
+                reg = <0x04>;
+            };
+
+            sm3@5 {
+                reg = <0x05>;
+            };
+
+            hcq@8 {
+                reg = <0x08>;
+            };
+
+            qse@9 {
+                reg = <0x09>;
+            };
+
+            pke@a {
+                reg = <0x0a>;
+                cri,mbx = <1>;
+            };
+
+            drbg@f {
+                reg = <0x0f>;
+            };
+
+            ccp@18 {
+                reg = <0x18>;
+            };
+        };
+    };
+
+  - |
+    /* Multi-instance: two AES cores on separate MBXes (future eSW support) */
+    soc {
+        #address-cells = <2>;
+        #size-cells = <2>;
+
+        crypto@a4800000 {
+            compatible = "cri,cmh";
+            reg = <0x0 0xa4800000 0x0 0x41000>;
+            interrupts = <1 2>;
+            interrupt-names = "mbx0", "mbx1";
+            cri,mbx-instances = <0 1>;
+            cri,mbx-slots-log2 = <5 5>;
+            cri,mbx-strides-log2 = <7 7>;
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            hc@2 {
+                reg = <0x02>;
+            };
+
+            aes@3 {
+                reg = <0x03>;
+                cri,mbx = <0>;
+            };
+
+            /* Second AES instance at core ID 0x06, pinned to MBX 1 */
+            aes@6 {
+                reg = <0x06>;
+                cri,mbx = <1>;
+            };
+
+            pke@a {
+                reg = <0x0a>;
+                cri,mbx = <1>;
+            };
+
+            drbg@f {
+                reg = <0x0f>;
+            };
+        };
+    };
diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml
index 28784d66ae7b..3402adba3e49 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.yaml
+++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml
@@ -375,6 +375,8 @@ patternProperties:
     description: Crane Connectivity Solutions
   "^creative,.*":
     description: Creative Technology Ltd
+  "^cri,.*":
+    description: Cryptography Research, Inc.
   "^crystalfontz,.*":
     description: Crystalfontz America, Inc.
   "^csky,.*":
--
2.43.7


** This message and any attachments are for the sole use of the intended recipient(s). It may contain information that is confidential and privileged. If you are not the intended recipient of this message, you are prohibited from printing, copying, forwarding or saving it. Please delete the message and attachments and notify the sender immediately. **

Rambus Inc.<http://www.rambus.com>

^ permalink raw reply related

* [PATCH 00/19] crypto: cmh - add CRI CryptoManager Hub driver
From: Saravanakrishnan Krishnamoorthy @ 2026-06-25 17:33 UTC (permalink / raw)
  To: Albert Ou, Alex Ousherovitch, Conor Dooley, David S. Miller,
	Herbert Xu, Jonathan Corbet, Krzysztof Kozlowski, Palmer Dabbelt,
	Paul Walmsley, Rob Herring, Saravanakrishnan Krishnamoorthy,
	Shuah Khan
  Cc: Alexandre Ghiti, devicetree, Joel Wittenauer, linux-api,
	linux-crypto, linux-doc, linux-kernel, linux-kselftest,
	linux-riscv, Shuah Khan, sipsupport, Thi Nguyen

From: Alex Ousherovitch <aousherovitch@rambus.com>

crypto: cmh - add CRI CryptoManager Hub hardware crypto accelerator

This series adds a driver for the CRI CryptoManager Hub (CMH), a
hardware cryptographic accelerator IP from Cryptography Research at
Rambus Inc. (https://www.rambus.com/cryptographyresearch/).
CMH provides a broad set of symmetric, asymmetric, and post-quantum
cryptographic algorithms accelerated in hardware, accessed via a
mailbox-based Virtual Command Queue (VCQ) interface.

The hardware is a platform device matched via device tree
(compatible = "cri,cmh").  It exposes a single MMIO register region
(SIC) with per-mailbox doorbell, status, and command registers.
Each mailbox has DMA-coherent queue memory for VCQ command
submission and completion.

Driver architecture:

  In-kernel users                       /dev/cmh_mgmt (ioctl)
  (dm-crypt, IPsec, kTLS, fscrypt)      (key management)
       |                                        |
       v                                        v
  +----------------------------------------------------+
  |        Kernel Crypto API + hwrng (72 total)        |
  |   ahash | skcipher | aead | akcipher | sig | kpp   |
  +----------------------------------------------------+
       |                                           |
       v                                           v
  +------------------+    +------------------------+
  | Transaction Mgr  |--->| Key / Mgmt subsystem   |
  | (kthread, CMQ)   |    | (datastore, ioctl ops) |
  +------------------+    +------------------------+
       |
       v
  +------------------+     +-------------------+
  | MQI (VCQ pack,   |---->| Response Handler  |
  |  DMA map, submit)|     | (threaded IRQ,    |
  +------------------+     |  watchdog, unmap) |
       |                   +-------------------+
       v                          ^
  +-----------+              +-----------+
  | Hardware  |--- IRQ ----->| Hardware  |
  | (mailbox) |              | (mailbox) |
  +-----------+              +-----------+

The transaction manager runs as a dedicated kthread that pulls
requests from a central command queue, packs VCQ entries, maps DMA
buffers, and submits to the least-loaded mailbox.  Completion is
handled by per-mailbox threaded IRQs.  The driver returns
-EINPROGRESS for async crypto requests and supports the
CRYPTO_TFM_REQ_MAY_BACKLOG flag for queue-full backpressure.

Registered algorithms (72 total):

  Type       Count  Algorithms
  ---------  -----  --------------------------------------------------
  ahash         15  SHA-{224,256,384,512}, SHA3-{224,256,384,512},
                     SHAKE-{128,256}, cSHAKE-{128,256},
                     KMAC-{128,256}, SM3
  ahash(HMAC)    8  HMAC-SHA-{224,256,384,512},
                     HMAC-SHA3-{224,256,384,512}
  ahash(MAC)     4  CMAC(AES), CMAC(SM4), XCBC(SM4), Poly1305
  skcipher      11  AES-{ECB,CBC,CTR,CFB,XTS},
                     SM4-{ECB,CBC,CTR,CFB,XTS}, ChaCha20
  aead           6  AES-{GCM,CCM}, SM4-{GCM,CCM},
                     rfc7539(chacha20,poly1305),
                     rfc7539esp(chacha20,poly1305)
  akcipher       1  RSA (2048--4096 bit; 512/1024 legacy/test)
  sig           23  ECDSA P-{256,384,521}, SM2 (verify-only),
                     ML-DSA-{44,65,87},
                     SLH-DSA (12 parameter sets),
                     LMS, LMS-HSS, XMSS, XMSS-MT
  kpp            3  ECDH P-{256,384}, X25519
  hwrng          1  DRBG-backed /dev/hwrng

Ioctl-only algorithms (not registered with the crypto API at all):
  - EdDSA (Ed25519, Ed448): sign and verify
  - ML-KEM (ML-KEM-512/768/1024): no standard kernel KEM API exists

The driver also exposes /dev/cmh_mgmt, a misc device providing 44
ioctl commands.  Relative to the in-kernel crypto API these fall into
two groups; the distinction matters because some commands name the
same primitives the driver also registers, and that overlap is
deliberate and bounded:

(1) Operations with no crypto API representation - the large
    majority.  The crypto API has no transform type or verb for
    these, so a character device is the only available UAPI:
      - hardware key lifecycle: create, import, export, derive,
        destroy, enumerate (keystore CRUD) - no keystore verb
      - KIC key derivation (HKDF, AES-CMAC-KDF, DKEK)
      - asymmetric key generation (RSA, EC, EdDSA, ML-DSA, SLH-DSA)
        and public-key derivation - the crypto API has no keygen verb
      - ML-KEM encapsulate/decapsulate - no kernel KEM API exists
      - SM2 encrypt/decrypt and key exchange (multi-step GM/T 0003)
      - EdDSA sign/verify - not registered with the crypto API
      - EAC Chip Authentication and DRBG (re)configuration

(2) Hardware-held-key operations on algorithms that ARE also
    registered (RSA decrypt, ECDSA/ML-DSA/SLH-DSA sign, ECDH).  These
    name the same primitives as the registered akcipher/sig/kpp
    transforms, but the crypto API's set_priv_key()/set_secret()
    accept only raw key bytes supplied by the caller; they cannot
    reference a private key that is generated inside, and never
    leaves, the hardware datastore - the central security property of
    this device.  The ioctl path keeps the private key
    hardware-resident, while the registered transforms serve raw-key
    in-kernel users.  The two paths are complementary, not redundant.

Access to /dev/cmh_mgmt requires CAP_SYS_ADMIN.

/dev/cmh_mgmt is built conditionally on CONFIG_CRYPTO_DEV_CMH_MGMT
(default n); when disabled the ioctl interface is absent while all
kernel crypto API algorithms remain registered.

The ML-DSA sig algorithms are registered at priority 5001.  The
kernel's crypto/mldsa.c registers at priority 5000 with verify-only
(sign returns -EOPNOTSUPP).  Our driver provides full HW-accelerated
sign + verify, so the higher priority ensures the hardware
implementation is preferred when the driver is loaded.

Power management uses DEFINE_SIMPLE_DEV_PM_OPS.  On suspend the
transaction manager drains in-flight requests (configurable 10s
timeout, returns -ECANCELED on timeout), stops the kthread, and
masks IRQs.  On resume it re-verifies SIC/boot status and restarts
the kthread.

Dependencies:
  - Kernel 7.1+ (based on Herbert Xu's cryptodev-2.6 tree, 7.1.0-rc2)
  - sig_alg backend (upstream since 6.13)
  - CRYPTO_AHASH_REQ_VIRT (native support, no fallback needed)
  - CMH eSW loaded independently by hardware before driver probe

The driver registers the algorithms listed above through the standard
in-kernel crypto API; in-kernel users (dm-crypt, fscrypt, IPsec, etc.)
consume them directly.  Key provisioning, hardware-held-key operations,
and the ioctl-only algorithms (EdDSA, ML-KEM) are exposed to user space
via /dev/cmh_mgmt ioctls.

Public hardware documentation:
  Product brief: https://go.rambus.com/ch-7xx-and-cc-7xx-product-brief
  No public datasheets are currently available.  The driver was
  developed against the CRI CryptoManager Hub Hardware Reference
  Manual (Rambus Inc. confidential).  Detailed hardware reference is
  available under NDA from Rambus Inc.; contact the maintainers listed
  in MAINTAINERS for access during review.

Tested on RISC-V and ARM64 QEMU emulation with the CMH hardware
model (QEMU TCG, 512 MiB RAM).  Also exercised on Xilinx VMK180
FPGA board with real CMH IP.

  - testmgr: 41 CMH algorithm registrations matched by upstream
    test vectors, all pass; 30 names report "No test for" (PQC
    families, KMAC, cSHAKE - no upstream vectors yet).
  - kselftest tools/testing/selftests/drivers/crypto/cmh:
    6 pass, 0 fail.

checkpatch.pl --strict: 0 errors, 0 warnings, 0 checks on all
files (the only output is the expected per-file "does MAINTAINERS
need updating?" reminder, satisfied by the MAINTAINERS patch).
sparse (C=2): 0 warnings.
W=1 -Werror: clean.
make dt_binding_check: clean (dtschema validates the
cri,cmh.yaml binding).

Tested with the following debug options enabled simultaneously
(submit-checklist "Test your code" item 1):
  CONFIG_PROVE_LOCKING, CONFIG_PROVE_RCU, CONFIG_DEBUG_LOCK_ALLOC,
  CONFIG_DEBUG_OBJECTS_RCU_HEAD, CONFIG_SLUB_DEBUG,
  CONFIG_DEBUG_PAGEALLOC, CONFIG_DEBUG_MUTEXES, CONFIG_DEBUG_SPINLOCK,
  CONFIG_DEBUG_PREEMPT, CONFIG_DEBUG_ATOMIC_SLEEP.
  Result: no lockdep warnings, no ODEBUG splats, no slab corruption.

Additionally tested (separate passes - mutually exclusive configs):
  - CONFIG_KASAN + CONFIG_UBSAN + CONFIG_DEBUG_KMEMLEAK + CONFIG_KFENCE:
    no sanitizer findings; KMEMLEAK scan reports 0 unreferenced objects.
  - CONFIG_KCSAN (arm64; riscv64 lacks HAVE_ARCH_KCSAN):
    0 data-race reports attributed to the driver.

Stack usage: worst-case under 1 KB on both riscv64 and arm64
(scripts/checkstack.pl).  Hardware command buffers live in
per-request context (heap-allocated by the crypto framework).

Alex Ousherovitch (19):
  dt-bindings: crypto: add Rambus CryptoManager Hub
  crypto: cmh - add core platform driver
  crypto: cmh - add key provisioning and management
  crypto: cmh - add SHA-2/SHA-3/SHAKE ahash
  crypto: cmh - add HMAC ahash
  crypto: cmh - add CSHAKE/KMAC ahash
  crypto: cmh - add SM3 ahash
  crypto: cmh - add AES skcipher/aead/cmac
  crypto: cmh - add SM4 skcipher/aead/cmac/xcbc
  crypto: cmh - add ChaCha20-Poly1305
  crypto: cmh - add DRBG hwrng
  crypto: cmh - add RSA akcipher
  crypto: cmh - add ECDSA/SM2 sig
  crypto: cmh - add ECDH/X25519 kpp
  crypto: cmh - add ML-KEM/ML-DSA (QSE)
  crypto: cmh - add SLH-DSA/LMS/XMSS (HCQ)
  Documentation: ioctl: add CMH ioctl documentation and register 'J'
  selftests: crypto: cmh - add kselftest for management ioctl
  MAINTAINERS: add Rambus CryptoManager Hub (CMH)

base-commit: 6ea0ce3a19f9c37a014099e2b0a46b27fa164564
--
2.43.7

** This message and any attachments are for the sole use of the intended recipient(s). It may contain information that is confidential and privileged. If you are not the intended recipient of this message, you are prohibited from printing, copying, forwarding or saving it. Please delete the message and attachments and notify the sender immediately. **

Rambus Inc.<http://www.rambus.com>

^ permalink raw reply

* Re: [PATCH v9 3/6] x86/sev: Disable CPU hotplug while SNP is active
From: Borislav Petkov @ 2026-06-25 15:02 UTC (permalink / raw)
  To: Ashish Kalra
  Cc: tglx, mingo, dave.hansen, x86, hpa, seanjc, peterz,
	thomas.lendacky, herbert, davem, ardb, pbonzini, aik,
	Michael.Roth, KPrateek.Nayak, Tycho.Andersen, Nathan.Fontenot,
	ackerleytng, jackyli, pgonda, rientjes, jacobhxu, xin,
	pawan.kumar.gupta, babu.moger, dyoung, nikunj, john.allen, darwi,
	linux-kernel, linux-crypto, kvm, linux-coco
In-Reply-To: <ba146ca15b7f76eee386c8c073fb3f1cc36e5781.1782336473.git.ashish.kalra@amd.com>

On Wed, Jun 24, 2026 at 09:56:49PM +0000, Ashish Kalra wrote:
> +/* Set while SNP has CPU hotplug disabled (kernel-lifetime; survives ccp reload). */
> +static bool snp_cpu_hotplug_disabled;

Do you really need this?

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply

* Re: [PATCH v3 1/7] list: Add mutable iterator variants
From: Jani Nikula @ 2026-06-25 11:00 UTC (permalink / raw)
  To: Kaitao Cheng, David Laight, Christian König,
	David Hildenbrand (Arm), Alexei Starovoitov
  Cc: Andrew Morton, David Hildenbrand, Jens Axboe, Tejun Heo,
	Alexander Viro, Christian Brauner, Daniel Borkmann,
	Andrii Nakryiko, Johannes Weiner, Peter Zijlstra, Ingo Molnar,
	Arnaldo Carvalho de Melo, Namhyung Kim, Thomas Gleixner,
	Juri Lelli, Vincent Guittot, Paul Moore, Andy Shevchenko,
	Paul E. McKenney, Shakeel Butt, David Howells, Simona Vetter,
	Randy Dunlap, Luca Ceresoli, Philipp Stanner, linux-block,
	linux-kernel, cgroups, linux-ntfs-dev, linux-fsdevel, io-uring,
	audit, bpf, netdev, dri-devel, linux-perf-users,
	linux-trace-kernel, kexec, live-patching, linux-modules,
	linux-crypto, linux-pm, rcu, sched-ext, linux-mm, virtualization,
	damon, llvm, Kaitao Cheng, Muchun Song
In-Reply-To: <0ed6b5c3-e955-46e2-9fc6-075a0dfd1c4f@linux.dev>

On Thu, 25 Jun 2026, Kaitao Cheng <kaitao.cheng@linux.dev> wrote:
> 在 2026/6/24 22:23, David Laight 写道:
>> On Wed, 24 Jun 2026 15:23:47 +0200
>> Christian König <christian.koenig@amd.com> wrote:
>>> On 6/24/26 15:14, Kaitao Cheng wrote:
>>>> 在 2026/6/22 16:42, David Laight 写道:  
>>>>> On Mon, 22 Jun 2026 12:05:31 +0800
>>>>> Kaitao Cheng <kaitao.cheng@linux.dev> wrote:
>>>>>  
>>>>>> From: Kaitao Cheng <chengkaitao@kylinos.cn>
>>>>>>
>>>>>> The list_for_each*_safe() helpers are used when the loop body may
>>>>>> remove the current entry.  Their API exposes the temporary cursor at
>>>>>> every call site, even though most users only need it for the iterator
>>>>>> implementation and never reference it in the loop body.
>>>>>>
>>>>>> Add *_mutable() variants for list and hlist iteration.  The new helpers
>>>>>> support both forms: callers may keep passing an explicit temporary cursor
>>>>>> when they need to inspect or reset it, or omit it and let the helper use
>>>>>> a unique internal cursor.  
>>>>>
>>>>> I'm not really sure 'mutable' means anything either.
>>>>> It is possible to make it valid for the loop body (or even other threads)
>>>>> to delete arbitrary list items - but that needs significant extra overheads.
>>>>>
>>>>> It might be worth doing something that doesn't need the extra variable,
>>>>> but there is little point doing all the churn just to rename things.
>>>>>  
>>>>>>
>>>>>> This makes call sites that only mutate the list through the current entry
>>>>>> less noisy, while keeping the existing *_safe() helpers available for
>>>>>> compatibility.
>>>>>>
>>>>>> Signed-off-by: Kaitao Cheng <chengkaitao@kylinos.cn>
>>>>>> ---
>>>>>>  include/linux/list.h | 269 +++++++++++++++++++++++++++++++++++++------
>>>>>>  1 file changed, 231 insertions(+), 38 deletions(-)
>>>>>>
>>>>>> diff --git a/include/linux/list.h b/include/linux/list.h
>>>>>> index 09d979976b3b..1081def7cea9 100644
>>>>>> --- a/include/linux/list.h
>>>>>> +++ b/include/linux/list.h
>>>>>> @@ -7,6 +7,7 @@
>>>>>>  #include <linux/stddef.h>
>>>>>>  #include <linux/poison.h>
>>>>>>  #include <linux/const.h>
>>>>>> +#include <linux/args.h>
>>>>>>  
>>>>>>  #include <asm/barrier.h>
>>>>>>  
>>>>>> @@ -763,28 +764,72 @@ static inline void list_splice_tail_init(struct list_head *list,
>>>>>>  #define list_for_each_prev(pos, head) \
>>>>>>  	for (pos = (head)->prev; !list_is_head(pos, (head)); pos = pos->prev)
>>>>>>  
>>>>>> -/**
>>>>>> - * list_for_each_safe - iterate over a list safe against removal of list entry
>>>>>> - * @pos:	the &struct list_head to use as a loop cursor.
>>>>>> - * @n:		another &struct list_head to use as temporary storage
>>>>>> - * @head:	the head for your list.
>>>>>> +/*
>>>>>> + * list_for_each_safe is an old interface, use list_for_each_mutable instead.
>>>>>>   */
>>>>>>  #define list_for_each_safe(pos, n, head) \
>>>>>>  	for (pos = (head)->next, n = pos->next; \
>>>>>>  	     !list_is_head(pos, (head)); \
>>>>>>  	     pos = n, n = pos->next)
>>>>>>  
>>>>>> +#define __list_for_each_mutable_internal(pos, tmp, head)		\
>>>>>> +	for (typeof(pos) tmp = (pos = (head)->next)->next;		\  
>>>>>
>>>>> Use auto
>>>>>  
>>>>>> +	     !list_is_head(pos, (head));				\
>>>>>> +	     pos = tmp, tmp = pos->next)
>>>>>> +
>>>>>> +#define __list_for_each_mutable1(pos, head)				\
>>>>>> +	__list_for_each_mutable_internal(pos, __UNIQUE_ID(next), head)
>>>>>> +
>>>>>> +#define __list_for_each_mutable2(pos, next, head)			\
>>>>>> +	list_for_each_safe(pos, next, head)
>>>>>> +
>>>>>>  /**
>>>>>> - * list_for_each_prev_safe - iterate over a list backwards safe against removal of list entry
>>>>>> + * list_for_each_mutable - iterate over a list safe against entry removal
>>>>>>   * @pos:	the &struct list_head to use as a loop cursor.
>>>>>> - * @n:		another &struct list_head to use as temporary storage
>>>>>> - * @head:	the head for your list.
>>>>>> + * @...:	either (head) or (next, head)
>>>>>> + *
>>>>>> + * next:	another &struct list_head to use as optional temporary storage.
>>>>>> + *		The temporary cursor is internal unless explicitly supplied by
>>>>>> + *		the caller.
>>>>>> + * head:	the head for your list.
>>>>>> + */
>>>>>> +#define list_for_each_mutable(pos, ...)					\
>>>>>> +	CONCATENATE(__list_for_each_mutable, COUNT_ARGS(__VA_ARGS__))	\
>>>>>> +		(pos, __VA_ARGS__)  
>>>>>
>>>>> The variable argument count logic really just slows down compilation.
>>>>> Maybe there aren't enough copies of this code to make that significant.
>>>>> But just because you can do it doesn't mean it is a good idea.
>>>>> I'm also not sure it really adds anything to the readability.
>>>>>
>>>>> And, if you are going to make the middle argument optional there is
>>>>> no need to change the macro name.  
>>>>
>>>> Christian König and Jani Nikula also disagree with the variadic-argument
>>>> implementation approach. If we abandon that method, it means we will
>>>> inevitably need to add some new macros. If mutable is not a good name,
>>>> suggestions for better alternatives would be welcome; coming up with a
>>>> suitable name is indeed rather tricky.  
>>>
>>> I don't think you need to add a new macro for the specific use case that people want to modify the next element of the iteration.
>>>
>>> If I remember your numbers correctly that is a really corner case and keeping using the existing *_safe() macros for that sounds perfectly fine to me.
>> 
>> IIRC currently you have a choice of either:
>> 	define               Item that can't be deleted
>> 	list_for_each()	     The current item.
>> 	list_for_each_safe() The next item.
>> There is also likely to be code that updates the variables to allow
>> for other scenarios.
>> 
>> Note that if you increase a reference count and release a lock then list_for_each()
>> is likely safer than list_for_each_safe() :-)
>> 
>> list.h has 9 variants of the 'safe' loop.
>> The bloat of another 9 is getting excessive.
>> 
>> It has to be said that this is one of my least favourite type of list...
>
> Hi Christian König, David Laight, Jani Nikula, David Hildenbrand,
> Andy Shevchenko, Alexei Starovoitov
>
> For ease of discussion, I need to summarize the currently possible
> approaches and briefly describe their respective pros and cons,
> using the list_for_each_entry* interfaces as examples.
>
> 1. Add list_for_each_entry_mutable, while keeping list_for_each_entry
> and list_for_each_entry_safe unchanged. list_for_each_entry_mutable
> would be used specifically for safe deletion scenarios that do not
> need to expose the temporary cursor externally. The code can refer to
> the v1 version.
>
> Pros: Does not depend on immediate per-subsystem adaptation and can be
>       merged directly.
> Cons: Requires adding a whole set of mutable interfaces, which makes the
>       code somewhat redundant.

Seems fine, and the original _safe naming is ambiguous anyway.

> 2. Directly optimize away the temporary cursor in list_for_each_entry_safe
> and define it inside the loop instead, changing the interface from four
> arguments to three.
>
> Pros: Does not add redundant interfaces.
> Cons: (1) Users need to manually update special cases that use the
>       traversal variable of list_for_each_entry_safe, the new
>       list_for_each_entry_safe would no longer apply there and would
>       need to be open-coded.
>       (2) Because the macro arguments change, all list_for_each_entry_safe
>       callers would need to be modified and merged together, making it
>       difficult to merge such a large amount of code at once.

This won't fly because there are literally thousands of
list_for_each_entry_safe() users.

> 3. Use a variadic macro approach to optimize list_for_each_entry_safe,
> so that it supports both three and four arguments.
>
> Pros: (1) Does not add redundant interfaces.
>       (2) Does not depend on immediate per-subsystem adaptation and can
>       be merged directly.
> Cons: (1) Increases compile time.
>       (2) Makes the interface harder for users to use.

Basically I'm against any variadic macro tricks where the optional
argument is not the last argument. That's just way too surprising, and
goes against common practice in just about all other languages.

> 4. Optimize list_for_each_entry by defining the temporary cursor internally,
> making it compatible with the functionality of list_for_each_entry_safe.
> The code can refer to the v2 version.
>
> Pros: (1) Does not add redundant interfaces.
>       (2) The number of externally visible arguments of list_for_each_entry
>       remains unchanged, still three.
> Cons: (1) list_for_each_entry and list_for_each_entry_safe would be merged
>       into one, and list_for_each_entry_safe would gradually be deprecated.
>       (2) Users need to manually update special cases that use the traversal
>       variable of list_for_each_entry, the new list_for_each_entry would no
>       longer apply there and would need to be open-coded. There are 15 such
>       cases in total.

This sounds good to me, though I take it there's some code size increase
and/or performance penalty?

Maybe the 15 cases are questionable anyway?

> 5. Use a variadic macro approach to optimize list_for_each_entry, so that
> it supports both three and four arguments.
>
> Pros: (1) Does not add redundant interfaces.
>       (2) Does not depend on immediate per-subsystem adaptation and can be
>       merged directly.
> Cons: (1) Increases compile time.
>       (2) list_for_each_entry and list_for_each_entry_safe would be merged
>       into one, and list_for_each_entry_safe would gradually be deprecated.

Please don't do the macro tricks.

> 6. Make no changes, keep the current logic unchanged, and close the current
> email discussion.

I like hiding the temporary stuff when possible.


BR,
Jani.

-- 
Jani Nikula, Intel

^ permalink raw reply

* Re: [PATCH 12/21] nvme-auth: common: use crypto library in nvme_auth_derive_tls_psk()
From: John Garry @ 2026-06-25  9:02 UTC (permalink / raw)
  To: Eric Biggers, linux-nvme, Chaitanya Kulkarni, Sagi Grimberg,
	Christoph Hellwig, Hannes Reinecke
  Cc: linux-crypto, linux-kernel, Ard Biesheuvel, Jason A . Donenfeld,
	Herbert Xu
In-Reply-To: <20260302075959.338638-13-ebiggers@kernel.org>

On 02/03/2026 07:59, Eric Biggers wrote:
>   int nvme_auth_derive_tls_psk(int hmac_id, const u8 *psk, size_t psk_len,
>   			     const char *psk_digest, u8 **ret_psk)
>   {
> -	struct crypto_shash *hmac_tfm;
> -	const char *hmac_name;
> -	const char *label = "nvme-tls-psk";
>   	static const u8 default_salt[NVME_AUTH_MAX_DIGEST_SIZE];
> -	size_t prk_len;
> -	const char *ctx;
> -	u8 *prk, *tls_key;
> +	static const char label[] = "tls13 nvme-tls-psk";
> +	const size_t label_len = sizeof(label) - 1;
> +	u8 prk[NVME_AUTH_MAX_DIGEST_SIZE];
> +	size_t hash_len, ctx_len;
> +	u8 *hmac_data = NULL, *tls_key;
> +	size_t i;
>   	int ret;
>   
> -	hmac_name = nvme_auth_hmac_name(hmac_id);
> -	if (!hmac_name) {
> +	hash_len = nvme_auth_hmac_hash_len(hmac_id);
> +	if (hash_len == 0) {
>   		pr_warn("%s: invalid hash algorithm %d\n",

...

> +	i = 0;
> +	hmac_data[i++] = hash_len >> 8;
> +	hmac_data[i++] = hash_len;
> +
> +	/* label */
> +	static_assert(label_len <= 255);

JFYI, this is generating a C=1 warning for me:

   CHECK   drivers/nvme/common/auth.c
drivers/nvme/common/auth.c:746:9: error: bad constant expression

The following fixes/avoids it:

/* label */
-       static_assert(label_len <= 255);
+       static_assert(sizeof(label) - 1 <= 255);

Even though label_len is declared as const, label_len <= 255 is not a 
constant expression.

^ permalink raw reply

* Re: [PATCH v9 3/6] x86/sev: Disable CPU hotplug while SNP is active
From: Kalra, Ashish @ 2026-06-25  5:38 UTC (permalink / raw)
  To: K Prateek Nayak, tglx, mingo, bp, dave.hansen, x86, hpa, seanjc,
	peterz, thomas.lendacky, herbert, davem, ardb
  Cc: pbonzini, aik, Michael.Roth, Tycho.Andersen, Nathan.Fontenot,
	ackerleytng, jackyli, pgonda, rientjes, jacobhxu, xin,
	pawan.kumar.gupta, babu.moger, dyoung, nikunj, john.allen, darwi,
	linux-kernel, linux-crypto, kvm, linux-coco
In-Reply-To: <fe9927ad-a06a-4a4b-8122-12644513ed14@amd.com>

Hello Prateek,

On 6/24/2026 10:45 PM, K Prateek Nayak wrote:
> Hello Ashish,
> 
> On 6/25/2026 3:26 AM, Ashish Kalra wrote:
>> From: Ashish Kalra <ashish.kalra@amd.com>
>>
>> While SNP is active, every memory write is checked against the RMP to
>> protect the integrity of SEV-SNP guest memory.  By the SNP architecture
>> these checks cannot be disabled on a subset of CPUs: they are gated
>> per-core by SYSCFG[SNP_EN], which the SEV firmware requires to be set on
>> every present CPU before SNP initialization.  A CPU that does not have
>> SNP_EN set and was not initialized via SNP_INIT performs no RMP checks at
>> all, so there is no valid configuration with SNP active and any CPU exempt
>> from RMP checks.
>>
>> The firmware determines which CPUs are present from the processor and the
>> BIOS/UEFI configuration (e.g. SMT disabled in the BIOS) and enumerates
>> them at SNP init; it is not aware of the OS bringing CPUs online or
>> offline afterwards.  A CPU brought online after SNP init was not
>> enumerated at SNP_INIT and does not have SNP_EN set, so writes from it are
>> not RMP-checked and could corrupt SEV-SNP guest memory, and there is no
>> way to keep work off such a CPU once it is online.  OS CPU hotplug can thus
>> diverge from the firmware's expectations and break SNP.
> 
> If this is true ...
> 
> [..snip..]
> 
>> diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c
>> index 217b6b19802e..66475145b3fa 100644
>> --- a/drivers/crypto/ccp/sev-dev.c
>> +++ b/drivers/crypto/ccp/sev-dev.c
>> @@ -1479,6 +1479,9 @@ static int __sev_snp_init_locked(int *error, unsigned int max_snp_asid)
>>  
>>  	snp_hv_fixed_pages_state_update(sev, HV_FIXED);
>>  
>> +	/* Disable CPU hotplug while SNP is active (see snp_disable_cpu_hotplug). */
>> +	snp_disable_cpu_hotplug();
> 
> ... then this should be done at snp_prepare() before
> on_each_cpu(snp_enable) right?
> 
> If not, then any CPU hotplug between the cpus_read_unlock() there and
> the snp_disable_cpu_hotplug() here will not have the SNP_EN set.
> 
> Isn't that a concern?

Yes — it's a concern, and I agree the disable belongs in snp_prepare() before SNP_EN is programmed.

snp_enable runs via on_each_cpu() over the set that is online at snp_prepare() time, and SNP_INIT_EX runs
right after. With the disable where it is now (after SNP_INIT_EX/DF_FLUSH), there's a window starting at
snp_prepare()'s cpus_read_unlock() in which a CPU can come online that never had snp_enable run on it, i.e.
with SNP_EN clear.

So hotplug needs to be frozen before SNP_EN is programmed, so the online set that gets SNP_EN cannot change underneath us.

I'll move the disable into snp_prepare(), before cpus_read_lock() rather than just before on_each_cpu(snp_enable):
cpu_hotplug_disable() takes cpu_add_remove_lock, which nests above cpu_hotplug_lock, so calling it under
cpus_read_lock() would invert the order, causing deadlock.

On the failure paths where SNP does not end up active (i.e., an SNP_INIT_EX/DF_FLUSH error), I'll
re-enable hotplug so a failed init doesn't leave it permanently disabled; the success path continues to re-enable
it only on the full shutdown path.

Will fix in v10.

Thanks,
Ashish

> 
> Also, this patch can probably go first since the FW assumptions on
> hotplug exist independently of RMPOPT bits.
> 
>> +
>>  	snp_setup_rmpopt();
>>  
>>  	sev->snp_initialized = true;
> 

^ permalink raw reply

* Test Campaign 1782362571457-4323
From: ZamaJuli @ 2026-06-25  4:42 UTC (permalink / raw)
  To: linux-crypto

Please reply to this email.

^ permalink raw reply

* Re: [PATCH v9 3/6] x86/sev: Disable CPU hotplug while SNP is active
From: K Prateek Nayak @ 2026-06-25  3:45 UTC (permalink / raw)
  To: Ashish Kalra, tglx, mingo, bp, dave.hansen, x86, hpa, seanjc,
	peterz, thomas.lendacky, herbert, davem, ardb
  Cc: pbonzini, aik, Michael.Roth, Tycho.Andersen, Nathan.Fontenot,
	ackerleytng, jackyli, pgonda, rientjes, jacobhxu, xin,
	pawan.kumar.gupta, babu.moger, dyoung, nikunj, john.allen, darwi,
	linux-kernel, linux-crypto, kvm, linux-coco
In-Reply-To: <ba146ca15b7f76eee386c8c073fb3f1cc36e5781.1782336473.git.ashish.kalra@amd.com>

Hello Ashish,

On 6/25/2026 3:26 AM, Ashish Kalra wrote:
> From: Ashish Kalra <ashish.kalra@amd.com>
> 
> While SNP is active, every memory write is checked against the RMP to
> protect the integrity of SEV-SNP guest memory.  By the SNP architecture
> these checks cannot be disabled on a subset of CPUs: they are gated
> per-core by SYSCFG[SNP_EN], which the SEV firmware requires to be set on
> every present CPU before SNP initialization.  A CPU that does not have
> SNP_EN set and was not initialized via SNP_INIT performs no RMP checks at
> all, so there is no valid configuration with SNP active and any CPU exempt
> from RMP checks.
> 
> The firmware determines which CPUs are present from the processor and the
> BIOS/UEFI configuration (e.g. SMT disabled in the BIOS) and enumerates
> them at SNP init; it is not aware of the OS bringing CPUs online or
> offline afterwards.  A CPU brought online after SNP init was not
> enumerated at SNP_INIT and does not have SNP_EN set, so writes from it are
> not RMP-checked and could corrupt SEV-SNP guest memory, and there is no
> way to keep work off such a CPU once it is online.  OS CPU hotplug can thus
> diverge from the firmware's expectations and break SNP.

If this is true ...

[..snip..]

> diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c
> index 217b6b19802e..66475145b3fa 100644
> --- a/drivers/crypto/ccp/sev-dev.c
> +++ b/drivers/crypto/ccp/sev-dev.c
> @@ -1479,6 +1479,9 @@ static int __sev_snp_init_locked(int *error, unsigned int max_snp_asid)
>  
>  	snp_hv_fixed_pages_state_update(sev, HV_FIXED);
>  
> +	/* Disable CPU hotplug while SNP is active (see snp_disable_cpu_hotplug). */
> +	snp_disable_cpu_hotplug();

... then this should be done at snp_prepare() before
on_each_cpu(snp_enable) right?

If not, then any CPU hotplug between the cpus_read_unlock() there and
the snp_disable_cpu_hotplug() here will not have the SNP_EN set.

Isn't that a concern?

Also, this patch can probably go first since the FW assumptions on
hotplug exist independently of RMPOPT bits.

> +
>  	snp_setup_rmpopt();
>  
>  	sev->snp_initialized = true;

-- 
Thanks and Regards,
Prateek


^ permalink raw reply

* Re: [PATCH v3 1/7] list: Add mutable iterator variants
From: Kaitao Cheng @ 2026-06-25  3:01 UTC (permalink / raw)
  To: David Laight, Christian König, Jani Nikula,
	David Hildenbrand (Arm), Alexei Starovoitov
  Cc: Andrew Morton, David Hildenbrand, Jens Axboe, Tejun Heo,
	Alexander Viro, Christian Brauner, Daniel Borkmann,
	Andrii Nakryiko, Johannes Weiner, Peter Zijlstra, Ingo Molnar,
	Arnaldo Carvalho de Melo, Namhyung Kim, Thomas Gleixner,
	Juri Lelli, Vincent Guittot, Paul Moore, Andy Shevchenko,
	Paul E. McKenney, Shakeel Butt, David Howells, Simona Vetter,
	Randy Dunlap, Luca Ceresoli, Philipp Stanner, linux-block,
	linux-kernel, cgroups, linux-ntfs-dev, linux-fsdevel, io-uring,
	audit, bpf, netdev, dri-devel, linux-perf-users,
	linux-trace-kernel, kexec, live-patching, linux-modules,
	linux-crypto, linux-pm, rcu, sched-ext, linux-mm, virtualization,
	damon, llvm, Kaitao Cheng, Muchun Song
In-Reply-To: <20260624152324.3def88ce@pumpkin>

在 2026/6/24 22:23, David Laight 写道:
> On Wed, 24 Jun 2026 15:23:47 +0200
> Christian König <christian.koenig@amd.com> wrote:
>> On 6/24/26 15:14, Kaitao Cheng wrote:
>>> 在 2026/6/22 16:42, David Laight 写道:  
>>>> On Mon, 22 Jun 2026 12:05:31 +0800
>>>> Kaitao Cheng <kaitao.cheng@linux.dev> wrote:
>>>>  
>>>>> From: Kaitao Cheng <chengkaitao@kylinos.cn>
>>>>>
>>>>> The list_for_each*_safe() helpers are used when the loop body may
>>>>> remove the current entry.  Their API exposes the temporary cursor at
>>>>> every call site, even though most users only need it for the iterator
>>>>> implementation and never reference it in the loop body.
>>>>>
>>>>> Add *_mutable() variants for list and hlist iteration.  The new helpers
>>>>> support both forms: callers may keep passing an explicit temporary cursor
>>>>> when they need to inspect or reset it, or omit it and let the helper use
>>>>> a unique internal cursor.  
>>>>
>>>> I'm not really sure 'mutable' means anything either.
>>>> It is possible to make it valid for the loop body (or even other threads)
>>>> to delete arbitrary list items - but that needs significant extra overheads.
>>>>
>>>> It might be worth doing something that doesn't need the extra variable,
>>>> but there is little point doing all the churn just to rename things.
>>>>  
>>>>>
>>>>> This makes call sites that only mutate the list through the current entry
>>>>> less noisy, while keeping the existing *_safe() helpers available for
>>>>> compatibility.
>>>>>
>>>>> Signed-off-by: Kaitao Cheng <chengkaitao@kylinos.cn>
>>>>> ---
>>>>>  include/linux/list.h | 269 +++++++++++++++++++++++++++++++++++++------
>>>>>  1 file changed, 231 insertions(+), 38 deletions(-)
>>>>>
>>>>> diff --git a/include/linux/list.h b/include/linux/list.h
>>>>> index 09d979976b3b..1081def7cea9 100644
>>>>> --- a/include/linux/list.h
>>>>> +++ b/include/linux/list.h
>>>>> @@ -7,6 +7,7 @@
>>>>>  #include <linux/stddef.h>
>>>>>  #include <linux/poison.h>
>>>>>  #include <linux/const.h>
>>>>> +#include <linux/args.h>
>>>>>  
>>>>>  #include <asm/barrier.h>
>>>>>  
>>>>> @@ -763,28 +764,72 @@ static inline void list_splice_tail_init(struct list_head *list,
>>>>>  #define list_for_each_prev(pos, head) \
>>>>>  	for (pos = (head)->prev; !list_is_head(pos, (head)); pos = pos->prev)
>>>>>  
>>>>> -/**
>>>>> - * list_for_each_safe - iterate over a list safe against removal of list entry
>>>>> - * @pos:	the &struct list_head to use as a loop cursor.
>>>>> - * @n:		another &struct list_head to use as temporary storage
>>>>> - * @head:	the head for your list.
>>>>> +/*
>>>>> + * list_for_each_safe is an old interface, use list_for_each_mutable instead.
>>>>>   */
>>>>>  #define list_for_each_safe(pos, n, head) \
>>>>>  	for (pos = (head)->next, n = pos->next; \
>>>>>  	     !list_is_head(pos, (head)); \
>>>>>  	     pos = n, n = pos->next)
>>>>>  
>>>>> +#define __list_for_each_mutable_internal(pos, tmp, head)		\
>>>>> +	for (typeof(pos) tmp = (pos = (head)->next)->next;		\  
>>>>
>>>> Use auto
>>>>  
>>>>> +	     !list_is_head(pos, (head));				\
>>>>> +	     pos = tmp, tmp = pos->next)
>>>>> +
>>>>> +#define __list_for_each_mutable1(pos, head)				\
>>>>> +	__list_for_each_mutable_internal(pos, __UNIQUE_ID(next), head)
>>>>> +
>>>>> +#define __list_for_each_mutable2(pos, next, head)			\
>>>>> +	list_for_each_safe(pos, next, head)
>>>>> +
>>>>>  /**
>>>>> - * list_for_each_prev_safe - iterate over a list backwards safe against removal of list entry
>>>>> + * list_for_each_mutable - iterate over a list safe against entry removal
>>>>>   * @pos:	the &struct list_head to use as a loop cursor.
>>>>> - * @n:		another &struct list_head to use as temporary storage
>>>>> - * @head:	the head for your list.
>>>>> + * @...:	either (head) or (next, head)
>>>>> + *
>>>>> + * next:	another &struct list_head to use as optional temporary storage.
>>>>> + *		The temporary cursor is internal unless explicitly supplied by
>>>>> + *		the caller.
>>>>> + * head:	the head for your list.
>>>>> + */
>>>>> +#define list_for_each_mutable(pos, ...)					\
>>>>> +	CONCATENATE(__list_for_each_mutable, COUNT_ARGS(__VA_ARGS__))	\
>>>>> +		(pos, __VA_ARGS__)  
>>>>
>>>> The variable argument count logic really just slows down compilation.
>>>> Maybe there aren't enough copies of this code to make that significant.
>>>> But just because you can do it doesn't mean it is a good idea.
>>>> I'm also not sure it really adds anything to the readability.
>>>>
>>>> And, if you are going to make the middle argument optional there is
>>>> no need to change the macro name.  
>>>
>>> Christian König and Jani Nikula also disagree with the variadic-argument
>>> implementation approach. If we abandon that method, it means we will
>>> inevitably need to add some new macros. If mutable is not a good name,
>>> suggestions for better alternatives would be welcome; coming up with a
>>> suitable name is indeed rather tricky.  
>>
>> I don't think you need to add a new macro for the specific use case that people want to modify the next element of the iteration.
>>
>> If I remember your numbers correctly that is a really corner case and keeping using the existing *_safe() macros for that sounds perfectly fine to me.
> 
> IIRC currently you have a choice of either:
> 	define               Item that can't be deleted
> 	list_for_each()	     The current item.
> 	list_for_each_safe() The next item.
> There is also likely to be code that updates the variables to allow
> for other scenarios.
> 
> Note that if you increase a reference count and release a lock then list_for_each()
> is likely safer than list_for_each_safe() :-)
> 
> list.h has 9 variants of the 'safe' loop.
> The bloat of another 9 is getting excessive.
> 
> It has to be said that this is one of my least favourite type of list...

Hi Christian König, David Laight, Jani Nikula, David Hildenbrand,
Andy Shevchenko, Alexei Starovoitov

For ease of discussion, I need to summarize the currently possible
approaches and briefly describe their respective pros and cons,
using the list_for_each_entry* interfaces as examples.

1. Add list_for_each_entry_mutable, while keeping list_for_each_entry
and list_for_each_entry_safe unchanged. list_for_each_entry_mutable
would be used specifically for safe deletion scenarios that do not
need to expose the temporary cursor externally. The code can refer to
the v1 version.

Pros: Does not depend on immediate per-subsystem adaptation and can be
      merged directly.
Cons: Requires adding a whole set of mutable interfaces, which makes the
      code somewhat redundant.

2. Directly optimize away the temporary cursor in list_for_each_entry_safe
and define it inside the loop instead, changing the interface from four
arguments to three.

Pros: Does not add redundant interfaces.
Cons: (1) Users need to manually update special cases that use the
      traversal variable of list_for_each_entry_safe, the new
      list_for_each_entry_safe would no longer apply there and would
      need to be open-coded.
      (2) Because the macro arguments change, all list_for_each_entry_safe
      callers would need to be modified and merged together, making it
      difficult to merge such a large amount of code at once.

3. Use a variadic macro approach to optimize list_for_each_entry_safe,
so that it supports both three and four arguments.

Pros: (1) Does not add redundant interfaces.
      (2) Does not depend on immediate per-subsystem adaptation and can
      be merged directly.
Cons: (1) Increases compile time.
      (2) Makes the interface harder for users to use.

4. Optimize list_for_each_entry by defining the temporary cursor internally,
making it compatible with the functionality of list_for_each_entry_safe.
The code can refer to the v2 version.

Pros: (1) Does not add redundant interfaces.
      (2) The number of externally visible arguments of list_for_each_entry
      remains unchanged, still three.
Cons: (1) list_for_each_entry and list_for_each_entry_safe would be merged
      into one, and list_for_each_entry_safe would gradually be deprecated.
      (2) Users need to manually update special cases that use the traversal
      variable of list_for_each_entry, the new list_for_each_entry would no
      longer apply there and would need to be open-coded. There are 15 such
      cases in total.

5. Use a variadic macro approach to optimize list_for_each_entry, so that
it supports both three and four arguments.

Pros: (1) Does not add redundant interfaces.
      (2) Does not depend on immediate per-subsystem adaptation and can be
      merged directly.
Cons: (1) Increases compile time.
      (2) list_for_each_entry and list_for_each_entry_safe would be merged
      into one, and list_for_each_entry_safe would gradually be deprecated.

6. Make no changes, keep the current logic unchanged, and close the current
email discussion.


Which of the six solutions above do people prefer?

-- 
Thanks
Kaitao Cheng


^ permalink raw reply

* Re: [PATCH] KEYS: asymmetric: fix OOB read in KEYCTL_PKEY_DECRYPT on zero-length message
From: Jarkko Sakkinen @ 2026-06-24 22:39 UTC (permalink / raw)
  To: Lukas Wunner
  Cc: azraelxuemo, linux-crypto, herbert, dhowells, Ignat Korchagin,
	keyrings
In-Reply-To: <ajpRLY4unsqxS46e@wunner.de>

On Tue, Jun 23, 2026 at 11:26:05AM +0200, Lukas Wunner wrote:
> [cc += Ignat, Jarkko, keyrings; start of thread is here:
> https://lore.kernel.org/r/20260622025002.798934-1-xuemo@xuemo.com
> ]
> 
> On Mon, Jun 22, 2026 at 02:50:02AM +0000, azraelxuemo wrote:
> > When ret is replaced with maxsize, the caller keyctl_pkey_e_d_s()
> > does copy_to_user(_out, out, ret) with ret = key_size (e.g. 256
> > for RSA-2048) on a buffer allocated with kmalloc(params.out_len),
> > which can be as small as 1 byte.  This reads key_size - out_len
> > bytes beyond the allocation.
> 
> It would probably make sense to tighten security in keyctl_pkey_e_d_s()
> by using kzalloc() instead of kmalloc() and by capping the amount of
> data copied with min(ret, params.out_len).
> 
> > Fixes: 63ba4d67594a ("KEYS: asymmetric: Use new crypto interface without scatterlists")
> > Signed-off-by: HanQuan <eilaimemedsnaimel@gmail.com>
> 
> Please add:
> 
> Cc: stable@vger.kernel.org # v6.5+
> 
> You don't need to cc that address when submitting the patch,
> but including the tag in the commit message helps stable
> maintainers identify patches that need backporting.
> 
> > +++ b/crypto/asymmetric_keys/public_key.c
> > @@ -358,7 +358,10 @@ static int software_key_eds_op(struct kernel_pkey_params *params,
> >  		BUG();
> >  	}
> >  
> > -	if (!issig && ret == 0)
> > +	/* Decrypt may legitimately return 0 (zero-length message); only
> > +	 * replace ret with maxsize for encrypt, which returns 0 on success.
> > +	 */
> > +	if (!issig && ret == 0 && params->op == kernel_pkey_encrypt)
> >  		ret = crypto_akcipher_maxsize(tfm);
> 
> Given that out of 3 operations (encrypt, decrypt, sign),
> 2 already return the size, I think a better approach would be
> to let crypto_akcipher_sync_encrypt() return crypto_akcipher_maxsize()
> on success, i.e.:
> 
> 	return crypto_akcipher_sync_prep(&data) ?:
> 	       crypto_akcipher_sync_post(&data,
> -					 crypto_akcipher_encrypt(data.req));
> +					 crypto_akcipher_encrypt(data.req)) ?:
> +	       crypto_akcipher_maxsize(tfm);
> 
> and then remove the if-clause in software_key_eds_op() altogether
> which overwrites ret with the maxsize.
> 
> Do you agree?
> 
> Your patch wasn't cc'ed to all maintainers of this file.
> Please double-check that you've checked out Linus' current
> master when running scripts/get_maintainer.pl.
> 
> Thanks,
> 
> Lukas

Lukas, thank you for forwarding this, and with a quick skim I do.

Can this next time also be CC'd to keyrings@vger.kernel.org, in
addition to me and the rest? What I can then do is look at this in detail,
test the change, apply it and eventually forward it to Linus...

BR, Jarkko

^ permalink raw reply

* Re: [PATCH v2] asymmetric_keys: check asymmetric_key_ids() for NULL before dereference
From: Jarkko Sakkinen @ 2026-06-24 22:34 UTC (permalink / raw)
  To: Ignat Korchagin
  Cc: Herbert Xu, Weiming Shi, David Howells, Lukas Wunner,
	David S . Miller, keyrings, linux-crypto, Xiang Mei
In-Reply-To: <CAOs+rJVutdn6vqjSxidx-fA_R8PYsqJbbpMRUW+ijJeXoavCYA@mail.gmail.com>

On Mon, Jun 22, 2026 at 09:21:04PM +0100, Ignat Korchagin wrote:
> On Mon, Jun 22, 2026 at 3:56 PM Jarkko Sakkinen <jarkko@kernel.org> wrote:
> >
> > On Mon, Jun 22, 2026 at 12:16:07PM +0800, Herbert Xu wrote:
> > > On Sat, May 02, 2026 at 09:33:29AM -0700, Weiming Shi wrote:
> > > >
> > > > diff --git a/crypto/asymmetric_keys/asymmetric_type.c b/crypto/asymmetric_keys/asymmetric_type.c
> > > > index 16a7ae16593c..22f04656d529 100644
> > > > --- a/crypto/asymmetric_keys/asymmetric_type.c
> > > > +++ b/crypto/asymmetric_keys/asymmetric_type.c
> > > > @@ -109,6 +109,8 @@ struct key *find_asymmetric_key(struct key *keyring,
> > > >     if (id_0 && id_1) {
> > > >             const struct asymmetric_key_ids *kids = asymmetric_key_ids(key);
> > > >
> > > > +           if (!kids)
> > > > +                   goto reject;
> > >
> > > This check is actually unnecessary because we've already matched
> > > the key against the kid so it must be present.
> > >
> > > I'd get rid of this check or perhaps add a comment instead.
> >
> > +1
> >
> > >
> > > >             if (!kids->id[1]) {
> > > >                     pr_debug("First ID matches, but second is missing\n");
> > > >                     goto reject;
> > > > diff --git a/crypto/asymmetric_keys/restrict.c b/crypto/asymmetric_keys/restrict.c
> > > > index 86292965f493..ccf1084f720e 100644
> > > > --- a/crypto/asymmetric_keys/restrict.c
> > > > +++ b/crypto/asymmetric_keys/restrict.c
> > > > @@ -243,10 +243,14 @@ static int key_or_keyring_common(struct key *dest_keyring,
> > > >                     if (IS_ERR(key))
> > > >                             key = NULL;
> > > >             } else if (trusted->type == &key_type_asymmetric) {
> > > > +                   const struct asymmetric_key_ids *kids;
> > > >                     const struct asymmetric_key_id **signer_ids;
> > > >
> > > > -                   signer_ids = (const struct asymmetric_key_id **)
> > > > -                           asymmetric_key_ids(trusted)->id;
> > > > +                   kids = asymmetric_key_ids(trusted);
> > > > +                   if (!kids)
> > > > +                           goto skip_trusted;
> > >
> > > Yes this is definitely buggy.
> > >
> > > I think it was introduced by these two commits:
> > >
> > > commit 3c58b2362ba828ee2970c66c6a6fd7b04fde4413
> > > Author: David Howells <dhowells@redhat.com>
> > > Date:   Tue Oct 9 17:47:46 2018 +0100
> > >
> > >     KEYS: Implement PKCS#8 RSA Private Key parser [ver #2]
> > >
> > > and
> > >
> > > commit 7e3c4d22083f6e7316c5229b6197ca2d5335aa35
> > > Author: Mat Martineau <martineau@kernel.org>
> > > Date:   Mon Jun 27 16:45:16 2016 -0700
> > >
> > >     KEYS: Restrict asymmetric key linkage using a specific keychain
> > >
> > > So the Fixes header should point to them.
> >
> > +1
> >
> > >
> > > > @@ -290,6 +294,7 @@ static int key_or_keyring_common(struct key *dest_keyring,
> > > >             }
> > > >     }
> > > >
> > > > +skip_trusted:
> > > >     if (check_dest && !key) {
> > > >             /* See if the destination has a key that signed this one. */
> > > >             key = find_asymmetric_key(dest_keyring, sig->auth_ids[0],
> > >
> > > I'm not sure continuing here is a good idea.  Having a private key
> > > here makes no sense whatsoever and we should just bail out right
> > > away.
> > >
> > > I would recommend returning an error of some sort if kids is NULL.
> > >
> > > David/Lukas/Ignat, any opinions?
> 
> As I reread the original submission (somehow I never got the V2) it
> seems we're restricting a keyring with a private key?! Which indeed
> does not make sense.
> 
> > I think with a quick skim that you are right. I'll work on this area
> > for the next version.

BTW I was writing two emails simultaneously; this last sentence was for a
private email discussion.

> >
> > >
> > > Thanks,
> > > --
> > > Email: Herbert Xu <herbert@gondor.apana.org.au>
> > > Home Page: http://gondor.apana.org.au/~herbert/
> > > PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
> >
> > Thanks for the review!

And this.

> >
> > BR, Jarkko
> >
> >

+1's were for the discussion. Sorry, I should not multitask with
emails...

BR, Jarkko

^ permalink raw reply

* [PATCH v9 6/6] KVM: SEV: Perform RMP optimizations on SNP guest shutdown
From: Ashish Kalra @ 2026-06-24 21:59 UTC (permalink / raw)
  To: tglx, mingo, bp, dave.hansen, x86, hpa, seanjc, peterz,
	thomas.lendacky, herbert, davem, ardb
  Cc: pbonzini, aik, Michael.Roth, KPrateek.Nayak, Tycho.Andersen,
	Nathan.Fontenot, ackerleytng, jackyli, pgonda, rientjes, jacobhxu,
	xin, pawan.kumar.gupta, babu.moger, dyoung, nikunj, john.allen,
	darwi, linux-kernel, linux-crypto, kvm, linux-coco
In-Reply-To: <cover.1782336473.git.ashish.kalra@amd.com>

From: Ashish Kalra <ashish.kalra@amd.com>

Pages are converted from shared to private as SNP guests are launched.
This destroys existing RMPOPT optimizations in the regions where
pages are converted.

Conversely, guest pages are converted back to shared during SNP guest
termination and their region may become eligible for RMPOPT
optimization.

To take advantage of this, perform RMPOPT after guest termination.
Do it after a delay so that a single RMPOPT pass can be done if
multiple guests terminate in a short period of time.

Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
Reviewed-by: Ackerley Tng <ackerleytng@google.com>
Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
---
 arch/x86/kvm/svm/sev.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index e107f368ed2d..23e236b13ccd 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -3005,6 +3005,16 @@ void sev_vm_destroy(struct kvm *kvm)
 		 */
 		if (snp_decommission_context(kvm))
 			return;
+
+		/*
+		 * Perform RMP optimizations on memory freed by terminating
+		 * guests.  The scan is deferred, so it normally runs after
+		 * sev_gmem_invalidate() has converted this guest's pages back to
+		 * shared, and picks them up then.  A very large guest whose
+		 * conversion has not finished by then is picked up by a later
+		 * teardown's scan.
+		 */
+		snp_rmpopt_all_physmem();
 	} else {
 		sev_unbind_asid(kvm, sev->handle);
 	}
-- 
2.43.0


^ permalink raw reply related

* [PATCH v9 5/6] x86/sev: Add interface to re-enable RMP optimizations.
From: Ashish Kalra @ 2026-06-24 21:58 UTC (permalink / raw)
  To: tglx, mingo, bp, dave.hansen, x86, hpa, seanjc, peterz,
	thomas.lendacky, herbert, davem, ardb
  Cc: pbonzini, aik, Michael.Roth, KPrateek.Nayak, Tycho.Andersen,
	Nathan.Fontenot, ackerleytng, jackyli, pgonda, rientjes, jacobhxu,
	xin, pawan.kumar.gupta, babu.moger, dyoung, nikunj, john.allen,
	darwi, linux-kernel, linux-crypto, kvm, linux-coco
In-Reply-To: <cover.1782336473.git.ashish.kalra@amd.com>

From: Ashish Kalra <ashish.kalra@amd.com>

The RMPOPT table is a per-CPU table which indicates if 1GB regions of
physical memory are entirely hypervisor-owned or not.

When performing host memory accesses in hypervisor mode as well as
non-SNP guest mode, the processor may consult the RMPOPT table to
potentially skip an RMP access and improve performance.

Normal guest events clear RMP optimizations: pages are converted from
shared to private as SNP guests are launched, and large pages are split
and collapsed during guest operation -- both clear the RMPOPT
optimizations for the affected 1GB regions.  Conversely, guest pages are
converted back to shared during SNP guest termination, so those regions
may become eligible for RMPOPT optimization again.

Without some intervention, all RMP optimizations would eventually be
lost.  Add an interface to re-optimize all of physical memory.

The interface uses mod_delayed_work() instead of queue_delayed_work()
so that the delay timer is reset on each call. This provides proper
batching semantics: re-optimization runs 10 seconds after the *last*
VM termination rather than after the first. mod_delayed_work() also
re-queues work that is already in-flight, so a re-scan request
during an active scan is not silently dropped.

Reviewed-by: Ackerley Tng <ackerleytng@google.com>
Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
---
 arch/x86/include/asm/sev.h |  2 ++
 arch/x86/virt/svm/sev.c    | 15 +++++++++++++++
 2 files changed, 17 insertions(+)

diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index 440c813fedde..d40beafbebb6 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -662,6 +662,7 @@ static inline void snp_leak_pages(u64 pfn, unsigned int pages)
 	__snp_leak_pages(pfn, pages, true);
 }
 int snp_prepare(void);
+void snp_rmpopt_all_physmem(void);
 void snp_setup_rmpopt(void);
 void snp_clear_rmpopt_capable(void);
 void snp_disable_cpu_hotplug(void);
@@ -683,6 +684,7 @@ static inline void snp_leak_pages(u64 pfn, unsigned int npages) {}
 static inline void kdump_sev_callback(void) { }
 static inline void snp_fixup_e820_tables(void) {}
 static inline int snp_prepare(void) { return -ENODEV; }
+static inline void snp_rmpopt_all_physmem(void) {}
 static inline void snp_setup_rmpopt(void) {}
 static inline void snp_clear_rmpopt_capable(void) {}
 static inline void snp_disable_cpu_hotplug(void) {}
diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
index 5f99cbbc6cbd..4661e5271a2d 100644
--- a/arch/x86/virt/svm/sev.c
+++ b/arch/x86/virt/svm/sev.c
@@ -743,6 +743,21 @@ static void rmpopt_work_handler(struct work_struct *work)
 	free_cpumask_var(follower_mask);
 }

+void snp_rmpopt_all_physmem(void)
+{
+	if (!cpu_feature_enabled(X86_FEATURE_RMPOPT) || !rmpopt_capable)
+		return;
+
+	guard(mutex)(&rmpopt_wq_mutex);
+
+	if (!rmpopt_wq)
+		return;
+
+	mod_delayed_work(rmpopt_wq, &rmpopt_delayed_work,
+			 msecs_to_jiffies(RMPOPT_WORK_TIMEOUT));
+}
+EXPORT_SYMBOL_FOR_MODULES(snp_rmpopt_all_physmem, "kvm-amd");
+
 void snp_setup_rmpopt(void)
 {
 	u64 rmpopt_base;
-- 
2.43.0

^ permalink raw reply related

* [PATCH v9 4/6] x86/sev: Add support to perform RMP optimizations asynchronously
From: Ashish Kalra @ 2026-06-24 21:57 UTC (permalink / raw)
  To: tglx, mingo, bp, dave.hansen, x86, hpa, seanjc, peterz,
	thomas.lendacky, herbert, davem, ardb
  Cc: pbonzini, aik, Michael.Roth, KPrateek.Nayak, Tycho.Andersen,
	Nathan.Fontenot, ackerleytng, jackyli, pgonda, rientjes, jacobhxu,
	xin, pawan.kumar.gupta, babu.moger, dyoung, nikunj, john.allen,
	darwi, linux-kernel, linux-crypto, kvm, linux-coco
In-Reply-To: <cover.1782336473.git.ashish.kalra@amd.com>

From: Ashish Kalra <ashish.kalra@amd.com>

When SEV-SNP is enabled, all writes to memory are checked to ensure
integrity of SNP guest memory. This imposes performance overhead on the
whole system.

RMPOPT is a new instruction that minimizes the performance overhead of
RMP checks on the hypervisor and on non-SNP guests by allowing RMP
checks to be skipped for 1GB regions of memory that are known not to
contain any SEV-SNP guest memory.

Add support for performing RMP optimizations asynchronously using a
dedicated workqueue.

Enable RMPOPT optimizations for up to 2TB of system RAM starting from
the lowest physical memory address aligned down to a 1GB boundary at
RMP initialization time. RMP checks can initially be skipped for 1GB
memory ranges that do not contain SEV-SNP guest memory (excluding
preassigned pages such as the RMP table and firmware pages). As SNP
guests are launched, RMPUPDATE will disable the corresponding RMPOPT
optimizations.

Suggested-by: Thomas Lendacky <thomas.lendacky@amd.com>
Suggested-by: Dave Hansen <dave.hansen@linux.intel.com>
Reviewed-by: Ackerley Tng <ackerleytng@google.com>
Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
---
 arch/x86/virt/svm/sev.c | 165 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 162 insertions(+), 3 deletions(-)

diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
index 60984f76b4e9..5f99cbbc6cbd 100644
--- a/arch/x86/virt/svm/sev.c
+++ b/arch/x86/virt/svm/sev.c
@@ -19,6 +19,7 @@
 #include <linux/iommu.h>
 #include <linux/amd-iommu.h>
 #include <linux/nospec.h>
+#include <linux/workqueue.h>
 
 #include <asm/sev.h>
 #include <asm/processor.h>
@@ -125,9 +126,20 @@ static void *rmp_bookkeeping __ro_after_init;
 static u64 probed_rmp_base, probed_rmp_size;
 
 static cpumask_var_t rmpopt_cpumask;
-static phys_addr_t rmpopt_pa_start;
+static phys_addr_t rmpopt_pa_start, rmpopt_pa_end;
 static bool rmpopt_capable;
 
+enum rmpopt_function {
+	RMPOPT_FUNC_VERIFY_AND_REPORT_STATUS,
+	RMPOPT_FUNC_REPORT_STATUS
+};
+
+#define RMPOPT_WORK_TIMEOUT	10000
+
+static struct workqueue_struct *rmpopt_wq;
+static struct delayed_work rmpopt_delayed_work;
+static DEFINE_MUTEX(rmpopt_wq_mutex);
+
 static LIST_HEAD(snp_leaked_pages_list);
 static DEFINE_SPINLOCK(snp_leaked_pages_list_lock);
 
@@ -571,13 +583,22 @@ static void rmpopt_cleanup(void)
 {
 	int cpu;
 
+	guard(mutex)(&rmpopt_wq_mutex);
+
+	if (!rmpopt_wq)
+		return;
+
+	cancel_delayed_work_sync(&rmpopt_delayed_work);
+	destroy_workqueue(rmpopt_wq);
+
 	scoped_guard(cpus_read_lock) {
 		for_each_cpu(cpu, rmpopt_cpumask)
 			wrmsrq_on_cpu(cpu, MSR_AMD64_RMPOPT_BASE, 0);
 	}
 
 	free_cpumask_var(rmpopt_cpumask);
-	rmpopt_pa_start = 0;
+	rmpopt_pa_start = rmpopt_pa_end = 0;
+	rmpopt_wq = NULL;
 }
 
 /*
@@ -627,6 +648,101 @@ void snp_clear_rmpopt_capable(void)
 	rmpopt_capable = false;
 }
 
+/*
+ * RMPOPT: F2 0F 01 FC
+ *   Input:  RAX = system physical address (1GB aligned)
+ *           RCX = operation type
+ *   Output: CF set if the range was optimized
+ */
+static inline bool __rmpopt(u64 pa_start, u64 op_type)
+{
+	bool optimized;
+
+	asm volatile(".byte 0xf2, 0x0f, 0x01, 0xfc"
+		     : "=@ccc" (optimized)
+		     : "a" (pa_start), "c" (op_type)
+		     : "memory", "cc");
+
+	return optimized;
+}
+
+static void rmpopt(u64 pa)
+{
+	u64 pa_start = ALIGN_DOWN(pa, SZ_1G);
+	u64 op_type = RMPOPT_FUNC_VERIFY_AND_REPORT_STATUS;
+
+	__rmpopt(pa_start, op_type);
+}
+
+/*
+ * 'val' is a system physical address.
+ */
+static void rmpopt_smp(void *val)
+{
+	rmpopt((u64)val);
+}
+
+/*
+ * RMPOPT optimizations skip RMP checks at 1GB granularity if this
+ * range of memory does not contain any SNP guest memory.
+ */
+static void rmpopt_work_handler(struct work_struct *work)
+{
+	cpumask_var_t follower_mask;
+	phys_addr_t pa;
+	int this_cpu;
+
+	pr_info("Attempt RMP optimizations on physical address range @1GB alignment [0x%016llx - 0x%016llx]\n",
+		rmpopt_pa_start, rmpopt_pa_end);
+
+	if (!alloc_cpumask_var(&follower_mask, GFP_KERNEL))
+		return;
+
+	/*
+	 * RMPOPT scans the RMP table, stores the result of the scan in the
+	 * reserved processor memory. The RMP scan is the most expensive
+	 * part. If a second RMPOPT occurs, it can skip the expensive scan
+	 * if it can see a cached result in the reserved processor memory.
+	 *
+	 * Do RMPOPT on one CPU alone. Then, follow that up with RMPOPT
+	 * on every other primary thread. Followers are "designed to"
+	 * skip the scan if they see the "cached" scan results.
+	 *
+	 * Pin the worker to the current CPU for the leader loop so that
+	 * this_cpu remains valid and the RMPOPT instruction executes on
+	 * the correct CPU.  Use migrate_disable() rather than get_cpu() to
+	 * prevent migration while still allowing preemption.
+	 */
+	migrate_disable();
+	this_cpu = smp_processor_id();
+
+	cpumask_andnot(follower_mask, rmpopt_cpumask,
+		       topology_sibling_cpumask(this_cpu));
+
+	for (pa = rmpopt_pa_start; pa < rmpopt_pa_end; pa += SZ_1G) {
+		rmpopt(pa);
+		cond_resched();
+	}
+	migrate_enable();
+
+	/*
+	 * Followers: run RMPOPT on remaining cores.  CPUs cannot go offline
+	 * while SNP is active, so the follower set stays valid across the
+	 * scan and cpus_read_lock() is uncontended.
+	 */
+	scoped_guard(cpus_read_lock) {
+		for (pa = rmpopt_pa_start; pa < rmpopt_pa_end; pa += SZ_1G) {
+			on_each_cpu_mask(follower_mask, rmpopt_smp,
+					 (void *)pa, true);
+
+			/* Give a chance for other threads to run */
+			cond_resched();
+		}
+	}
+
+	free_cpumask_var(follower_mask);
+}
+
 void snp_setup_rmpopt(void)
 {
 	u64 rmpopt_base;
@@ -635,14 +751,42 @@ void snp_setup_rmpopt(void)
 	if (!cpu_feature_enabled(X86_FEATURE_RMPOPT) || !rmpopt_capable)
 		return;
 
+	guard(mutex)(&rmpopt_wq_mutex);
+
+	/*
+	 * Guard against re-initialization.  When SNP_SHUTDOWN_EX is issued
+	 * with x86_snp_shutdown=0, snp_shutdown() is not called and
+	 * rmpopt_cleanup() is skipped, but snp_initialized is still cleared.
+	 * A subsequent __sev_snp_init_locked() would call snp_setup_rmpopt()
+	 * again, leaking the existing workqueue, delayed work, and cpumask
+	 * state.
+	 */
+	if (rmpopt_wq)
+		return;
+
+	/*
+	 * Create an RMPOPT-specific workqueue to avoid scheduling
+	 * RMPOPT workitem on the global system workqueue.
+	 */
+	rmpopt_wq = alloc_workqueue("rmpopt_wq", WQ_UNBOUND, 1);
+	if (!rmpopt_wq) {
+		pr_err("Failed to allocate RMPOPT workqueue\n");
+		return;
+	}
+
+	INIT_DELAYED_WORK(&rmpopt_delayed_work, rmpopt_work_handler);
+
 	if (!zalloc_cpumask_var(&rmpopt_cpumask, GFP_KERNEL)) {
 		pr_err("Failed to allocate RMPOPT cpumask\n");
+		destroy_workqueue(rmpopt_wq);
+		rmpopt_wq = NULL;
 		return;
 	}
 
 	/*
 	 * The RMPOPT_BASE MSR is per-core, so only one thread per core needs
-	 * to set up the RMPOPT_BASE MSR.
+	 * to set up the RMPOPT_BASE MSR. Likewise, only one thread per core
+	 * needs to issue the RMPOPT instruction.
 	 *
 	 * Note: only online primary threads are included.  If a core's
 	 * primary thread is offline, that core is not covered.  CPU hotplug
@@ -665,6 +809,21 @@ void snp_setup_rmpopt(void)
 		for_each_cpu(cpu, rmpopt_cpumask)
 			wrmsrq_on_cpu(cpu, MSR_AMD64_RMPOPT_BASE, rmpopt_base);
 	}
+
+	rmpopt_pa_end = ALIGN(PFN_PHYS(max_pfn), SZ_1G);
+
+	/* Limit memory scanning to 2TB of RAM */
+	if ((rmpopt_pa_end - rmpopt_pa_start) > SZ_2T) {
+		pr_info("RMPOPT coverage limited to 2TB; memory above 0x%llx not optimized\n",
+			rmpopt_pa_start + SZ_2T);
+		rmpopt_pa_end = rmpopt_pa_start + SZ_2T;
+	}
+
+	/*
+	 * Once all per-CPU RMPOPT tables have been configured, enable RMPOPT
+	 * optimizations on all physical memory.
+	 */
+	queue_delayed_work(rmpopt_wq, &rmpopt_delayed_work, 0);
 }
 EXPORT_SYMBOL_FOR_MODULES(snp_setup_rmpopt, "ccp");
 
-- 
2.43.0


^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox