All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/4] add Hisilicon accelerator DMA driver
@ 2025-08-27  9:27 Chengwen Feng
  2025-08-27  9:27 ` [PATCH 1/4] dma/acc: add probe and remove Chengwen Feng
                   ` (8 more replies)
  0 siblings, 9 replies; 37+ messages in thread
From: Chengwen Feng @ 2025-08-27  9:27 UTC (permalink / raw)
  To: thomas, liuyonglong; +Cc: dev

This patchset adds the HiSilicon accelerator DMA driver, which is based on
the UACCE bus.

Chengwen Feng (4):
  dma/acc: add probe and remove
  dma/acc: add control path ops
  dma/acc: add data path ops
  dma/acc: add doc

 MAINTAINERS                            |   5 +
 doc/guides/dmadevs/acc.rst             |  63 +++
 doc/guides/dmadevs/index.rst           |   1 +
 doc/guides/rel_notes/release_25_11.rst |   6 +
 drivers/dma/acc/acc_dmadev.c           | 721 +++++++++++++++++++++++++
 drivers/dma/acc/acc_dmadev.h           | 156 ++++++
 drivers/dma/acc/meson.build            |  21 +
 drivers/dma/meson.build                |   1 +
 8 files changed, 974 insertions(+)
 create mode 100644 doc/guides/dmadevs/acc.rst
 create mode 100644 drivers/dma/acc/acc_dmadev.c
 create mode 100644 drivers/dma/acc/acc_dmadev.h
 create mode 100644 drivers/dma/acc/meson.build

-- 
2.17.1


^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH 1/4] dma/acc: add probe and remove
  2025-08-27  9:27 [PATCH 0/4] add Hisilicon accelerator DMA driver Chengwen Feng
@ 2025-08-27  9:27 ` Chengwen Feng
  2025-08-27  9:27 ` [PATCH 2/4] dma/acc: add control path ops Chengwen Feng
                   ` (7 subsequent siblings)
  8 siblings, 0 replies; 37+ messages in thread
From: Chengwen Feng @ 2025-08-27  9:27 UTC (permalink / raw)
  To: thomas, liuyonglong; +Cc: dev

This patch adds the probe and remove operations for the accelerator DMA driver.

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
---
 MAINTAINERS                  |   4 +
 drivers/dma/acc/acc_dmadev.c | 281 +++++++++++++++++++++++++++++++++++
 drivers/dma/acc/acc_dmadev.h |  53 +++++++
 drivers/dma/acc/meson.build  |  21 +++
 drivers/dma/meson.build      |   1 +
 5 files changed, 360 insertions(+)
 create mode 100644 drivers/dma/acc/acc_dmadev.c
 create mode 100644 drivers/dma/acc/acc_dmadev.h
 create mode 100644 drivers/dma/acc/meson.build

diff --git a/MAINTAINERS b/MAINTAINERS
index 7aca98c537..42717363a0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1363,6 +1363,10 @@ M: Chengwen Feng <fengchengwen@huawei.com>
 F: drivers/dma/hisilicon/
 F: doc/guides/dmadevs/hisilicon.rst
 
+HiSilicon Accelerator DMA
+M: Chengwen Feng <fengchengwen@huawei.com>
+F: drivers/dma/acc/
+
 Marvell CNXK DPI DMA
 M: Vamsi Attunuru <vattunuru@marvell.com>
 T: git://dpdk.org/next/dpdk-next-net-mrvl
diff --git a/drivers/dma/acc/acc_dmadev.c b/drivers/dma/acc/acc_dmadev.c
new file mode 100644
index 0000000000..b479d52c91
--- /dev/null
+++ b/drivers/dma/acc/acc_dmadev.c
@@ -0,0 +1,281 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2025 HiSilicon Technologies Co., Ltd. All rights reserved.
+ */
+
+#include <errno.h>
+#include <inttypes.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include <rte_byteorder.h>
+#include <rte_eal.h>
+#include <rte_io.h>
+#include <rte_kvargs.h>
+#include <rte_log.h>
+#include <rte_malloc.h>
+
+#include <rte_dmadev_pmd.h>
+
+#include "acc_dmadev.h"
+
+RTE_LOG_REGISTER_DEFAULT(acc_dma_logtype, INFO);
+#define RTE_LOGTYPE_ACC_DMA acc_dma_logtype
+#define ACC_DMA_LOG(level, ...) \
+	RTE_LOG_LINE_PREFIX(level, ACC_DMA, "%s(): ", __func__, __VA_ARGS__)
+#define ACC_DMA_DEV_LOG(hw, level, ...) \
+	RTE_LOG_LINE_PREFIX(level, ACC_DMA, "%s %s(): ", \
+		(hw)->data->dev_name RTE_LOG_COMMA __func__, __VA_ARGS__)
+#define ACC_DMA_DEBUG(hw, ...) \
+	ACC_DMA_DEV_LOG(hw, DEBUG, __VA_ARGS__)
+#define ACC_DMA_INFO(hw, ...) \
+	ACC_DMA_DEV_LOG(hw, INFO, __VA_ARGS__)
+#define ACC_DMA_WARN(hw, ...) \
+	ACC_DMA_DEV_LOG(hw, WARNING, __VA_ARGS__)
+#define ACC_DMA_ERR(hw, ...) \
+	ACC_DMA_DEV_LOG(hw, ERR, __VA_ARGS__)
+
+/*
+ * Format the dmadev name "<uacce device name>-dma<queue_id>" into dev_name.
+ * The buffer is zero-filled first, so every byte after the formatted text is
+ * NUL; the snprintf() result is deliberately ignored.
+ */
+static void
+acc_dma_gen_dev_name(const struct rte_uacce_device *uacce_dev,
+		     uint16_t queue_id, char *dev_name, size_t size)
+{
+	const char *parent = uacce_dev->device.name;
+
+	memset(dev_name, 0, size);
+	(void)snprintf(dev_name, size, "%s-dma%u", parent, queue_id);
+}
+
+/*
+ * Fetch the queue-pair number and ring geometry of the UACCE queue behind hw
+ * using the CMD_QM_GET_QP_CTX and CMD_QM_GET_QP_INFO ioctls, and cache them
+ * in hw (sqn, sqe_size, sq_depth, cq_depth, sq_depth_mask).
+ *
+ * Returns 0 on success, or -EINVAL if an ioctl fails or the reported depths
+ * cannot be used.
+ */
+static int
+acc_dma_get_qp_info(struct acc_dma_dev *hw)
+{
+#define CMD_QM_GET_QP_CTX	_IOWR('H', 10, struct acc_dma_qp_contex)
+#define CMD_QM_GET_QP_INFO	_IOWR('H', 11, struct acc_dma_qp_info)
+#define QP_ALG_TYPE		2
+	struct acc_dma_qp_contex {
+		uint16_t id;
+		uint16_t qc_type;
+	} qp_ctx;
+	struct acc_dma_qp_info {
+		uint32_t sqe_size;
+		uint16_t sq_depth;
+		uint16_t cq_depth;
+		uint64_t reserved;
+	} qp_info;
+	int ret;
+
+	memset(&qp_ctx, 0, sizeof(qp_ctx));
+	qp_ctx.qc_type = QP_ALG_TYPE;
+	ret = rte_uacce_queue_ioctl(&hw->qctx, CMD_QM_GET_QP_CTX, &qp_ctx);
+	if (ret != 0) {
+		ACC_DMA_ERR(hw, "get qm qp context fail!");
+		return -EINVAL;
+	}
+	hw->sqn = qp_ctx.id;
+
+	memset(&qp_info, 0, sizeof(qp_info));
+	ret = rte_uacce_queue_ioctl(&hw->qctx, CMD_QM_GET_QP_INFO, &qp_info);
+	if (ret != 0) {
+		ACC_DMA_ERR(hw, "get qm qp info fail!");
+		return -EINVAL;
+	}
+	/*
+	 * A zero SQ depth would pass the power-of-two test below
+	 * (0 & (0 - 1) == 0) and produce an all-ones sq_depth_mask; a zero CQ
+	 * depth would leave the completion scan loop with nothing to scan.
+	 */
+	if (qp_info.sq_depth == 0 || qp_info.cq_depth == 0) {
+		ACC_DMA_ERR(hw, "sq depth or cq depth is zero!");
+		return -EINVAL;
+	}
+	/* Ring indexes wrap with "& sq_depth_mask", which needs a power of two. */
+	if ((qp_info.sq_depth & (qp_info.sq_depth - 1)) != 0) {
+		ACC_DMA_ERR(hw, "sq depth is not 2's power!");
+		return -EINVAL;
+	}
+	hw->sqe_size = qp_info.sqe_size;
+	hw->sq_depth = qp_info.sq_depth;
+	hw->cq_depth = qp_info.cq_depth;
+	hw->sq_depth_mask = hw->sq_depth - 1;
+
+	return 0;
+}
+
+/*
+ * Create and publish one dmadev backed by a newly allocated UACCE queue.
+ *
+ * The dmadev is named by acc_dma_gen_dev_name(). Its private data records the
+ * queue context, the MMIO and DUS mappings, the pointers derived from them and
+ * the per-SQE status array. Every resource acquired before a failure is
+ * released again in reverse order of acquisition.
+ *
+ * Returns 0 on success and a non-zero error code on failure.
+ */
+static int
+acc_dma_create(struct rte_uacce_device *uacce_dev, uint16_t queue_id)
+{
+	char name[RTE_DEV_NAME_MAX_LEN];
+	struct rte_dma_dev *dev;
+	struct acc_dma_dev *hw;
+	uintptr_t mmio_addr;
+	uintptr_t dus_addr;
+	int ret;
+
+	acc_dma_gen_dev_name(uacce_dev, queue_id, name, sizeof(name));
+	dev = rte_dma_pmd_allocate(name, uacce_dev->device.numa_node,
+				   sizeof(struct acc_dma_dev));
+	if (dev == NULL) {
+		ACC_DMA_LOG(ERR, "%s allocate dmadev fail!", name);
+		return -EINVAL;
+	}
+
+	hw = dev->data->dev_private;
+	/* The per-device log macros dereference hw->data, so set it first. */
+	hw->data = dev->data;
+	dev->device = &uacce_dev->device;
+	dev->fp_obj->dev_private = hw;
+
+	ret = rte_uacce_queue_alloc(uacce_dev, &hw->qctx);
+	if (ret != 0) {
+		ACC_DMA_ERR(hw, "alloc queue fail!");
+		goto err_release_dmadev;
+	}
+
+	ret = acc_dma_get_qp_info(hw);
+	if (ret != 0)
+		goto err_free_queue;
+
+	hw->io_base = rte_uacce_queue_mmap(&hw->qctx, RTE_UACCE_QFRT_MMIO);
+	if (hw->io_base == NULL) {
+		ACC_DMA_ERR(hw, "mmap MMIO region fail!");
+		ret = -EINVAL;
+		goto err_free_queue;
+	}
+	/* The doorbell register sits 0x1000 bytes into the MMIO mapping. */
+	mmio_addr = (uintptr_t)hw->io_base;
+	hw->doorbell_reg = (void *)(mmio_addr + 0x1000);
+
+	hw->dus_base = rte_uacce_queue_mmap(&hw->qctx, RTE_UACCE_QFRT_DUS);
+	if (hw->dus_base == NULL) {
+		ACC_DMA_ERR(hw, "mmap DUS region fail!");
+		ret = -EINVAL;
+		goto err_unmap_mmio;
+	}
+	/*
+	 * DUS layout used here: the SQE array starts at the base and the CQE
+	 * array follows sq_depth entries of sqe_size bytes. The SQ status word
+	 * is the last 32-bit word of the region and the CQ status word is the
+	 * one just before it.
+	 */
+	dus_addr = (uintptr_t)hw->dus_base;
+	hw->sqe = hw->dus_base;
+	hw->cqe = (void *)(dus_addr + hw->sqe_size * hw->sq_depth);
+	hw->sq_status = (uint32_t *)(dus_addr +
+			uacce_dev->qfrt_sz[RTE_UACCE_QFRT_DUS] - sizeof(uint32_t));
+	hw->cq_status = hw->sq_status - 1;
+
+	/* One zero-initialised status slot per SQE. */
+	hw->status = rte_zmalloc_socket(NULL, sizeof(uint16_t) * hw->sq_depth,
+					RTE_CACHE_LINE_SIZE, uacce_dev->numa_node);
+	if (hw->status == NULL) {
+		ACC_DMA_ERR(hw, "malloc status region fail!");
+		ret = -ENOMEM;
+		goto err_unmap_dus;
+	}
+
+	dev->state = RTE_DMA_DEV_READY;
+	ACC_DMA_DEBUG(hw, "create dmadev %s success!", name);
+
+	return 0;
+
+err_unmap_dus:
+	rte_uacce_queue_unmap(&hw->qctx, RTE_UACCE_QFRT_DUS);
+err_unmap_mmio:
+	rte_uacce_queue_unmap(&hw->qctx, RTE_UACCE_QFRT_MMIO);
+err_free_queue:
+	rte_uacce_queue_free(&hw->qctx);
+err_release_dmadev:
+	rte_dma_pmd_release(name);
+	return ret;
+}
+
+/*
+ * kvargs handler for the "queues" devarg.
+ *
+ * The value is parsed with strtoull() (base auto-detected). A malformed,
+ * out-of-range or zero value falls back to ACC_DMA_DEFAULT_QUEUES; a value
+ * above config->avail_queues is clamped to it. Always returns 0.
+ */
+static int
+acc_dma_parse_queues(const char *key, const char *value, void *extra_args)
+{
+	struct acc_dma_config *config = extra_args;
+	char *end;
+	uint64_t val;
+
+	RTE_SET_USED(key);
+
+	errno = 0;
+	val = strtoull(value, &end, 0);
+
+	if (errno == ERANGE || value == end || *end != '\0' || val == 0) {
+		ACC_DMA_LOG(ERR, "%s invalid queues! set to default one queue!",
+			    config->dev->name);
+		config->queues = ACC_DMA_DEFAULT_QUEUES;
+		return 0;
+	}
+
+	if (val > config->avail_queues) {
+		ACC_DMA_LOG(WARNING, "%s exceed available queues! set to available queues",
+			     config->dev->name);
+		config->queues = config->avail_queues;
+		return 0;
+	}
+
+	config->queues = val;
+	return 0;
+}
+
+/*
+ * Fill config from the device: record the device, the number of available
+ * queues (clamped to UINT16_MAX) and, when devargs are present, the "queues"
+ * value handled by acc_dma_parse_queues().
+ *
+ * Returns -1 if the device has no available queue, 0 otherwise. Devargs that
+ * cannot be parsed or processed are reported with a warning and the queue
+ * count already stored in config is kept.
+ */
+static int
+acc_dma_parse_devargs(struct rte_uacce_device *uacce_dev, struct acc_dma_config *config)
+{
+	struct rte_kvargs *kvlist;
+	int avail_queues;
+
+	avail_queues = rte_uacce_avail_queues(uacce_dev);
+	if (avail_queues <= 0) {
+		ACC_DMA_LOG(ERR, "%s don't have available queues!", uacce_dev->name);
+		return -1;
+	}
+	config->dev = uacce_dev;
+	config->avail_queues = avail_queues <= UINT16_MAX ? avail_queues : UINT16_MAX;
+
+	if (uacce_dev->device.devargs == NULL)
+		return 0;
+
+	kvlist = rte_kvargs_parse(uacce_dev->device.devargs->args, NULL);
+	if (kvlist == NULL) {
+		/* Best effort: keep probing, but do not hide the bad devargs. */
+		ACC_DMA_LOG(WARNING, "%s can't parse devargs! keep default queues",
+			    uacce_dev->name);
+		return 0;
+	}
+
+	if (rte_kvargs_process(kvlist, ACC_DMA_DEVARG_QUEUES,
+			       &acc_dma_parse_queues, config) != 0)
+		ACC_DMA_LOG(WARNING, "%s can't process %s devarg! keep default queues",
+			    uacce_dev->name, ACC_DMA_DEVARG_QUEUES);
+
+	rte_kvargs_free(kvlist);
+
+	return 0;
+}
+
+/*
+ * UACCE probe callback: create config.queues dmadevs on the device, one per
+ * queue, stopping at the first failure.
+ *
+ * Returns the devargs or first-create error when no dmadev could be created,
+ * and 0 when at least one was created (a partial result is only warned about).
+ */
+static int
+acc_dma_probe(struct rte_uacce_driver *dr, struct rte_uacce_device *uacce_dev)
+{
+	struct acc_dma_config config = { .queues = ACC_DMA_DEFAULT_QUEUES };
+	uint32_t created = 0;
+	int ret;
+
+	RTE_SET_USED(dr);
+
+	ret = acc_dma_parse_devargs(uacce_dev, &config);
+	if (ret != 0)
+		return ret;
+
+	while (created < config.queues) {
+		ret = acc_dma_create(uacce_dev, created);
+		if (ret != 0) {
+			ACC_DMA_LOG(ERR, "%s create dmadev No.%u failed!", uacce_dev->name, created);
+			break;
+		}
+		created++;
+	}
+
+	/* Either everything succeeded or nothing was created: report as is. */
+	if (ret == 0 || created == 0)
+		return ret;
+
+	ACC_DMA_LOG(WARNING, "%s probed %u dmadev, can't probe more!", uacce_dev->name, created);
+	return 0;
+}
+
+/*
+ * UACCE remove callback: release every dmadev that was created on uacce_dev.
+ *
+ * dmadevs are named "<uacce device name>-dma<queue_id>" (see
+ * acc_dma_gen_dev_name()), so a dmadev belongs to this device only when its
+ * name is the device name immediately followed by "-dma". Comparing the bare
+ * device-name prefix is not enough: "hisi_zip-1" is also a prefix of
+ * "hisi_zip-10-dma0", which belongs to a different device.
+ *
+ * Always returns 0.
+ */
+static int
+acc_dma_remove(struct rte_uacce_device *uacce_dev)
+{
+	static const char suffix[] = "-dma";
+	const size_t name_len = strlen(uacce_dev->device.name);
+	struct rte_dma_info info;
+	int i = 0;
+	int ret;
+
+	RTE_DMA_FOREACH_DEV(i) {
+		ret = rte_dma_info_get(i, &info);
+		if (ret != 0)
+			continue;
+		if (strncmp(info.dev_name, uacce_dev->device.name, name_len) != 0)
+			continue;
+		/*
+		 * The first name_len characters matched, so dev_name holds at
+		 * least name_len characters and the suffix compare stays inside
+		 * the string (it stops at the terminating NUL at the latest).
+		 */
+		if (strncmp(info.dev_name + name_len, suffix, sizeof(suffix) - 1) != 0)
+			continue;
+		rte_dma_pmd_release(info.dev_name);
+	}
+
+	return 0;
+}
+
+static const struct rte_uacce_id acc_dma_id_table[] = {
+	{ "hisi_qm_v5", "udma" },
+	{ .dev_api = NULL, },
+};
+
+static struct rte_uacce_driver acc_dma_pmd_drv = {
+	.id_table = acc_dma_id_table,
+	.probe = acc_dma_probe,
+	.remove = acc_dma_remove,
+};
+
+RTE_PMD_REGISTER_UACCE(dma_acc, acc_dma_pmd_drv);
+RTE_PMD_REGISTER_PARAM_STRING(dma_acc,
+			ACC_DMA_DEVARG_QUEUES "=<uint16> ");
diff --git a/drivers/dma/acc/acc_dmadev.h b/drivers/dma/acc/acc_dmadev.h
new file mode 100644
index 0000000000..ce613541c0
--- /dev/null
+++ b/drivers/dma/acc/acc_dmadev.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2025 HiSilicon Technologies Co., Ltd. All rights reserved.
+ */
+
+#ifndef ACC_DMADEV_H
+#define ACC_DMADEV_H
+
+#include <bus_uacce_driver.h>
+#include <rte_bitops.h>
+#include <rte_common.h>
+#include <rte_dmadev_pmd.h>
+
+#define ACC_DMA_DEVARG_QUEUES		"queues"
+#define ACC_DMA_DEFAULT_QUEUES		1
+
+struct acc_dma_config {
+	uint16_t queues;
+
+	/* The following fields are config contexts. */
+	struct rte_uacce_device *dev;
+	uint16_t avail_queues;
+};
+
+struct acc_dma_sqe {};
+struct acc_dma_cqe {};
+
+struct acc_dma_dev {
+	struct acc_dma_sqe *sqe;
+	struct acc_dma_cqe *cqe;
+	uint16_t *status;             /* the completion status array of SQEs. */
+
+	volatile void *doorbell_reg;  /**< register address for doorbell. */
+	volatile uint32_t *sq_status; /**< SQ status pointer. */
+	volatile uint32_t *cq_status; /**< CQ status pointer. */
+
+	uint16_t sqn;           /**< SQ global number, inited when created. */
+	uint16_t sq_depth_mask; /**< SQ depth - 1, the SQ depth is power of 2. */
+
+	uint16_t cq_depth;      /**< CQ depth, inited when created. */
+
+	/**
+	 * The following fields are not accessed in the I/O path, so they are
+	 * placed at the end.
+	 */
+	struct rte_dma_dev_data *data;
+	struct rte_uacce_qcontex qctx;
+	void *io_base;
+	void *dus_base;
+	uint32_t sqe_size;
+	uint16_t sq_depth;
+};
+
+#endif /* ACC_DMADEV_H */
diff --git a/drivers/dma/acc/meson.build b/drivers/dma/acc/meson.build
new file mode 100644
index 0000000000..8a1bad5281
--- /dev/null
+++ b/drivers/dma/acc/meson.build
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright (c) 2025 HiSilicon Technologies Co., Ltd. All rights reserved.
+
+if not is_linux
+    build = false
+    reason = 'only supported on Linux'
+    subdir_done()
+endif
+
+if (arch_subdir != 'x86' and arch_subdir != 'arm') or (not dpdk_conf.get('RTE_ARCH_64'))
+    build = false
+    reason = 'only supported on x86_64 and aarch64'
+    subdir_done()
+endif
+
+deps += ['bus_uacce', 'dmadev']
+sources = files(
+        'acc_dmadev.c',
+)
+
+require_iova_in_mbuf = false
diff --git a/drivers/dma/meson.build b/drivers/dma/meson.build
index 358132759a..eeab0ec361 100644
--- a/drivers/dma/meson.build
+++ b/drivers/dma/meson.build
@@ -2,6 +2,7 @@
 # Copyright 2021 HiSilicon Limited
 
 drivers = [
+        'acc',
         'cnxk',
         'dpaa',
         'dpaa2',
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH 2/4] dma/acc: add control path ops
  2025-08-27  9:27 [PATCH 0/4] add Hisilicon accelerator DMA driver Chengwen Feng
  2025-08-27  9:27 ` [PATCH 1/4] dma/acc: add probe and remove Chengwen Feng
@ 2025-08-27  9:27 ` Chengwen Feng
  2025-08-27  9:27 ` [PATCH 3/4] dma/acc: add data " Chengwen Feng
                   ` (6 subsequent siblings)
  8 siblings, 0 replies; 37+ messages in thread
From: Chengwen Feng @ 2025-08-27  9:27 UTC (permalink / raw)
  To: thomas, liuyonglong; +Cc: dev

This commit adds the control path ops for the accelerator DMA driver.

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
---
 drivers/dma/acc/acc_dmadev.c | 156 +++++++++++++++++++++++++++++++++++
 drivers/dma/acc/acc_dmadev.h |  42 ++++++++++
 2 files changed, 198 insertions(+)

diff --git a/drivers/dma/acc/acc_dmadev.c b/drivers/dma/acc/acc_dmadev.c
index b479d52c91..ce2f45cedb 100644
--- a/drivers/dma/acc/acc_dmadev.c
+++ b/drivers/dma/acc/acc_dmadev.c
@@ -34,6 +34,161 @@ RTE_LOG_REGISTER_DEFAULT(acc_dma_logtype, INFO);
 #define ACC_DMA_ERR(hw, ...) \
 	ACC_DMA_DEV_LOG(hw, ERR, __VA_ARGS__)
 
+/*
+ * dev_info_get op: report the capabilities of the device.
+ *
+ * One virtual channel is exposed, and both the minimum and maximum descriptor
+ * counts equal the SQ depth read at create time. info_sz is not used.
+ * Always returns 0.
+ */
+static int
+acc_dma_info_get(const struct rte_dma_dev *dev,
+		 struct rte_dma_info *dev_info,
+		 uint32_t info_sz)
+{
+	const struct acc_dma_dev *hw = dev->data->dev_private;
+	const uint16_t depth = hw->sq_depth;
+
+	RTE_SET_USED(info_sz);
+
+	dev_info->max_vchans = 1;
+	dev_info->min_desc = depth;
+	dev_info->max_desc = depth;
+	dev_info->dev_capa = RTE_DMA_CAPA_OPS_COPY |
+			     RTE_DMA_CAPA_OPS_FILL |
+			     RTE_DMA_CAPA_MEM_TO_MEM |
+			     RTE_DMA_CAPA_SVA;
+
+	return 0;
+}
+
+/*
+ * dev_configure op: nothing is configurable in this driver, so every argument
+ * is ignored and 0 is returned.
+ */
+static int
+acc_dma_configure(struct rte_dma_dev *dev,
+		  const struct rte_dma_conf *conf,
+		  uint32_t conf_sz)
+{
+	(void)dev;
+	(void)conf;
+	(void)conf_sz;
+
+	return 0;
+}
+
+/*
+ * dev_start op.
+ *
+ * First start: clear the SQE/CQE rings and the status array, reset every
+ * software ring index and statistics counter, then start the UACCE queue.
+ * hw->started is set only when the queue start succeeds.
+ *
+ * Later starts (hw->started already set): only the ring indexes handed to the
+ * application (ridx/cridx) are rewound; the queue state is left untouched.
+ *
+ * Returns 0 on success, or the rte_uacce_queue_start() error.
+ */
+static int
+acc_dma_start(struct rte_dma_dev *dev)
+{
+	struct acc_dma_dev *hw = dev->data->dev_private;
+	int ret;
+
+	if (hw->started) {
+		hw->ridx = 0;
+		hw->cridx = 0;
+		return 0;
+	}
+
+	memset(hw->sqe, 0, hw->sqe_size * hw->sq_depth);
+	memset(hw->cqe, 0, sizeof(struct acc_dma_cqe) * hw->cq_depth);
+	memset(hw->status, 0, sizeof(uint16_t) * hw->sq_depth);
+	hw->ridx = 0;
+	hw->cridx = 0;
+	hw->sq_head = 0;
+	hw->sq_tail = 0;
+	hw->cq_sq_head = 0;
+	/* ACC_DMA_SQ_GAP_NUM + 1 SQEs are held back from the usable budget. */
+	hw->avail_sqes = hw->sq_depth - ACC_DMA_SQ_GAP_NUM - 1;
+	hw->cq_head = 0;
+	hw->cqs_completed = 0;
+	hw->cqe_vld = 1;
+	hw->submitted = 0;
+	hw->completed = 0;
+	hw->errors = 0;
+	hw->invalid_lens = 0;
+	/* Reset together with the other counters, as acc_dma_stats_reset() does. */
+	hw->io_errors = 0;
+	hw->qfulls = 0;
+
+	ret = rte_uacce_queue_start(&hw->qctx);
+	if (ret == 0)
+		hw->started = true;
+
+	return ret;
+}
+
+/* dev_stop op: no action is taken on the device; always returns 0. */
+static int
+acc_dma_stop(struct rte_dma_dev *dev)
+{
+	(void)dev;
+
+	return 0;
+}
+
+/*
+ * dev_close op: release what acc_dma_create() acquired, in reverse order:
+ * the status array, the DUS mapping, the MMIO mapping and finally the UACCE
+ * queue. Always returns 0.
+ */
+static int
+acc_dma_close(struct rte_dma_dev *dev)
+{
+	struct acc_dma_dev *hw = dev->data->dev_private;
+	/* The dmadev already stopped */
+	rte_free(hw->status);
+	/* Do not leave a dangling pointer behind in the private data. */
+	hw->status = NULL;
+	rte_uacce_queue_unmap(&hw->qctx, RTE_UACCE_QFRT_DUS);
+	rte_uacce_queue_unmap(&hw->qctx, RTE_UACCE_QFRT_MMIO);
+	rte_uacce_queue_free(&hw->qctx);
+	return 0;
+}
+
+/*
+ * vchan_setup op: the virtual channel has no per-channel settings in this
+ * driver, so every argument is ignored and 0 is returned.
+ */
+static int
+acc_dma_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan,
+		    const struct rte_dma_vchan_conf *conf,
+		    uint32_t conf_sz)
+{
+	(void)dev;
+	(void)vchan;
+	(void)conf;
+	(void)conf_sz;
+
+	return 0;
+}
+
+/*
+ * stats_get op: copy the submitted, completed and errors counters into stats.
+ * vchan and stats_sz are not used. Always returns 0.
+ */
+static int
+acc_dma_stats_get(const struct rte_dma_dev *dev, uint16_t vchan,
+		  struct rte_dma_stats *stats,
+		  uint32_t stats_sz)
+{
+	const struct acc_dma_dev *hw = dev->data->dev_private;
+
+	(void)vchan;
+	(void)stats_sz;
+
+	stats->errors = hw->errors;
+	stats->completed = hw->completed;
+	stats->submitted = hw->submitted;
+
+	return 0;
+}
+
+/*
+ * stats_reset op: zero every statistics counter kept in the private data,
+ * both the ones reported by stats_get and the driver-internal ones.
+ * vchan is not used. Always returns 0.
+ */
+static int
+acc_dma_stats_reset(struct rte_dma_dev *dev, uint16_t vchan)
+{
+	struct acc_dma_dev *hw = dev->data->dev_private;
+
+	(void)vchan;
+
+	/* Counters exported through stats_get. */
+	hw->submitted = 0;
+	hw->completed = 0;
+	hw->errors = 0;
+
+	/* Counters only visible through dev_dump. */
+	hw->invalid_lens = 0;
+	hw->io_errors = 0;
+	hw->qfulls = 0;
+
+	return 0;
+}
+
+/*
+ * dev_dump op: print the queue geometry, the SQ/CQ status words (shown as
+ * "OK" when zero, "ERR" otherwise), the software ring indexes and the
+ * statistics counters to f. Always returns 0.
+ */
+static int
+acc_dma_dump(const struct rte_dma_dev *dev, FILE *f)
+{
+	struct acc_dma_dev *hw = dev->data->dev_private;
+	const char *sq_state = (*hw->sq_status != 0) ? "ERR" : "OK";
+	const char *cq_state = (*hw->cq_status != 0) ? "ERR" : "OK";
+
+	/* Queue identity and geometry. */
+	fprintf(f, "  sqn: %u sq_status: %s cq_status: %s\n"
+		"  sqe_size: %u sq_depth: %u sq_depth_mask: %u cq_depth: %u\n",
+		hw->sqn, sq_state, cq_state,
+		hw->sqe_size, hw->sq_depth, hw->sq_depth_mask, hw->cq_depth);
+
+	/* Software ring bookkeeping. */
+	fprintf(f, "  ridx: %u cridx: %u\n"
+		"  sq_head: %u sq_tail: %u cq_sq_head: %u avail_sqes: %u\n"
+		"  cq_head: %u cqs_completed: %u cqe_vld: %u\n",
+		hw->ridx, hw->cridx,
+		hw->sq_head, hw->sq_tail, hw->cq_sq_head, hw->avail_sqes,
+		hw->cq_head, hw->cqs_completed, hw->cqe_vld);
+
+	/* Statistics counters. */
+	fprintf(f, "  submitted: %" PRIu64 " completed: %" PRIu64 " errors: %" PRIu64
+		" invalid_lens: %" PRIu64 " io_errors: %" PRIu64 " qfulls: %" PRIu64 "\n",
+		hw->submitted, hw->completed, hw->errors, hw->invalid_lens,
+		hw->io_errors, hw->qfulls);
+
+	return 0;
+}
+
 static void
 acc_dma_gen_dev_name(const struct rte_uacce_device *uacce_dev,
 		     uint16_t queue_id, char *dev_name, size_t size)
@@ -104,6 +259,7 @@ acc_dma_create(struct rte_uacce_device *uacce_dev, uint16_t queue_id)
 	}
 
 	dev->device = &uacce_dev->device;
+	dev->dev_ops = &acc_dmadev_ops;
 	dev->fp_obj->dev_private = dev->data->dev_private;
 
 	hw = dev->data->dev_private;
diff --git a/drivers/dma/acc/acc_dmadev.h b/drivers/dma/acc/acc_dmadev.h
index ce613541c0..b87626c244 100644
--- a/drivers/dma/acc/acc_dmadev.h
+++ b/drivers/dma/acc/acc_dmadev.h
@@ -13,6 +13,9 @@
 #define ACC_DMA_DEVARG_QUEUES		"queues"
 #define ACC_DMA_DEFAULT_QUEUES		1
 
+#define ACC_DMA_CQ_DOORBELL_PACE	64
+#define ACC_DMA_SQ_GAP_NUM		ACC_DMA_CQ_DOORBELL_PACE
+
 struct acc_dma_config {
 	uint16_t queues;
 
@@ -36,7 +39,45 @@ struct acc_dma_dev {
 	uint16_t sqn;           /**< SQ global number, inited when created. */
 	uint16_t sq_depth_mask; /**< SQ depth - 1, the SQ depth is power of 2. */
 
+	uint16_t ridx;  /**< ring index which will assign to the next request. */
+	uint16_t cridx; /**< ring index which returned by completed APIs. */
+
+	/**
+	 * SQE array management fields:
+	 *
+	 *  -----------------------------------------------------
+	 *  | SQE0 | SQE1 | SQE2 |   ...  | SQEx | ... | SQEn-1 |
+	 *  -----------------------------------------------------
+	 *     ^             ^               ^
+	 *     |             |               |
+	 *   sq_head     cq_sq_head       sq_tail
+	 *
+	 *  sq_head: index of the oldest completed request; this field is
+	 *           updated by the completed* APIs.
+	 *  sq_tail: index of the next new request; this field is updated by
+	 *           the copy and fill APIs.
+	 *  cq_sq_head: index following the last SQE completed by hardware;
+	 *              this field is updated by the completed* APIs.
+	 *
+	 *  [sq_head, cq_sq_head): the SQEs that hardware already completed.
+	 *  [cq_sq_head, sq_tail): the SQEs that hardware processing.
+	 */
+	uint16_t sq_head;
+	uint16_t sq_tail;
+	uint16_t cq_sq_head;
+	uint16_t avail_sqes;
+
 	uint16_t cq_depth;      /**< CQ depth, inited when created. */
+	uint16_t cq_head;       /**< CQ index for next scans. */
+	uint16_t cqs_completed; /**< accumulated number of completed CQs. */
+	uint8_t  cqe_vld;       /**< valid bit for CQE, will change for every round. */
+
+	uint64_t submitted;
+	uint64_t completed;
+	uint64_t errors;
+	uint64_t invalid_lens;
+	uint64_t io_errors;
+	uint64_t qfulls;
 
 	/**
 	 * The following fields are not accessed in the I/O path, so they are
@@ -48,6 +89,7 @@ struct acc_dma_dev {
 	void *dus_base;
 	uint32_t sqe_size;
 	uint16_t sq_depth;
+	bool started;
 };
 
 #endif /* ACC_DMADEV_H */
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH 3/4] dma/acc: add data path ops
  2025-08-27  9:27 [PATCH 0/4] add Hisilicon accelerator DMA driver Chengwen Feng
  2025-08-27  9:27 ` [PATCH 1/4] dma/acc: add probe and remove Chengwen Feng
  2025-08-27  9:27 ` [PATCH 2/4] dma/acc: add control path ops Chengwen Feng
@ 2025-08-27  9:27 ` Chengwen Feng
  2025-08-27  9:27 ` [PATCH 4/4] dma/acc: add doc Chengwen Feng
                   ` (5 subsequent siblings)
  8 siblings, 0 replies; 37+ messages in thread
From: Chengwen Feng @ 2025-08-27  9:27 UTC (permalink / raw)
  To: thomas, liuyonglong; +Cc: dev

This commit adds the data path ops for the accelerator DMA driver.

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
---
 drivers/dma/acc/acc_dmadev.c | 284 +++++++++++++++++++++++++++++++++++
 drivers/dma/acc/acc_dmadev.h |  65 +++++++-
 2 files changed, 347 insertions(+), 2 deletions(-)

diff --git a/drivers/dma/acc/acc_dmadev.c b/drivers/dma/acc/acc_dmadev.c
index ce2f45cedb..12201ba571 100644
--- a/drivers/dma/acc/acc_dmadev.c
+++ b/drivers/dma/acc/acc_dmadev.c
@@ -189,6 +189,284 @@ acc_dma_dump(const struct rte_dma_dev *dev, FILE *f)
 	return 0;
 }
 
+/*
+ * Ring the SQ doorbell: publish the current sq_tail to the device.
+ *
+ * The 64-bit doorbell value carries the queue number in the low bits, the
+ * command (SQ_CMD) at bit 12 and the ring index from bit 32 upwards. A write
+ * barrier is issued before the register store.
+ */
+static inline void
+acc_dma_sq_doorbell(struct acc_dma_dev *hw)
+{
+#define SQ_CMD	0ull
+	volatile uint64_t *db = hw->doorbell_reg;
+	uint64_t val;
+
+	val = ((uint64_t)hw->sq_tail) << 32;
+	val |= SQ_CMD << 12;
+	val |= (uint64_t)hw->sqn;
+
+	rte_io_wmb();
+	*db = val;
+}
+
+/*
+ * copy fast-path op: enqueue one memory copy of length bytes from src to dst.
+ *
+ * Returns the ring index assigned to the request on success, or
+ *   -EINVAL if length exceeds ACC_DMA_MAX_OP_SIZE,
+ *   -EIO    if the SQ status word is non-zero,
+ *   -ENOSPC if no SQE is available.
+ * Each failure also bumps its own counter (invalid_lens, io_errors, qfulls).
+ * The doorbell is rung only when RTE_DMA_OP_FLAG_SUBMIT is set in flags.
+ */
+static int
+acc_dma_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t dst,
+	     uint32_t length, uint64_t flags)
+{
+	struct acc_dma_dev *hw = dev_private;
+	struct acc_dma_sqe *sqe;
+	uint16_t ring_idx;
+
+	(void)vchan;
+
+	if (unlikely(length > ACC_DMA_MAX_OP_SIZE)) {
+		hw->invalid_lens++;
+		return -EINVAL;
+	}
+
+	if (unlikely(*hw->sq_status != 0)) {
+		hw->io_errors++;
+		return -EIO;
+	}
+
+	if (hw->avail_sqes == 0) {
+		hw->qfulls++;
+		return -ENOSPC;
+	}
+
+	/* Fill the descriptor at the current tail. */
+	sqe = &hw->sqe[hw->sq_tail];
+	sqe->bd_type       = ACC_DMA_SQE_TYPE;
+	sqe->task_type     = ACC_DMA_TASK_TYPE;
+	sqe->task_type_ext = ACC_DMA_DATA_MEMCPY;
+	sqe->data_size     = length;
+	sqe->dw0           = ACC_DMA_SVA_PREFETCH_EN;
+	sqe->init_val      = 0;
+	sqe->dst_addr      = dst;
+	sqe->addr_array    = src;
+
+	/* Account for the new request and advance the tail with wrap-around. */
+	hw->submitted++;
+	hw->avail_sqes--;
+	hw->sq_tail = (hw->sq_tail + 1) & hw->sq_depth_mask;
+
+	if ((flags & RTE_DMA_OP_FLAG_SUBMIT) != 0)
+		acc_dma_sq_doorbell(hw);
+
+	ring_idx = hw->ridx;
+	hw->ridx++;
+
+	return ring_idx;
+}
+
+/*
+ * fill fast-path op: enqueue one fill of length bytes at dst with pattern.
+ *
+ * Returns the ring index assigned to the request on success, or
+ *   -EINVAL if length exceeds ACC_DMA_MAX_OP_SIZE,
+ *   -EIO    if the SQ status word is non-zero,
+ *   -ENOSPC if no SQE is available.
+ * Each failure also bumps its own counter (invalid_lens, io_errors, qfulls).
+ * The doorbell is rung only when RTE_DMA_OP_FLAG_SUBMIT is set in flags.
+ */
+static int
+acc_dma_fill(void *dev_private, uint16_t vchan, uint64_t pattern,
+	     rte_iova_t dst, uint32_t length, uint64_t flags)
+{
+	struct acc_dma_dev *hw = dev_private;
+	struct acc_dma_sqe *sqe;
+	uint16_t ring_idx;
+
+	(void)vchan;
+
+	if (unlikely(length > ACC_DMA_MAX_OP_SIZE)) {
+		hw->invalid_lens++;
+		return -EINVAL;
+	}
+
+	if (unlikely(*hw->sq_status != 0)) {
+		hw->io_errors++;
+		return -EIO;
+	}
+
+	if (hw->avail_sqes == 0) {
+		hw->qfulls++;
+		return -ENOSPC;
+	}
+
+	/* Fill the descriptor at the current tail; a fill has no source. */
+	sqe = &hw->sqe[hw->sq_tail];
+	sqe->bd_type       = ACC_DMA_SQE_TYPE;
+	sqe->task_type     = ACC_DMA_TASK_TYPE;
+	sqe->task_type_ext = ACC_DMA_DATA_MEMSET;
+	sqe->data_size     = length;
+	sqe->dw0           = ACC_DMA_SVA_PREFETCH_EN;
+	sqe->init_val      = pattern;
+	sqe->dst_addr      = dst;
+	sqe->addr_array    = 0;
+
+	/* Account for the new request and advance the tail with wrap-around. */
+	hw->submitted++;
+	hw->avail_sqes--;
+	hw->sq_tail = (hw->sq_tail + 1) & hw->sq_depth_mask;
+
+	if ((flags & RTE_DMA_OP_FLAG_SUBMIT) != 0)
+		acc_dma_sq_doorbell(hw);
+
+	ring_idx = hw->ridx;
+	hw->ridx++;
+
+	return ring_idx;
+}
+
+/*
+ * submit fast-path op: ring the SQ doorbell for the requests enqueued so far.
+ *
+ * Returns 0 on success; if the SQ status word is non-zero, io_errors is
+ * incremented and -EIO is returned without touching the doorbell.
+ */
+static int
+acc_dma_submit(void *dev_private, uint16_t vchan)
+{
+	struct acc_dma_dev *hw = dev_private;
+	const uint32_t sq_state = *hw->sq_status;
+
+	(void)vchan;
+
+	if (unlikely(sq_state != 0)) {
+		hw->io_errors++;
+		return -EIO;
+	}
+
+	acc_dma_sq_doorbell(hw);
+
+	return 0;
+}
+
+/*
+ * Ring the CQ doorbell: publish the current cq_head to the device.
+ *
+ * The 64-bit doorbell value carries the queue number in the low bits, the
+ * command (CQ_CMD) at bit 12 and the ring index from bit 32 upwards. A write
+ * barrier is issued before the register store.
+ */
+static inline void
+acc_dma_cq_doorbell(struct acc_dma_dev *hw)
+{
+#define CQ_CMD	1ull
+	volatile uint64_t *db = hw->doorbell_reg;
+	uint64_t val;
+
+	val = ((uint64_t)hw->cq_head) << 32;
+	val |= CQ_CMD << 12;
+	val |= (uint64_t)hw->sqn;
+
+	rte_io_wmb();
+	*db = val;
+}
+
+/*
+ * Scan the completion queue for new CQEs and update the software view of the
+ * rings.
+ *
+ * At most cq_depth CQEs are consumed per call. A CQE is new when its valid
+ * bit equals the expected value, which flips each time cq_head wraps. Every
+ * CQE names an SQ index; the SQE at that index is inspected and, if it does
+ * not show a clean ACC_DMA_TASK_DONE, its status slot is marked
+ * RTE_DMA_STATUS_ERROR_UNKNOWN.
+ *
+ * The scan works on local copies of cq_head and the valid bit and commits
+ * them together at the end. If a CQE reports an out-of-range SQ index the
+ * scan is abandoned and none of cq_head, cqe_vld, cq_sq_head or avail_sqes
+ * changes, so the expected valid bit cannot get out of step with cq_head.
+ *
+ * The CQ doorbell is rung once ACC_DMA_CQ_DOORBELL_PACE CQEs have been
+ * accumulated. A non-zero CQ status word only increments io_errors.
+ *
+ * NOTE(review): no read barrier separates the CQE valid-bit load from the
+ * loads of the SQE write-back fields; confirm whether the device/platform
+ * ordering guarantees make one unnecessary.
+ */
+static inline void
+acc_dma_scan_cq(struct acc_dma_dev *hw)
+{
+	volatile struct acc_dma_cqe *cqe;
+	struct acc_dma_sqe *sqe;
+	uint16_t csq_head = hw->cq_sq_head;
+	uint16_t cq_head = hw->cq_head;
+	uint8_t cqe_vld = hw->cqe_vld;
+	uint16_t count = 0;
+	uint64_t misc;
+
+	if (unlikely(*hw->cq_status != 0)) {
+		hw->io_errors++;
+		return;
+	}
+
+	while (count < hw->cq_depth) {
+		cqe = &hw->cqe[cq_head];
+		misc = cqe->misc;
+		misc = rte_le_to_cpu_64(misc);
+		if (RTE_FIELD_GET64(ACC_DMA_CQE_VALID_B, misc) != cqe_vld)
+			break;
+
+		csq_head = RTE_FIELD_GET64(ACC_DMA_SQ_HEAD_MASK, misc);
+		if (unlikely(csq_head > hw->sq_depth_mask)) {
+			/**
+			 * Defensive programming to prevent overflow of the
+			 * status array indexed by csq_head. Only error logs
+			 * are used for prompting. Nothing has been committed
+			 * to hw yet, so the whole scan is simply dropped.
+			 */
+			ACC_DMA_ERR(hw, "invalid csq_head: %u!", csq_head);
+			return;
+		}
+		sqe = &hw->sqe[csq_head];
+		if (sqe->done_flag != ACC_DMA_TASK_DONE ||
+			sqe->err_type || sqe->ext_err_type || sqe->wtype) {
+			hw->status[csq_head] = RTE_DMA_STATUS_ERROR_UNKNOWN;
+		}
+
+		count++;
+		cq_head++;
+		if (cq_head == hw->cq_depth) {
+			/* Wrapped: the next round uses the opposite valid bit. */
+			cqe_vld = !cqe_vld;
+			cq_head = 0;
+		}
+	}
+
+	if (count == 0)
+		return;
+
+	hw->cq_head = cq_head;
+	hw->cqe_vld = cqe_vld;
+	hw->cq_sq_head = (csq_head + 1) & hw->sq_depth_mask;
+	hw->avail_sqes += count;
+	hw->cqs_completed += count;
+	if (hw->cqs_completed >= ACC_DMA_CQ_DOORBELL_PACE) {
+		acc_dma_cq_doorbell(hw);
+		hw->cqs_completed = 0;
+	}
+}
+
+/*
+ * Return how many requests in [sq_head, cq_sq_head) can be reported, capped
+ * at nb_cpls. The distance is computed modulo the ring size, which covers the
+ * case where cq_sq_head has wrapped around and is below sq_head.
+ */
+static inline uint16_t
+acc_dma_calc_cpls(struct acc_dma_dev *hw, const uint16_t nb_cpls)
+{
+	/* Modulo-65536 difference, then folded into the ring with the mask. */
+	const uint16_t pending =
+		(uint16_t)(hw->cq_sq_head - hw->sq_head) & hw->sq_depth_mask;
+
+	return pending > nb_cpls ? nb_cpls : pending;
+}
+
+/*
+ * completed fast-path op: report up to nb_cpls requests that finished
+ * without error.
+ *
+ * The CQ is scanned first. Walking from sq_head, the count stops at the first
+ * request whose status slot is non-zero; *has_error is then set to true and
+ * that request is not consumed. *last_idx receives the ring index of the last
+ * request reported (cridx + count - 1, also written when the count is 0).
+ *
+ * Returns the number of requests reported.
+ */
+static uint16_t
+acc_dma_completed(void *dev_private,
+		  uint16_t vchan, const uint16_t nb_cpls,
+		  uint16_t *last_idx, bool *has_error)
+{
+	struct acc_dma_dev *hw = dev_private;
+	uint16_t head = hw->sq_head;
+	uint16_t done = 0;
+	uint16_t limit;
+
+	(void)vchan;
+	acc_dma_scan_cq(hw);
+
+	limit = acc_dma_calc_cpls(hw, nb_cpls);
+	while (done < limit) {
+		if (hw->status[head]) {
+			*has_error = true;
+			break;
+		}
+		head = (head + 1) & hw->sq_depth_mask;
+		done++;
+	}
+
+	*last_idx = hw->cridx + done - 1;
+	if (done == 0)
+		return 0;
+
+	hw->cridx += done;
+	hw->sq_head = head;
+	hw->completed += done;
+
+	return done;
+}
+
+/*
+ * completed_status fast-path op: report up to nb_cpls finished requests
+ * together with their status codes.
+ *
+ * The CQ is scanned first. For each reported request the status slot is
+ * copied to status[], counted in hw->errors when non-zero, and then cleared.
+ * *last_idx receives cridx + count - 1 (also written when the count is 0).
+ *
+ * Returns the number of requests reported.
+ */
+static uint16_t
+acc_dma_completed_status(void *dev_private,
+			 uint16_t vchan, const uint16_t nb_cpls,
+			 uint16_t *last_idx, enum rte_dma_status_code *status)
+{
+	struct acc_dma_dev *hw = dev_private;
+	uint16_t head = hw->sq_head;
+	uint16_t total;
+	uint16_t n;
+
+	(void)vchan;
+	acc_dma_scan_cq(hw);
+
+	total = acc_dma_calc_cpls(hw, nb_cpls);
+	for (n = 0; n < total; n++) {
+		status[n] = hw->status[head];
+		if (status[n] != 0)
+			hw->errors++;
+		hw->status[head] = 0;
+		head = (head + 1) & hw->sq_depth_mask;
+	}
+
+	*last_idx = hw->cridx + total - 1;
+	if (unlikely(total == 0))
+		return total;
+
+	hw->cridx += total;
+	hw->sq_head = head;
+	hw->completed += total;
+
+	return total;
+}
+
+/*
+ * burst_capacity fast-path op: return the number of SQEs currently available
+ * for new requests. vchan is not used.
+ */
+static uint16_t
+acc_dma_burst_capacity(const void *dev_private, uint16_t vchan)
+{
+	const struct acc_dma_dev *hw = dev_private;
+
+	(void)vchan;
+
+	return hw->avail_sqes;
+}
+
+static const struct rte_dma_dev_ops acc_dmadev_ops = {
+	.dev_info_get     = acc_dma_info_get,
+	.dev_configure    = acc_dma_configure,
+	.dev_start        = acc_dma_start,
+	.dev_stop         = acc_dma_stop,
+	.dev_close        = acc_dma_close,
+	.vchan_setup      = acc_dma_vchan_setup,
+	.stats_get        = acc_dma_stats_get,
+	.stats_reset      = acc_dma_stats_reset,
+	.dev_dump         = acc_dma_dump,
+};
+
 static void
 acc_dma_gen_dev_name(const struct rte_uacce_device *uacce_dev,
 		     uint16_t queue_id, char *dev_name, size_t size)
@@ -261,6 +539,12 @@ acc_dma_create(struct rte_uacce_device *uacce_dev, uint16_t queue_id)
 	dev->device = &uacce_dev->device;
 	dev->dev_ops = &acc_dmadev_ops;
 	dev->fp_obj->dev_private = dev->data->dev_private;
+	dev->fp_obj->copy = acc_dma_copy;
+	dev->fp_obj->fill = acc_dma_fill;
+	dev->fp_obj->submit = acc_dma_submit;
+	dev->fp_obj->completed = acc_dma_completed;
+	dev->fp_obj->completed_status = acc_dma_completed_status;
+	dev->fp_obj->burst_capacity = acc_dma_burst_capacity;
 
 	hw = dev->data->dev_private;
 	hw->data = dev->data; /* make sure ACC_DMA_DEBUG/INFO/WARN/ERR was available. */
diff --git a/drivers/dma/acc/acc_dmadev.h b/drivers/dma/acc/acc_dmadev.h
index b87626c244..2055e968f6 100644
--- a/drivers/dma/acc/acc_dmadev.h
+++ b/drivers/dma/acc/acc_dmadev.h
@@ -24,8 +24,69 @@ struct acc_dma_config {
 	uint16_t avail_queues;
 };
 
-struct acc_dma_sqe {};
-struct acc_dma_cqe {};
+#define ACC_DMA_TASK_TYPE		0x3
+#define ACC_DMA_SQE_TYPE		0x1
+#define ACC_DMA_SVA_PREFETCH_EN		RTE_BIT32(15)
+#define ACC_DMA_MAX_OP_SIZE		(RTE_BIT32(24) - 1)
+
+enum {
+	ACC_DMA_DATA_MEMCPY = 0,
+	ACC_DMA_DATA_MEMSET = 7,
+};
+
+enum {
+	ACC_DMA_TASK_DONE = 1,
+	ACC_DMA_TASK_ERROR,
+};
+
+struct acc_dma_sqe {
+	uint32_t bd_type : 6;
+	uint32_t resv1 : 2;
+	uint32_t task_type : 6;
+	uint32_t resv2 : 2;
+	uint32_t task_type_ext : 6;
+	uint32_t resv3 : 9;
+	uint32_t bd_invlid : 1;
+	uint32_t rsv4[2];
+	uint32_t low_tag;
+	uint32_t hi_tag;
+	/* The number of bytes to be copied or filled for single address. */
+	uint32_t data_size;
+	uint32_t rsv5;
+	/*
+	 * 0 ~ 13 bits: reserved,
+	 * 14 bit: single address or multi addresses,
+	 * 15 bit: sva prefetch en.
+	 */
+	uint16_t dw0;
+	/*
+	 * 0 ~5 bits: reserved,
+	 * 6 ~ 13 bits: address num,
+	 * 14 ~15 bits: reserved.
+	 */
+	uint16_t dw1;
+	uint64_t init_val;
+	uint32_t rsv6[12];
+	/* dst addr for single address task. */
+	uint64_t dst_addr;
+	uint32_t rsv7[2];
+	/* src addr for single address task, addr array for multi addresses. */
+	uint64_t addr_array;
+	uint32_t done_flag : 3;
+	uint32_t rsv8 : 1;
+	uint32_t ext_err_type : 12;
+	uint32_t err_type : 8;
+	uint32_t wtype : 8;
+	uint32_t rsv9[3];
+};
+
+#define ACC_DMA_SQ_HEAD_MASK		RTE_GENMASK64(15, 0)
+#define ACC_DMA_CQE_VALID_B		RTE_BIT64(48)
+
+struct acc_dma_cqe {
+	uint64_t rsv;
+	uint64_t misc;
+};
 
 struct acc_dma_dev {
 	struct acc_dma_sqe *sqe;
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH 4/4] dma/acc: add doc
  2025-08-27  9:27 [PATCH 0/4] add Hisilicon accelerator DMA driver Chengwen Feng
                   ` (2 preceding siblings ...)
  2025-08-27  9:27 ` [PATCH 3/4] dma/acc: add data " Chengwen Feng
@ 2025-08-27  9:27 ` Chengwen Feng
  2025-09-08  2:39 ` [PATCH v2 0/4] add Hisilicon accelerator DMA driver Chengwen Feng
                   ` (4 subsequent siblings)
  8 siblings, 0 replies; 37+ messages in thread
From: Chengwen Feng @ 2025-08-27  9:27 UTC (permalink / raw)
  To: thomas, liuyonglong; +Cc: dev

This commit adds documentation for the accelerator DMA driver.

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
---
 MAINTAINERS                            |  1 +
 doc/guides/dmadevs/acc.rst             | 63 ++++++++++++++++++++++++++
 doc/guides/dmadevs/index.rst           |  1 +
 doc/guides/rel_notes/release_25_11.rst |  6 +++
 4 files changed, 71 insertions(+)
 create mode 100644 doc/guides/dmadevs/acc.rst

diff --git a/MAINTAINERS b/MAINTAINERS
index 42717363a0..ca3a8a421b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1366,6 +1366,7 @@ F: doc/guides/dmadevs/hisilicon.rst
 HiSilicon Accelerator DMA
 M: Chengwen Feng <fengchengwen@huawei.com>
 F: drivers/dma/acc/
+F: doc/guides/dmadevs/acc.rst
 
 Marvell CNXK DPI DMA
 M: Vamsi Attunuru <vattunuru@marvell.com>
diff --git a/doc/guides/dmadevs/acc.rst b/doc/guides/dmadevs/acc.rst
new file mode 100644
index 0000000000..41fc24877f
--- /dev/null
+++ b/doc/guides/dmadevs/acc.rst
@@ -0,0 +1,63 @@
+..  SPDX-License-Identifier: BSD-3-Clause
+    Copyright (c) 2025 HiSilicon Technologies Co., Ltd. All rights reserved.
+
+HISILICON Accelerator DMA Driver
+================================
+
+Kunpeng SoC has an internal accelerator unit which includes zip function, and
+the zip also supports data copy and fill. This driver exposes this capability to
+DPDK application.
+
+
+Supported Kunpeng SoCs
+----------------------
+
+* Kunpeng 920
+
+
+Device Setup
+-------------
+
+In order to use the device in DPDK, the user should insmod uacce.ko, hisi_qm.ko
+and hisi_zip.ko (with module parameter uacce_mode=1); several subdirectories
+whose names start with hisi_zip then appear in the /sys/class/uacce/ directory.
+
+Device Probing and Initialization
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Use the following method to probe the device::
+
+        $ dpdk-app -a uacce:hisi_zip-0,queues=2 ...
+
+hisi_zip-0 is the directory name in the /sys/class/uacce/ directory, and queues
+is a runtime config parameter which indicates how many dmadevs are created.
+
+If the probe is successful, two dmadevs are created, named "hisi_zip-0-dma0"
+and "hisi_zip-0-dma1".
+
+.. note::
+        In the /sys/class/uacce/hisi_zip-x/ directory, user could query api and
+        algorithms, this driver can only match the device whose api is
+        hisi_qm_v5 and algorithms contain udma.
+
+Device Configuration
+~~~~~~~~~~~~~~~~~~~~~
+
+Configuration requirements:
+
+* ``ring_size`` is obtained from the UACCE API and is a fixed value.
+* Only one ``vchan`` is supported per ``dmadev``.
+* Silent mode is not supported.
+* The transfer direction must be set to ``RTE_DMA_DIR_MEM_TO_MEM``.
+
+
+Device Datapath Capability and Limitation
+-----------------------------------------
+
+Support memory copy and fill operations.
+
+.. note::
+        Currently, the maximum size of the operation data is limited to 16MB-1B
+        in the driver. The device actually supports operations with a larger
+        data size, but supporting them would require complex operations in the
+        datapath. If you have such a requirement, please contact the maintainers.
diff --git a/doc/guides/dmadevs/index.rst b/doc/guides/dmadevs/index.rst
index 15ddaf5192..dcc8c189ba 100644
--- a/doc/guides/dmadevs/index.rst
+++ b/doc/guides/dmadevs/index.rst
@@ -11,6 +11,7 @@ an application through DMA API.
    :maxdepth: 1
    :numbered:
 
+   acc
    cnxk
    dpaa
    dpaa2
diff --git a/doc/guides/rel_notes/release_25_11.rst b/doc/guides/rel_notes/release_25_11.rst
index ccad6d89ff..977f4c7e43 100644
--- a/doc/guides/rel_notes/release_25_11.rst
+++ b/doc/guides/rel_notes/release_25_11.rst
@@ -55,6 +55,12 @@ New Features
      Also, make sure to start the actual text at the margin.
      =======================================================
 
+    * **Add Hisilicon Accelerator DMA Driver.**
+
+    Kunpeng SoC has an internal accelerator unit which includes a zip function,
+    and the zip also supports data copy and fill. This driver exposes this
+    capability to DPDK applications.
+
 
 Removed Items
 -------------
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH v2 0/4] add Hisilicon accelerator DMA driver
  2025-08-27  9:27 [PATCH 0/4] add Hisilicon accelerator DMA driver Chengwen Feng
                   ` (3 preceding siblings ...)
  2025-08-27  9:27 ` [PATCH 4/4] dma/acc: add doc Chengwen Feng
@ 2025-09-08  2:39 ` Chengwen Feng
  2025-09-08  2:39   ` [PATCH v2 1/4] dma/acc: add probe and remove Chengwen Feng
                     ` (3 more replies)
  2025-09-10  8:50 ` [PATCH v3 0/4] add Hisilicon accelerator DMA driver Chengwen Feng
                   ` (3 subsequent siblings)
  8 siblings, 4 replies; 37+ messages in thread
From: Chengwen Feng @ 2025-09-08  2:39 UTC (permalink / raw)
  To: thomas; +Cc: dev, liuyonglong

This patchset adds Hisilicon accelerator DMA driver which based on
UACCE bus.

Chengwen Feng (4):
  dma/acc: add probe and remove
  dma/acc: add control path ops
  dma/acc: add data path ops
  dma/acc: add doc

---
v2: fix magic number.
    support clean write back field when submit request.

 MAINTAINERS                            |   5 +
 doc/guides/dmadevs/acc.rst             |  63 +++
 doc/guides/dmadevs/index.rst           |   1 +
 doc/guides/rel_notes/release_25_11.rst |   6 +
 drivers/dma/acc/acc_dmadev.c           | 723 +++++++++++++++++++++++++
 drivers/dma/acc/acc_dmadev.h           | 168 ++++++
 drivers/dma/acc/meson.build            |  21 +
 drivers/dma/meson.build                |   1 +
 8 files changed, 988 insertions(+)
 create mode 100644 doc/guides/dmadevs/acc.rst
 create mode 100644 drivers/dma/acc/acc_dmadev.c
 create mode 100644 drivers/dma/acc/acc_dmadev.h
 create mode 100644 drivers/dma/acc/meson.build

-- 
2.17.1


^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH v2 1/4] dma/acc: add probe and remove
  2025-09-08  2:39 ` [PATCH v2 0/4] add Hisilicon accelerator DMA driver Chengwen Feng
@ 2025-09-08  2:39   ` Chengwen Feng
  2025-09-08  2:39   ` [PATCH v2 2/4] dma/acc: add control path ops Chengwen Feng
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 37+ messages in thread
From: Chengwen Feng @ 2025-09-08  2:39 UTC (permalink / raw)
  To: thomas; +Cc: dev, liuyonglong

This patch adds probe and remove operation for accelerator DMA driver.

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
---
 MAINTAINERS                  |   4 +
 drivers/dma/acc/acc_dmadev.c | 281 +++++++++++++++++++++++++++++++++++
 drivers/dma/acc/acc_dmadev.h |  55 +++++++
 drivers/dma/acc/meson.build  |  21 +++
 drivers/dma/meson.build      |   1 +
 5 files changed, 362 insertions(+)
 create mode 100644 drivers/dma/acc/acc_dmadev.c
 create mode 100644 drivers/dma/acc/acc_dmadev.h
 create mode 100644 drivers/dma/acc/meson.build

diff --git a/MAINTAINERS b/MAINTAINERS
index 7aca98c537..42717363a0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1363,6 +1363,10 @@ M: Chengwen Feng <fengchengwen@huawei.com>
 F: drivers/dma/hisilicon/
 F: doc/guides/dmadevs/hisilicon.rst
 
+HiSilicon Accelerator DMA
+M: Chengwen Feng <fengchengwen@huawei.com>
+F: drivers/dma/acc/
+
 Marvell CNXK DPI DMA
 M: Vamsi Attunuru <vattunuru@marvell.com>
 T: git://dpdk.org/next/dpdk-next-net-mrvl
diff --git a/drivers/dma/acc/acc_dmadev.c b/drivers/dma/acc/acc_dmadev.c
new file mode 100644
index 0000000000..40348b70b8
--- /dev/null
+++ b/drivers/dma/acc/acc_dmadev.c
@@ -0,0 +1,281 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2025 HiSilicon Technologies Co., Ltd. All rights reserved.
+ */
+
+#include <errno.h>
+#include <inttypes.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include <rte_byteorder.h>
+#include <rte_eal.h>
+#include <rte_io.h>
+#include <rte_kvargs.h>
+#include <rte_log.h>
+#include <rte_malloc.h>
+
+#include <rte_dmadev_pmd.h>
+
+#include "acc_dmadev.h"
+
+RTE_LOG_REGISTER_DEFAULT(acc_dma_logtype, INFO);
+#define RTE_LOGTYPE_ACC_DMA acc_dma_logtype
+#define ACC_DMA_LOG(level, ...) \
+	RTE_LOG_LINE_PREFIX(level, ACC_DMA, "%s(): ", __func__, __VA_ARGS__)
+#define ACC_DMA_DEV_LOG(hw, level, ...) \
+	RTE_LOG_LINE_PREFIX(level, ACC_DMA, "%s %s(): ", \
+		(hw)->data->dev_name RTE_LOG_COMMA __func__, __VA_ARGS__)
+#define ACC_DMA_DEBUG(hw, ...) \
+	ACC_DMA_DEV_LOG(hw, DEBUG, __VA_ARGS__)
+#define ACC_DMA_INFO(hw, ...) \
+	ACC_DMA_DEV_LOG(hw, INFO, __VA_ARGS__)
+#define ACC_DMA_WARN(hw, ...) \
+	ACC_DMA_DEV_LOG(hw, WARNING, __VA_ARGS__)
+#define ACC_DMA_ERR(hw, ...) \
+	ACC_DMA_DEV_LOG(hw, ERR, __VA_ARGS__)
+
+static void
+acc_dma_gen_dev_name(const struct rte_uacce_device *uacce_dev,
+		     uint16_t queue_id, char *dev_name, size_t size)
+{
+	memset(dev_name, 0, size);
+	(void)snprintf(dev_name, size, "%s-dma%u", uacce_dev->device.name, queue_id);
+}
+
+/*
+ * Query the queue-pair context and geometry through the queue ioctls and
+ * cache them in hw (sqn, sqe_size, sq_depth, cq_depth, sq_depth_mask).
+ *
+ * Returns 0 on success, -EINVAL if an ioctl fails or if the reported SQ depth
+ * is zero or not a power of two (sq_depth_mask = sq_depth - 1 is only a valid
+ * ring mask for a non-zero power of two).
+ */
+static int
+acc_dma_get_qp_info(struct acc_dma_dev *hw)
+{
+#define CMD_QM_GET_QP_CTX	_IOWR('H', 10, struct acc_dma_qp_contex)
+#define CMD_QM_GET_QP_INFO	_IOWR('H', 11, struct acc_dma_qp_info)
+#define QP_ALG_TYPE		2
+	struct acc_dma_qp_contex {
+		uint16_t id;
+		uint16_t qc_type;
+	} qp_ctx;
+	struct acc_dma_qp_info {
+		uint32_t sqe_size;
+		uint16_t sq_depth;
+		uint16_t cq_depth;
+		uint64_t reserved;
+	} qp_info;
+	int ret;
+
+	memset(&qp_ctx, 0, sizeof(qp_ctx));
+	qp_ctx.qc_type = QP_ALG_TYPE;
+	ret = rte_uacce_queue_ioctl(&hw->qctx, CMD_QM_GET_QP_CTX, &qp_ctx);
+	if (ret != 0) {
+		ACC_DMA_ERR(hw, "get qm qp context fail!");
+		return -EINVAL;
+	}
+	hw->sqn = qp_ctx.id;
+
+	memset(&qp_info, 0, sizeof(qp_info));
+	ret = rte_uacce_queue_ioctl(&hw->qctx, CMD_QM_GET_QP_INFO, &qp_info);
+	if (ret != 0) {
+		ACC_DMA_ERR(hw, "get qm qp info fail!");
+		return -EINVAL;
+	}
+	/*
+	 * The classic (x & (x - 1)) test also accepts x == 0, which would make
+	 * sq_depth_mask wrap to 0xFFFF; reject zero explicitly.
+	 */
+	if (qp_info.sq_depth == 0 ||
+	    (qp_info.sq_depth & (qp_info.sq_depth - 1)) != 0) {
+		ACC_DMA_ERR(hw, "sq depth is not 2's power!");
+		return -EINVAL;
+	}
+	hw->sqe_size = qp_info.sqe_size;
+	hw->sq_depth = qp_info.sq_depth;
+	hw->cq_depth = qp_info.cq_depth;
+	hw->sq_depth_mask = hw->sq_depth - 1;
+
+	return 0;
+}
+
+static int
+acc_dma_create(struct rte_uacce_device *uacce_dev, uint16_t queue_id)
+{
+	char name[RTE_DEV_NAME_MAX_LEN];
+	struct rte_dma_dev *dev;
+	struct acc_dma_dev *hw;
+	int ret;
+
+	acc_dma_gen_dev_name(uacce_dev, queue_id, name, sizeof(name));
+	dev = rte_dma_pmd_allocate(name, uacce_dev->device.numa_node,
+				   sizeof(struct acc_dma_dev));
+	if (dev == NULL) {
+		ACC_DMA_LOG(ERR, "%s allocate dmadev fail!", name);
+		return -EINVAL;
+	}
+
+	dev->device = &uacce_dev->device;
+	dev->fp_obj->dev_private = dev->data->dev_private;
+
+	hw = dev->data->dev_private;
+	hw->data = dev->data; /* make sure ACC_DMA_DEBUG/INFO/WARN/ERR was available. */
+
+	ret = rte_uacce_queue_alloc(uacce_dev, &hw->qctx);
+	if (ret != 0) {
+		ACC_DMA_ERR(hw, "alloc queue fail!");
+		goto release_dma_pmd;
+	}
+
+	ret = acc_dma_get_qp_info(hw);
+	if (ret != 0)
+		goto free_uacce_queue;
+
+	hw->io_base = rte_uacce_queue_mmap(&hw->qctx, RTE_UACCE_QFRT_MMIO);
+	if (hw->io_base == NULL) {
+		ACC_DMA_ERR(hw, "mmap MMIO region fail!");
+		ret = -EINVAL;
+		goto free_uacce_queue;
+	}
+	hw->doorbell_reg = (void *)((uintptr_t)hw->io_base + ACC_DMA_DOORBELL_OFFSET);
+
+	hw->dus_base = rte_uacce_queue_mmap(&hw->qctx, RTE_UACCE_QFRT_DUS);
+	if (hw->dus_base == NULL) {
+		ACC_DMA_ERR(hw, "mmap DUS region fail!");
+		ret = -EINVAL;
+		goto unmap_mmio;
+	}
+	hw->sqe = hw->dus_base;
+	hw->cqe = (void *)((uintptr_t)hw->dus_base + hw->sqe_size * hw->sq_depth);
+	hw->sq_status = (uint32_t *)((uintptr_t)hw->dus_base +
+			uacce_dev->qfrt_sz[RTE_UACCE_QFRT_DUS] - sizeof(uint32_t));
+	hw->cq_status = hw->sq_status - 1;
+
+	hw->status = rte_zmalloc_socket(NULL, sizeof(uint16_t) * hw->sq_depth,
+					RTE_CACHE_LINE_SIZE, uacce_dev->numa_node);
+	if (hw->status == NULL) {
+		ACC_DMA_ERR(hw, "malloc status region fail!");
+		ret = -ENOMEM;
+		goto unmap_dus;
+	}
+
+	dev->state = RTE_DMA_DEV_READY;
+	ACC_DMA_DEBUG(hw, "create dmadev %s success!", name);
+
+	return 0;
+
+unmap_dus:
+	rte_uacce_queue_unmap(&hw->qctx, RTE_UACCE_QFRT_DUS);
+unmap_mmio:
+	rte_uacce_queue_unmap(&hw->qctx, RTE_UACCE_QFRT_MMIO);
+free_uacce_queue:
+	rte_uacce_queue_free(&hw->qctx);
+release_dma_pmd:
+	rte_dma_pmd_release(name);
+	return ret;
+}
+
+static int
+acc_dma_parse_queues(const char *key, const char *value, void *extra_args)
+{
+	struct acc_dma_config *config = extra_args;
+	uint64_t val;
+	char *end;
+
+	RTE_SET_USED(key);
+
+	errno = 0;
+	val = strtoull(value, &end, 0);
+	if (errno == ERANGE || value == end || *end != '\0' || val == 0) {
+		ACC_DMA_LOG(ERR, "%s invalid queues! set to default one queue!",
+			    config->dev->name);
+		config->queues = ACC_DMA_DEFAULT_QUEUES;
+	} else if (val > config->avail_queues) {
+		ACC_DMA_LOG(WARNING, "%s exceed available queues! set to available queues %u",
+			     config->dev->name, config->avail_queues);
+		config->queues = config->avail_queues;
+	} else {
+		config->queues = val;
+	}
+
+	return 0;
+}
+
+static int
+acc_dma_parse_devargs(struct rte_uacce_device *uacce_dev, struct acc_dma_config *config)
+{
+	struct rte_kvargs *kvlist;
+	int avail_queues;
+
+	avail_queues = rte_uacce_avail_queues(uacce_dev);
+	if (avail_queues <= 0) {
+		ACC_DMA_LOG(ERR, "%s don't have available queues!", uacce_dev->name);
+		return -1;
+	}
+	config->dev = uacce_dev;
+	config->avail_queues = avail_queues <= UINT16_MAX ? avail_queues : UINT16_MAX;
+
+	if (uacce_dev->device.devargs == NULL)
+		return 0;
+
+	kvlist = rte_kvargs_parse(uacce_dev->device.devargs->args, NULL);
+	if (kvlist == NULL)
+		return 0;
+
+	(void)rte_kvargs_process(kvlist, ACC_DMA_DEVARG_QUEUES, &acc_dma_parse_queues, config);
+
+	rte_kvargs_free(kvlist);
+
+	return 0;
+}
+
+static int
+acc_dma_probe(struct rte_uacce_driver *dr, struct rte_uacce_device *uacce_dev)
+{
+	struct acc_dma_config config = { .queues = ACC_DMA_DEFAULT_QUEUES };
+	int ret = 0;
+	uint32_t i;
+
+	RTE_SET_USED(dr);
+
+	ret = acc_dma_parse_devargs(uacce_dev, &config);
+	if (ret != 0)
+		return ret;
+
+	for (i = 0; i < config.queues; i++) {
+		ret = acc_dma_create(uacce_dev, i);
+		if (ret != 0) {
+			ACC_DMA_LOG(ERR, "%s create dmadev No.%u failed!", uacce_dev->name, i);
+			break;
+		}
+	}
+
+	if (ret != 0 && i > 0) {
+		ACC_DMA_LOG(WARNING, "%s probed %u dmadev, can't probe more!", uacce_dev->name, i);
+		ret = 0;
+	}
+
+	return ret;
+}
+
+/*
+ * Release every dmadev that was created on top of the given UACCE device.
+ *
+ * The dmadev names are generated as "<uacce device name>-dma<queue id>", so a
+ * bare prefix comparison is not sufficient: "hisi_zip-1" is also a prefix of
+ * "hisi_zip-10-dma0". The "-dma" separator must directly follow the device
+ * name for the dmadev to be considered as belonging to this device.
+ *
+ * Always returns 0.
+ */
+static int
+acc_dma_remove(struct rte_uacce_device *uacce_dev)
+{
+	const char *base = uacce_dev->device.name;
+	size_t base_len = strlen(base);
+	struct rte_dma_info info;
+	int i = 0;
+	int ret;
+
+	RTE_DMA_FOREACH_DEV(i) {
+		ret = rte_dma_info_get(i, &info);
+		if (ret != 0)
+			continue;
+		if (strncmp(info.dev_name, base, base_len) != 0)
+			continue;
+		/* The prefix matched, so dev_name holds at least base_len chars. */
+		if (strncmp(info.dev_name + base_len, "-dma", strlen("-dma")) != 0)
+			continue;
+		rte_dma_pmd_release(info.dev_name);
+	}
+
+	return 0;
+}
+
+static const struct rte_uacce_id acc_dma_id_table[] = {
+	{ "hisi_qm_v5", "udma" },
+	{ .dev_api = NULL, },
+};
+
+static struct rte_uacce_driver acc_dma_pmd_drv = {
+	.id_table = acc_dma_id_table,
+	.probe = acc_dma_probe,
+	.remove = acc_dma_remove,
+};
+
+RTE_PMD_REGISTER_UACCE(dma_acc, acc_dma_pmd_drv);
+RTE_PMD_REGISTER_PARAM_STRING(dma_acc,
+			ACC_DMA_DEVARG_QUEUES "=<uint16> ");
diff --git a/drivers/dma/acc/acc_dmadev.h b/drivers/dma/acc/acc_dmadev.h
new file mode 100644
index 0000000000..9a1000fa41
--- /dev/null
+++ b/drivers/dma/acc/acc_dmadev.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2025 HiSilicon Technologies Co., Ltd. All rights reserved.
+ */
+
+#ifndef ACC_DMADEV_H
+#define ACC_DMADEV_H
+
+#include <bus_uacce_driver.h>
+#include <rte_bitops.h>
+#include <rte_common.h>
+#include <rte_dmadev_pmd.h>
+
+#define ACC_DMA_DEVARG_QUEUES		"queues"
+#define ACC_DMA_DEFAULT_QUEUES		1
+
+struct acc_dma_config {
+	uint16_t queues;
+
+	/* The following fields are config contexts. */
+	struct rte_uacce_device *dev;
+	uint16_t avail_queues;
+};
+
+#define ACC_DMA_DOORBELL_OFFSET		0x1000u
+
+struct acc_dma_sqe {};
+struct acc_dma_cqe {};
+
+struct acc_dma_dev {
+	struct acc_dma_sqe *sqe;
+	struct acc_dma_cqe *cqe;
+	uint16_t *status;             /* the completion status array of SQEs. */
+
+	volatile void *doorbell_reg;  /**< register address for doorbell. */
+	volatile uint32_t *sq_status; /**< SQ status pointer. */
+	volatile uint32_t *cq_status; /**< CQ status pointer. */
+
+	uint16_t sqn;           /**< SQ global number, inited when created. */
+	uint16_t sq_depth_mask; /**< SQ depth - 1, the SQ depth is power of 2. */
+
+	uint16_t cq_depth;      /**< CQ depth, inited when created. */
+
+	/**
+	 * The following fields are not accessed in the I/O path, so they are
+	 * placed at the end.
+	 */
+	struct rte_dma_dev_data *data;
+	struct rte_uacce_qcontex qctx;
+	void *io_base;
+	void *dus_base;
+	uint32_t sqe_size;
+	uint16_t sq_depth;
+};
+
+#endif /* ACC_DMADEV_H */
diff --git a/drivers/dma/acc/meson.build b/drivers/dma/acc/meson.build
new file mode 100644
index 0000000000..8a1bad5281
--- /dev/null
+++ b/drivers/dma/acc/meson.build
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright (c) 2025 HiSilicon Technologies Co., Ltd. All rights reserved.
+
+if not is_linux
+    build = false
+    reason = 'only supported on Linux'
+    subdir_done()
+endif
+
+if (arch_subdir != 'x86' and arch_subdir != 'arm') or (not dpdk_conf.get('RTE_ARCH_64'))
+    build = false
+    reason = 'only supported on x86_64 and aarch64'
+    subdir_done()
+endif
+
+deps += ['bus_uacce', 'dmadev']
+sources = files(
+        'acc_dmadev.c',
+)
+
+require_iova_in_mbuf = false
diff --git a/drivers/dma/meson.build b/drivers/dma/meson.build
index 358132759a..eeab0ec361 100644
--- a/drivers/dma/meson.build
+++ b/drivers/dma/meson.build
@@ -2,6 +2,7 @@
 # Copyright 2021 HiSilicon Limited
 
 drivers = [
+        'acc',
         'cnxk',
         'dpaa',
         'dpaa2',
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH v2 2/4] dma/acc: add control path ops
  2025-09-08  2:39 ` [PATCH v2 0/4] add Hisilicon accelerator DMA driver Chengwen Feng
  2025-09-08  2:39   ` [PATCH v2 1/4] dma/acc: add probe and remove Chengwen Feng
@ 2025-09-08  2:39   ` Chengwen Feng
  2025-09-08  2:39   ` [PATCH v2 3/4] dma/acc: add data " Chengwen Feng
  2025-09-08  2:39   ` [PATCH v2 4/4] dma/acc: add doc Chengwen Feng
  3 siblings, 0 replies; 37+ messages in thread
From: Chengwen Feng @ 2025-09-08  2:39 UTC (permalink / raw)
  To: thomas; +Cc: dev, liuyonglong

This commit adds control path ops for accelerator DMA driver.

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
---
 drivers/dma/acc/acc_dmadev.c | 168 +++++++++++++++++++++++++++++++++++
 drivers/dma/acc/acc_dmadev.h |  42 +++++++++
 2 files changed, 210 insertions(+)

diff --git a/drivers/dma/acc/acc_dmadev.c b/drivers/dma/acc/acc_dmadev.c
index 40348b70b8..8b09dd6062 100644
--- a/drivers/dma/acc/acc_dmadev.c
+++ b/drivers/dma/acc/acc_dmadev.c
@@ -34,6 +34,173 @@ RTE_LOG_REGISTER_DEFAULT(acc_dma_logtype, INFO);
 #define ACC_DMA_ERR(hw, ...) \
 	ACC_DMA_DEV_LOG(hw, ERR, __VA_ARGS__)
 
+static int
+acc_dma_info_get(const struct rte_dma_dev *dev,
+		 struct rte_dma_info *dev_info,
+		 uint32_t info_sz)
+{
+	struct acc_dma_dev *hw = dev->data->dev_private;
+
+	RTE_SET_USED(info_sz);
+
+	dev_info->dev_capa = RTE_DMA_CAPA_MEM_TO_MEM |
+			     RTE_DMA_CAPA_SVA |
+			     RTE_DMA_CAPA_OPS_COPY |
+			     RTE_DMA_CAPA_OPS_FILL;
+	dev_info->max_vchans = 1;
+	dev_info->max_desc = hw->sq_depth;
+	dev_info->min_desc = hw->sq_depth;
+
+	return 0;
+}
+
+static int
+acc_dma_configure(struct rte_dma_dev *dev,
+		  const struct rte_dma_conf *conf,
+		  uint32_t conf_sz)
+{
+	RTE_SET_USED(dev);
+	RTE_SET_USED(conf);
+	RTE_SET_USED(conf_sz);
+	return 0;
+}
+
+static int
+acc_dma_start(struct rte_dma_dev *dev)
+{
+	struct acc_dma_dev *hw = dev->data->dev_private;
+	int ret;
+
+	if (hw->started) {
+		hw->ridx = 0;
+		hw->cridx = 0;
+		return 0;
+	}
+
+	memset(hw->sqe, 0, hw->sqe_size * hw->sq_depth);
+	memset(hw->cqe, 0, sizeof(struct acc_dma_cqe) * hw->cq_depth);
+	memset(hw->status, 0, sizeof(uint16_t) * hw->sq_depth);
+	hw->ridx = 0;
+	hw->cridx = 0;
+	hw->sq_head = 0;
+	hw->sq_tail = 0;
+	hw->cq_sq_head = 0;
+	hw->avail_sqes = hw->sq_depth - ACC_DMA_SQ_GAP_NUM - 1;
+	hw->cq_head = 0;
+	hw->cqs_completed = 0;
+	hw->cqe_vld = 1;
+	hw->submitted = 0;
+	hw->completed = 0;
+	hw->errors = 0;
+	hw->invalid_lens = 0;
+	hw->qfulls = 0;
+
+	ret = rte_uacce_queue_start(&hw->qctx);
+	if (ret == 0)
+		hw->started = true;
+
+	return ret;
+}
+
+static int
+acc_dma_stop(struct rte_dma_dev *dev)
+{
+	RTE_SET_USED(dev);
+	return 0;
+}
+
+static int
+acc_dma_close(struct rte_dma_dev *dev)
+{
+	struct acc_dma_dev *hw = dev->data->dev_private;
+	/* The dmadev already stopped */
+	rte_free(hw->status);
+	rte_uacce_queue_unmap(&hw->qctx, RTE_UACCE_QFRT_DUS);
+	rte_uacce_queue_unmap(&hw->qctx, RTE_UACCE_QFRT_MMIO);
+	rte_uacce_queue_free(&hw->qctx);
+	return 0;
+}
+
+static int
+acc_dma_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan,
+		    const struct rte_dma_vchan_conf *conf,
+		    uint32_t conf_sz)
+{
+	RTE_SET_USED(dev);
+	RTE_SET_USED(vchan);
+	RTE_SET_USED(conf);
+	RTE_SET_USED(conf_sz);
+	return 0;
+}
+
+static int
+acc_dma_stats_get(const struct rte_dma_dev *dev, uint16_t vchan,
+		  struct rte_dma_stats *stats,
+		  uint32_t stats_sz)
+{
+	struct acc_dma_dev *hw = dev->data->dev_private;
+
+	RTE_SET_USED(vchan);
+	RTE_SET_USED(stats_sz);
+	stats->submitted = hw->submitted;
+	stats->completed = hw->completed;
+	stats->errors    = hw->errors;
+
+	return 0;
+}
+
+static int
+acc_dma_stats_reset(struct rte_dma_dev *dev, uint16_t vchan)
+{
+	struct acc_dma_dev *hw = dev->data->dev_private;
+
+	RTE_SET_USED(vchan);
+	hw->submitted    = 0;
+	hw->completed    = 0;
+	hw->errors       = 0;
+	hw->invalid_lens = 0;
+	hw->io_errors    = 0;
+	hw->qfulls       = 0;
+
+	return 0;
+}
+
+static int
+acc_dma_dump(const struct rte_dma_dev *dev, FILE *f)
+{
+	struct acc_dma_dev *hw = dev->data->dev_private;
+
+	fprintf(f, "  sqn: %u sq_status: %s cq_status: %s\n"
+		"  sqe_size: %u sq_depth: %u sq_depth_mask: %u cq_depth: %u\n",
+		hw->sqn, (*hw->sq_status != 0) ? "ERR" : "OK",
+		(*hw->cq_status != 0) ? "ERR" : "OK",
+		hw->sqe_size, hw->sq_depth, hw->sq_depth_mask, hw->cq_depth);
+	fprintf(f, "  ridx: %u cridx: %u\n"
+		"  sq_head: %u sq_tail: %u cq_sq_head: %u avail_sqes: %u\n"
+		"  cq_head: %u cqs_completed: %u cqe_vld: %u\n",
+		hw->ridx, hw->cridx,
+		hw->sq_head, hw->sq_tail, hw->cq_sq_head, hw->avail_sqes,
+		hw->cq_head, hw->cqs_completed, hw->cqe_vld);
+	fprintf(f, "  submitted: %" PRIu64 " completed: %" PRIu64 " errors: %" PRIu64
+		" invalid_lens: %" PRIu64 " io_errors: %" PRIu64 " qfulls: %" PRIu64 "\n",
+		hw->submitted, hw->completed, hw->errors, hw->invalid_lens,
+		hw->io_errors, hw->qfulls);
+
+	return 0;
+}
+
+static const struct rte_dma_dev_ops acc_dmadev_ops = {
+	.dev_info_get     = acc_dma_info_get,
+	.dev_configure    = acc_dma_configure,
+	.dev_start        = acc_dma_start,
+	.dev_stop         = acc_dma_stop,
+	.dev_close        = acc_dma_close,
+	.vchan_setup      = acc_dma_vchan_setup,
+	.stats_get        = acc_dma_stats_get,
+	.stats_reset      = acc_dma_stats_reset,
+	.dev_dump         = acc_dma_dump,
+};
+
 static void
 acc_dma_gen_dev_name(const struct rte_uacce_device *uacce_dev,
 		     uint16_t queue_id, char *dev_name, size_t size)
@@ -104,6 +271,7 @@ acc_dma_create(struct rte_uacce_device *uacce_dev, uint16_t queue_id)
 	}
 
 	dev->device = &uacce_dev->device;
+	dev->dev_ops = &acc_dmadev_ops;
 	dev->fp_obj->dev_private = dev->data->dev_private;
 
 	hw = dev->data->dev_private;
diff --git a/drivers/dma/acc/acc_dmadev.h b/drivers/dma/acc/acc_dmadev.h
index 9a1000fa41..fca0ff79dd 100644
--- a/drivers/dma/acc/acc_dmadev.h
+++ b/drivers/dma/acc/acc_dmadev.h
@@ -13,6 +13,9 @@
 #define ACC_DMA_DEVARG_QUEUES		"queues"
 #define ACC_DMA_DEFAULT_QUEUES		1
 
+#define ACC_DMA_CQ_DOORBELL_PACE	64
+#define ACC_DMA_SQ_GAP_NUM		ACC_DMA_CQ_DOORBELL_PACE
+
 struct acc_dma_config {
 	uint16_t queues;
 
@@ -38,7 +41,45 @@ struct acc_dma_dev {
 	uint16_t sqn;           /**< SQ global number, inited when created. */
 	uint16_t sq_depth_mask; /**< SQ depth - 1, the SQ depth is power of 2. */
 
+	uint16_t ridx;  /**< ring index which will assign to the next request. */
+	uint16_t cridx; /**< ring index which returned by completed APIs. */
+
+	/**
+	 * SQE array management fields:
+	 *
+	 *  -----------------------------------------------------
+	 *  | SQE0 | SQE1 | SQE2 |   ...  | SQEx | ... | SQEn-1 |
+	 *  -----------------------------------------------------
+	 *     ^             ^               ^
+	 *     |             |               |
+	 *   sq_head     cq_sq_head       sq_tail
+	 *
+	 *  sq_head: index of the oldest completed request, this field is
+	 *           updated by completed* APIs.
+	 *  sq_tail: index of the next new request, this field is updated by
+	 *           copy/fill APIs.
+	 *  cq_sq_head: index following the last request completed by hardware,
+	 *              this field is updated by completed* APIs.
+	 *
+	 *  [sq_head, cq_sq_head): the SQEs that hardware already completed.
+	 *  [cq_sq_head, sq_tail): the SQEs that hardware processing.
+	 */
+	uint16_t sq_head;
+	uint16_t sq_tail;
+	uint16_t cq_sq_head;
+	uint16_t avail_sqes;
+
 	uint16_t cq_depth;      /**< CQ depth, inited when created. */
+	uint16_t cq_head;       /**< CQ index for next scans. */
+	uint16_t cqs_completed; /**< accumulated number of completed CQs. */
+	uint8_t  cqe_vld;       /**< valid bit for CQE, will change for every round. */
+
+	uint64_t submitted;
+	uint64_t completed;
+	uint64_t errors;
+	uint64_t invalid_lens;
+	uint64_t io_errors;
+	uint64_t qfulls;
 
 	/**
 	 * The following fields are not accessed in the I/O path, so they are
@@ -50,6 +91,7 @@ struct acc_dma_dev {
 	void *dus_base;
 	uint32_t sqe_size;
 	uint16_t sq_depth;
+	bool started;
 };
 
 #endif /* ACC_DMADEV_H */
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH v2 3/4] dma/acc: add data path ops
  2025-09-08  2:39 ` [PATCH v2 0/4] add Hisilicon accelerator DMA driver Chengwen Feng
  2025-09-08  2:39   ` [PATCH v2 1/4] dma/acc: add probe and remove Chengwen Feng
  2025-09-08  2:39   ` [PATCH v2 2/4] dma/acc: add control path ops Chengwen Feng
@ 2025-09-08  2:39   ` Chengwen Feng
  2025-09-08  2:39   ` [PATCH v2 4/4] dma/acc: add doc Chengwen Feng
  3 siblings, 0 replies; 37+ messages in thread
From: Chengwen Feng @ 2025-09-08  2:39 UTC (permalink / raw)
  To: thomas; +Cc: dev, liuyonglong

This commit adds data path ops for accelerator DMA driver.

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
---
 drivers/dma/acc/acc_dmadev.c | 274 +++++++++++++++++++++++++++++++++++
 drivers/dma/acc/acc_dmadev.h |  75 +++++++++-
 2 files changed, 347 insertions(+), 2 deletions(-)

diff --git a/drivers/dma/acc/acc_dmadev.c b/drivers/dma/acc/acc_dmadev.c
index 8b09dd6062..87d726f6a3 100644
--- a/drivers/dma/acc/acc_dmadev.c
+++ b/drivers/dma/acc/acc_dmadev.c
@@ -189,6 +189,274 @@ acc_dma_dump(const struct rte_dma_dev *dev, FILE *f)
 	return 0;
 }
 
+static inline void
+acc_dma_sq_doorbell(struct acc_dma_dev *hw)
+{
+	uint64_t doorbell = (uint64_t)(hw->sqn & ACC_DMA_DOORBELL_SQN_MASK) |
+			    (ACC_DMA_DOORBELL_SQ_CMD << ACC_DMA_DOORBELL_CMD_SHIFT) |
+			    (((uint64_t)hw->sq_tail) << ACC_DMA_DOORBELL_IDX_SHIFT);
+	rte_io_wmb();
+	*(volatile uint64_t *)hw->doorbell_reg = doorbell;
+}
+
+static int
+acc_dma_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t dst,
+	     uint32_t length, uint64_t flags)
+{
+	struct acc_dma_dev *hw = dev_private;
+	struct acc_dma_sqe *sqe = &hw->sqe[hw->sq_tail];
+
+	RTE_SET_USED(vchan);
+
+	if (unlikely(length > ACC_DMA_MAX_OP_SIZE)) {
+		hw->invalid_lens++;
+		return -EINVAL;
+	}
+
+	if (unlikely(*hw->sq_status != 0)) {
+		hw->io_errors++;
+		return -EIO;
+	}
+
+	if (hw->avail_sqes == 0) {
+		hw->qfulls++;
+		return -ENOSPC;
+	}
+
+	sqe->bd_type       = ACC_DMA_SQE_TYPE;
+	sqe->task_type     = ACC_DMA_TASK_TYPE;
+	sqe->task_type_ext = ACC_DMA_DATA_MEMCPY;
+	sqe->init_val      = 0;
+	sqe->addr_array    = src;
+	sqe->dst_addr      = dst;
+	sqe->data_size     = length;
+	sqe->dw0           = ACC_DMA_SVA_PREFETCH_EN;
+	sqe->wb_field      = 0;
+
+	hw->sq_tail = (hw->sq_tail + 1) & hw->sq_depth_mask;
+	hw->avail_sqes--;
+	hw->submitted++;
+
+	if (flags & RTE_DMA_OP_FLAG_SUBMIT)
+		acc_dma_sq_doorbell(hw);
+
+	return hw->ridx++;
+}
+
+static int
+acc_dma_fill(void *dev_private, uint16_t vchan, uint64_t pattern,
+	     rte_iova_t dst, uint32_t length, uint64_t flags)
+{
+	struct acc_dma_dev *hw = dev_private;
+	struct acc_dma_sqe *sqe = &hw->sqe[hw->sq_tail];
+
+	RTE_SET_USED(vchan);
+
+	if (unlikely(length > ACC_DMA_MAX_OP_SIZE)) {
+		hw->invalid_lens++;
+		return -EINVAL;
+	}
+
+	if (unlikely(*hw->sq_status != 0)) {
+		hw->io_errors++;
+		return -EIO;
+	}
+
+	if (hw->avail_sqes == 0) {
+		hw->qfulls++;
+		return -ENOSPC;
+	}
+
+	sqe->bd_type       = ACC_DMA_SQE_TYPE;
+	sqe->task_type     = ACC_DMA_TASK_TYPE;
+	sqe->task_type_ext = ACC_DMA_DATA_MEMSET;
+	sqe->init_val      = pattern;
+	sqe->addr_array    = 0;
+	sqe->dst_addr      = dst;
+	sqe->data_size     = length;
+	sqe->dw0           = ACC_DMA_SVA_PREFETCH_EN;
+	sqe->wb_field      = 0;
+
+	hw->sq_tail = (hw->sq_tail + 1) & hw->sq_depth_mask;
+	hw->avail_sqes--;
+	hw->submitted++;
+
+	if (flags & RTE_DMA_OP_FLAG_SUBMIT)
+		acc_dma_sq_doorbell(hw);
+
+	return hw->ridx++;
+}
+
+static int
+acc_dma_submit(void *dev_private, uint16_t vchan)
+{
+	struct acc_dma_dev *hw = dev_private;
+
+	RTE_SET_USED(vchan);
+
+	if (unlikely(*hw->sq_status != 0)) {
+		hw->io_errors++;
+		return -EIO;
+	}
+
+	acc_dma_sq_doorbell(hw);
+
+	return 0;
+}
+
+static inline void
+acc_dma_cq_doorbell(struct acc_dma_dev *hw)
+{
+	uint64_t doorbell = (uint64_t)(hw->sqn & ACC_DMA_DOORBELL_SQN_MASK) |
+			    (ACC_DMA_DOORBELL_CQ_CMD << ACC_DMA_DOORBELL_CMD_SHIFT) |
+			    (((uint64_t)hw->cq_head) << ACC_DMA_DOORBELL_IDX_SHIFT);
+	rte_io_wmb();
+	*(volatile uint64_t *)hw->doorbell_reg = doorbell;
+}
+
+/*
+ * Scan the completion queue and fold newly completed entries into the
+ * software ring state.
+ *
+ * Starting at hw->cq_head, CQEs are consumed while their valid bit matches the
+ * expected value, for at most cq_depth entries. Each CQE carries an SQ head
+ * index; the matching SQE's write-back fields are checked and a failed request
+ * is recorded in hw->status[] as RTE_DMA_STATUS_ERROR_UNKNOWN.
+ *
+ * The expected valid bit flips each time the CQ index wraps. It is tracked in
+ * a local copy and committed together with cq_head, so that a scan abandoned
+ * because of an out-of-range SQ head index leaves cq_head and cqe_vld
+ * consistent with each other.
+ *
+ * A CQ doorbell is rung once at least ACC_DMA_CQ_DOORBELL_PACE CQEs have been
+ * consumed since the previous one.
+ */
+static inline void
+acc_dma_scan_cq(struct acc_dma_dev *hw)
+{
+	volatile struct acc_dma_cqe *cqe;
+	struct acc_dma_sqe *sqe;
+	uint16_t csq_head = hw->cq_sq_head;
+	uint16_t cq_head = hw->cq_head;
+	uint8_t cqe_vld = hw->cqe_vld;
+	uint16_t count = 0;
+	uint64_t misc;
+
+	if (unlikely(*hw->cq_status != 0)) {
+		hw->io_errors++;
+		return;
+	}
+
+	while (count < hw->cq_depth) {
+		cqe = &hw->cqe[cq_head];
+		misc = cqe->misc;
+		misc = rte_le_to_cpu_64(misc);
+		if (RTE_FIELD_GET64(ACC_DMA_CQE_VALID_B, misc) != cqe_vld)
+			break;
+
+		csq_head = RTE_FIELD_GET64(ACC_DMA_SQ_HEAD_MASK, misc);
+		if (unlikely(csq_head > hw->sq_depth_mask)) {
+			/**
+			 * Defensive programming to prevent overflow of the
+			 * status array indexed by csq_head. Only error logs
+			 * are used for prompting.
+			 */
+			ACC_DMA_ERR(hw, "invalid csq_head: %u!", csq_head);
+			count = 0;
+			break;
+		}
+		sqe = &hw->sqe[csq_head];
+		if (sqe->done_flag != ACC_DMA_TASK_DONE ||
+			sqe->err_type || sqe->ext_err_type || sqe->wtype) {
+			hw->status[csq_head] = RTE_DMA_STATUS_ERROR_UNKNOWN;
+		}
+
+		count++;
+		cq_head++;
+		if (cq_head == hw->cq_depth) {
+			/* Wrapped: hardware writes the opposite valid bit next round. */
+			cqe_vld = !cqe_vld;
+			cq_head = 0;
+		}
+	}
+
+	/* Nothing consumed (or scan abandoned): leave all CQ state untouched. */
+	if (count == 0)
+		return;
+
+	hw->cq_head = cq_head;
+	hw->cqe_vld = cqe_vld;
+	hw->cq_sq_head = (csq_head + 1) & hw->sq_depth_mask;
+	hw->avail_sqes += count;
+	hw->cqs_completed += count;
+	if (hw->cqs_completed >= ACC_DMA_CQ_DOORBELL_PACE) {
+		acc_dma_cq_doorbell(hw);
+		hw->cqs_completed = 0;
+	}
+}
+
+static inline uint16_t
+acc_dma_calc_cpls(struct acc_dma_dev *hw, const uint16_t nb_cpls)
+{
+	uint16_t cpl_num;
+
+	if (hw->cq_sq_head >= hw->sq_head)
+		cpl_num = hw->cq_sq_head - hw->sq_head;
+	else
+		cpl_num = hw->sq_depth_mask + 1 - hw->sq_head + hw->cq_sq_head;
+
+	if (cpl_num > nb_cpls)
+		cpl_num = nb_cpls;
+
+	return cpl_num;
+}
+
+static uint16_t
+acc_dma_completed(void *dev_private,
+		  uint16_t vchan, const uint16_t nb_cpls,
+		  uint16_t *last_idx, bool *has_error)
+{
+	struct acc_dma_dev *hw = dev_private;
+	uint16_t sq_head = hw->sq_head;
+	uint16_t cpl_num, i;
+
+	RTE_SET_USED(vchan);
+	acc_dma_scan_cq(hw);
+
+	cpl_num = acc_dma_calc_cpls(hw, nb_cpls);
+	for (i = 0; i < cpl_num; i++) {
+		if (hw->status[sq_head]) {
+			*has_error = true;
+			break;
+		}
+		sq_head = (sq_head + 1) & hw->sq_depth_mask;
+	}
+	*last_idx = hw->cridx + i - 1;
+	if (i > 0) {
+		hw->cridx += i;
+		hw->sq_head = sq_head;
+		hw->completed += i;
+	}
+
+	return i;
+}
+
+static uint16_t
+acc_dma_completed_status(void *dev_private,
+			 uint16_t vchan, const uint16_t nb_cpls,
+			 uint16_t *last_idx, enum rte_dma_status_code *status)
+{
+	struct acc_dma_dev *hw = dev_private;
+	uint16_t sq_head = hw->sq_head;
+	uint16_t cpl_num, i;
+
+	RTE_SET_USED(vchan);
+	acc_dma_scan_cq(hw);
+
+	cpl_num = acc_dma_calc_cpls(hw, nb_cpls);
+	for (i = 0; i < cpl_num; i++) {
+		status[i] = hw->status[sq_head];
+		hw->errors += !!status[i];
+		hw->status[sq_head] = 0;
+		sq_head = (sq_head + 1) & hw->sq_depth_mask;
+	}
+	*last_idx = hw->cridx + cpl_num - 1;
+	if (likely(cpl_num > 0)) {
+		hw->cridx += cpl_num;
+		hw->sq_head = sq_head;
+		hw->completed += cpl_num;
+	}
+
+	return cpl_num;
+}
+
+static uint16_t
+acc_dma_burst_capacity(const void *dev_private, uint16_t vchan)
+{
+	const struct acc_dma_dev *hw = dev_private;
+	RTE_SET_USED(vchan);
+	return hw->avail_sqes;
+}
+
 static const struct rte_dma_dev_ops acc_dmadev_ops = {
 	.dev_info_get     = acc_dma_info_get,
 	.dev_configure    = acc_dma_configure,
@@ -273,6 +541,12 @@ acc_dma_create(struct rte_uacce_device *uacce_dev, uint16_t queue_id)
 	dev->device = &uacce_dev->device;
 	dev->dev_ops = &acc_dmadev_ops;
 	dev->fp_obj->dev_private = dev->data->dev_private;
+	dev->fp_obj->copy = acc_dma_copy;
+	dev->fp_obj->fill = acc_dma_fill;
+	dev->fp_obj->submit = acc_dma_submit;
+	dev->fp_obj->completed = acc_dma_completed;
+	dev->fp_obj->completed_status = acc_dma_completed_status;
+	dev->fp_obj->burst_capacity = acc_dma_burst_capacity;
 
 	hw = dev->data->dev_private;
 	hw->data = dev->data; /* make sure ACC_DMA_DEBUG/INFO/WARN/ERR was available. */
diff --git a/drivers/dma/acc/acc_dmadev.h b/drivers/dma/acc/acc_dmadev.h
index fca0ff79dd..dd8c7e0cfd 100644
--- a/drivers/dma/acc/acc_dmadev.h
+++ b/drivers/dma/acc/acc_dmadev.h
@@ -25,9 +25,80 @@ struct acc_dma_config {
 };
 
 #define ACC_DMA_DOORBELL_OFFSET		0x1000u
+#define ACC_DMA_DOORBELL_SQN_MASK	0x3FFu
+#define ACC_DMA_DOORBELL_SQ_CMD		0ull
+#define ACC_DMA_DOORBELL_CQ_CMD		1ull
+#define ACC_DMA_DOORBELL_CMD_SHIFT	12
+#define ACC_DMA_DOORBELL_IDX_SHIFT	32
+
+#define ACC_DMA_TASK_TYPE		0x3
+#define ACC_DMA_SQE_TYPE		0x1
+#define ACC_DMA_SVA_PREFETCH_EN		RTE_BIT32(15)
+#define ACC_DMA_MAX_OP_SIZE		(RTE_BIT32(24) - 1)
+
+/*
+ * Operation selectors for the SQE task_type_ext field.
+ * acc_dma_copy() programs ACC_DMA_DATA_MEMCPY; ACC_DMA_DATA_MEMSET is
+ * presumably the fill counterpart (TODO confirm against acc_dma_fill()).
+ */
+enum {
+	ACC_DMA_DATA_MEMCPY = 0,
+	ACC_DMA_DATA_MEMSET = 7,
+};
+
+/*
+ * Task completion states.
+ * NOTE(review): presumably the values hardware writes back into the SQE
+ * done_flag field - confirm against the completion-queue scan code.
+ */
+enum {
+	ACC_DMA_TASK_DONE = 1,
+	ACC_DMA_TASK_ERROR,
+};
 
-struct acc_dma_sqe {};
-struct acc_dma_cqe {};
+/*
+ * Submission queue element (SQE) describing one copy or fill task.
+ * The trailing wb_field/done_flag words are cleared by the driver when a
+ * request is built.
+ */
+struct acc_dma_sqe {
+	uint32_t bd_type : 6;
+	uint32_t resv1 : 2;
+	uint32_t task_type : 6;
+	uint32_t resv2 : 2;
+	uint32_t task_type_ext : 6; /* operation selector, e.g. ACC_DMA_DATA_MEMCPY. */
+	uint32_t resv3 : 9;
+	uint32_t bd_invlid : 1; /* NOTE(review): name looks like a typo of "invalid". */
+	uint32_t rsv4[2];
+	uint32_t low_tag;
+	uint32_t hi_tag;
+	/* The number of bytes to be copied or filled for single address. */
+	uint32_t data_size;
+	uint32_t rsv5;
+	/*
+	 * 0 ~ 13 bits: reserved,
+	 * 14 bit: single address or multi addresses,
+	 * 15 bit: sva prefetch en.
+	 */
+	uint16_t dw0;
+	/*
+	 * 0 ~ 5 bits: reserved,
+	 * 6 ~ 13 bits: address num,
+	 * 14 ~ 15 bits: reserved.
+	 */
+	uint16_t dw1;
+	uint64_t init_val; /* set to 0 for copy tasks. */
+	uint32_t rsv6[12];
+	/* dst addr for single address task. */
+	uint64_t dst_addr;
+	uint32_t rsv7[2];
+	/* src addr for single address task, addr array for multi addresses. */
+	uint64_t addr_array;
+	/* Write-back area; wb_field aliases all the bit-fields below. */
+	union {
+		uint32_t wb_field;
+		struct {
+			uint32_t done_flag : 3;
+			uint32_t rsv8 : 1;
+			uint32_t ext_err_type : 12;
+			uint32_t err_type : 8;
+			uint32_t wtype : 8;
+		};
+	};
+	uint32_t rsv9[3];
+};
+
+#define ACC_DMA_SQ_HEAD_MASK		RTE_GENMASK64(15, 0)
+#define ACC_DMA_CQE_VALID_B		RTE_BIT64(48)
+
+/*
+ * Completion queue element (CQE).
+ * NOTE(review): ACC_DMA_SQ_HEAD_MASK and ACC_DMA_CQE_VALID_B presumably
+ * apply to the misc word - confirm against the completion-queue scan code.
+ */
+struct acc_dma_cqe {
+	uint64_t rsv;
+	uint64_t misc;
+};
 
 struct acc_dma_dev {
 	struct acc_dma_sqe *sqe;
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH v2 4/4] dma/acc: add doc
  2025-09-08  2:39 ` [PATCH v2 0/4] add Hisilicon accelerator DMA driver Chengwen Feng
                     ` (2 preceding siblings ...)
  2025-09-08  2:39   ` [PATCH v2 3/4] dma/acc: add data " Chengwen Feng
@ 2025-09-08  2:39   ` Chengwen Feng
  3 siblings, 0 replies; 37+ messages in thread
From: Chengwen Feng @ 2025-09-08  2:39 UTC (permalink / raw)
  To: thomas; +Cc: dev, liuyonglong

This commit adds documentation for the accelerator DMA driver.

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
---
 MAINTAINERS                            |  1 +
 doc/guides/dmadevs/acc.rst             | 63 ++++++++++++++++++++++++++
 doc/guides/dmadevs/index.rst           |  1 +
 doc/guides/rel_notes/release_25_11.rst |  6 +++
 4 files changed, 71 insertions(+)
 create mode 100644 doc/guides/dmadevs/acc.rst

diff --git a/MAINTAINERS b/MAINTAINERS
index 42717363a0..ca3a8a421b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1366,6 +1366,7 @@ F: doc/guides/dmadevs/hisilicon.rst
 HiSilicon Accelerator DMA
 M: Chengwen Feng <fengchengwen@huawei.com>
 F: drivers/dma/acc/
+F: doc/guides/dmadevs/acc.rst
 
 Marvell CNXK DPI DMA
 M: Vamsi Attunuru <vattunuru@marvell.com>
diff --git a/doc/guides/dmadevs/acc.rst b/doc/guides/dmadevs/acc.rst
new file mode 100644
index 0000000000..41fc24877f
--- /dev/null
+++ b/doc/guides/dmadevs/acc.rst
@@ -0,0 +1,63 @@
+..  SPDX-License-Identifier: BSD-3-Clause
+    Copyright (c) 2025 HiSilicon Technologies Co., Ltd. All rights reserved.
+
+HISILICON Accelerator DMA Driver
+================================
+
+Kunpeng SoC has an internal accelerator unit which includes a zip function, and
+the zip function also supports data copy and fill. This driver exposes this
+capability to DPDK applications.
+
+
+Supported Kunpeng SoCs
+----------------------
+
+* Kunpeng 920
+
+
+Device Setup
+-------------
+
+In order to use the device in DPDK, the user should insmod uacce.ko, hisi_qm.ko
+and hisi_zip.ko (with module parameter uacce_mode=1); there will then be several
+subdirectories whose names start with hisi_zip in the /sys/class/uacce/ directory.
+
+Device Probing and Initialization
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+User should use following method to probe device::
+
+        $ dpdk-app -a uacce:hisi_zip-0,queues=2 ...
+
+hisi_zip-0 is the directory name in the /sys/class/uacce/ directory, queues is
+runtime config parameter which indicates how many dmadevs are created.
+
+If the probe is successful, two dmadevs are created, named "hisi_zip-0-dma0"
+and "hisi_zip-0-dma1".
+
+.. note::
+        In the /sys/class/uacce/hisi_zip-x/ directory, user could query api and
+        algorithms, this driver can only match the device whose api is
+        hisi_qm_v5 and algorithms contain udma.
+
+Device Configuration
+~~~~~~~~~~~~~~~~~~~~~
+
+Configuration requirements:
+
+* ``ring_size`` is obtained from the UACCE API and is a fixed value.
+* Only one ``vchan`` is supported per ``dmadev``.
+* Silent mode is not supported.
+* The transfer direction must be set to ``RTE_DMA_DIR_MEM_TO_MEM``.
+
+
+Device Datapath Capability and Limitation
+-----------------------------------------
+
+Support memory copy and fill operations.
+
+.. note::
+        Currently, the maximum size of the operation data is limited to 16MB-1B
+        in the driver. The device actually supports operations on a larger data
+        size, but the driver would require complex operations in the datapath. If
+        you have such a requirement, please contact the maintainers.
diff --git a/doc/guides/dmadevs/index.rst b/doc/guides/dmadevs/index.rst
index 15ddaf5192..dcc8c189ba 100644
--- a/doc/guides/dmadevs/index.rst
+++ b/doc/guides/dmadevs/index.rst
@@ -11,6 +11,7 @@ an application through DMA API.
    :maxdepth: 1
    :numbered:
 
+   acc
    cnxk
    dpaa
    dpaa2
diff --git a/doc/guides/rel_notes/release_25_11.rst b/doc/guides/rel_notes/release_25_11.rst
index 32d61691d2..56605d0a0a 100644
--- a/doc/guides/rel_notes/release_25_11.rst
+++ b/doc/guides/rel_notes/release_25_11.rst
@@ -55,6 +55,12 @@ New Features
      Also, make sure to start the actual text at the margin.
      =======================================================
 
+    * **Add Hisilicon Accelerator DMA Driver.**
+
+    Kunpeng SoC has an internal accelerator unit which includes a zip function,
+    and the zip function also supports data copy and fill. This driver exposes
+    this capability to DPDK applications.
+
 
 Removed Items
 -------------
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH v3 0/4] add Hisilicon accelerator DMA driver
  2025-08-27  9:27 [PATCH 0/4] add Hisilicon accelerator DMA driver Chengwen Feng
                   ` (4 preceding siblings ...)
  2025-09-08  2:39 ` [PATCH v2 0/4] add Hisilicon accelerator DMA driver Chengwen Feng
@ 2025-09-10  8:50 ` Chengwen Feng
  2025-09-10  8:50   ` [PATCH v3 1/4] dma/acc: add probe and remove Chengwen Feng
                     ` (3 more replies)
  2025-10-13  9:11 ` [PATCH v4 0/4] add Hisilicon accelerator DMA driver Chengwen Feng
                   ` (2 subsequent siblings)
  8 siblings, 4 replies; 37+ messages in thread
From: Chengwen Feng @ 2025-09-10  8:50 UTC (permalink / raw)
  To: thomas; +Cc: dev, liuyonglong

This patchset adds the Hisilicon accelerator DMA driver, which is based on
the UACCE bus.

Chengwen Feng (4):
  dma/acc: add probe and remove
  dma/acc: add control path ops
  dma/acc: add data path ops
  dma/acc: add doc

---
v3: fix remove wrong dmadev by add prefix match.
    use proper errcode when get avail queue fail and dmadev alloc fail.
v2: fix magic number.
    support clean write back field when submit request.

 MAINTAINERS                            |   5 +
 doc/guides/dmadevs/acc.rst             |  63 +++
 doc/guides/dmadevs/index.rst           |   1 +
 doc/guides/rel_notes/release_25_11.rst |   6 +
 drivers/dma/acc/acc_dmadev.c           | 731 +++++++++++++++++++++++++
 drivers/dma/acc/acc_dmadev.h           | 168 ++++++
 drivers/dma/acc/meson.build            |  21 +
 drivers/dma/meson.build                |   1 +
 8 files changed, 996 insertions(+)
 create mode 100644 doc/guides/dmadevs/acc.rst
 create mode 100644 drivers/dma/acc/acc_dmadev.c
 create mode 100644 drivers/dma/acc/acc_dmadev.h
 create mode 100644 drivers/dma/acc/meson.build

-- 
2.17.1


^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH v3 1/4] dma/acc: add probe and remove
  2025-09-10  8:50 ` [PATCH v3 0/4] add Hisilicon accelerator DMA driver Chengwen Feng
@ 2025-09-10  8:50   ` Chengwen Feng
  2025-09-10  8:51   ` [PATCH v3 2/4] dma/acc: add control path ops Chengwen Feng
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 37+ messages in thread
From: Chengwen Feng @ 2025-09-10  8:50 UTC (permalink / raw)
  To: thomas; +Cc: dev, liuyonglong

This patch adds probe and remove operation for accelerator DMA driver.

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
---
 MAINTAINERS                  |   4 +
 drivers/dma/acc/acc_dmadev.c | 289 +++++++++++++++++++++++++++++++++++
 drivers/dma/acc/acc_dmadev.h |  55 +++++++
 drivers/dma/acc/meson.build  |  21 +++
 drivers/dma/meson.build      |   1 +
 5 files changed, 370 insertions(+)
 create mode 100644 drivers/dma/acc/acc_dmadev.c
 create mode 100644 drivers/dma/acc/acc_dmadev.h
 create mode 100644 drivers/dma/acc/meson.build

diff --git a/MAINTAINERS b/MAINTAINERS
index 7aca98c537..42717363a0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1363,6 +1363,10 @@ M: Chengwen Feng <fengchengwen@huawei.com>
 F: drivers/dma/hisilicon/
 F: doc/guides/dmadevs/hisilicon.rst
 
+HiSilicon Accelerator DMA
+M: Chengwen Feng <fengchengwen@huawei.com>
+F: drivers/dma/acc/
+
 Marvell CNXK DPI DMA
 M: Vamsi Attunuru <vattunuru@marvell.com>
 T: git://dpdk.org/next/dpdk-next-net-mrvl
diff --git a/drivers/dma/acc/acc_dmadev.c b/drivers/dma/acc/acc_dmadev.c
new file mode 100644
index 0000000000..fb9718312e
--- /dev/null
+++ b/drivers/dma/acc/acc_dmadev.c
@@ -0,0 +1,289 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2025 HiSilicon Technologies Co., Ltd. All rights reserved.
+ */
+
+#include <errno.h>
+#include <inttypes.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include <rte_byteorder.h>
+#include <rte_eal.h>
+#include <rte_io.h>
+#include <rte_kvargs.h>
+#include <rte_log.h>
+#include <rte_malloc.h>
+
+#include <rte_dmadev_pmd.h>
+
+#include "acc_dmadev.h"
+
+RTE_LOG_REGISTER_DEFAULT(acc_dma_logtype, INFO);
+#define RTE_LOGTYPE_ACC_DMA acc_dma_logtype
+#define ACC_DMA_LOG(level, ...) \
+	RTE_LOG_LINE_PREFIX(level, ACC_DMA, "%s(): ", __func__, __VA_ARGS__)
+#define ACC_DMA_DEV_LOG(hw, level, ...) \
+	RTE_LOG_LINE_PREFIX(level, ACC_DMA, "%s %s(): ", \
+		(hw)->data->dev_name RTE_LOG_COMMA __func__, __VA_ARGS__)
+#define ACC_DMA_DEBUG(hw, ...) \
+	ACC_DMA_DEV_LOG(hw, DEBUG, __VA_ARGS__)
+#define ACC_DMA_INFO(hw, ...) \
+	ACC_DMA_DEV_LOG(hw, INFO, __VA_ARGS__)
+#define ACC_DMA_WARN(hw, ...) \
+	ACC_DMA_DEV_LOG(hw, WARNING, __VA_ARGS__)
+#define ACC_DMA_ERR(hw, ...) \
+	ACC_DMA_DEV_LOG(hw, ERR, __VA_ARGS__)
+
+/* Write the dmadev name "<uacce device name>-dma<queue_id>" into dev_name. */
+static void
+acc_dma_gen_dev_name(const struct rte_uacce_device *uacce_dev,
+		     uint16_t queue_id, char *dev_name, size_t size)
+{
+	const char *base = uacce_dev->device.name;
+
+	/* Clear the whole buffer so nothing stale follows the terminator. */
+	memset(dev_name, 0, size);
+	(void)snprintf(dev_name, size, "%s-dma%u", base, queue_id);
+}
+
+/*
+ * Write the name prefix "<uacce device name>-dma" shared by all dmadevs
+ * created on uacce_dev into dev_name.
+ */
+static void
+acc_dma_gen_dev_prefix(const struct rte_uacce_device *uacce_dev, char *dev_name, size_t size)
+{
+	const char *base = uacce_dev->device.name;
+
+	/* Clear the whole buffer so nothing stale follows the terminator. */
+	memset(dev_name, 0, size);
+	(void)snprintf(dev_name, size, "%s-dma", base);
+}
+
+/*
+ * Query the queue pair number and the SQ/CQ geometry through the UACCE
+ * queue ioctls and cache them in hw.
+ *
+ * Returns 0 on success, -EINVAL when an ioctl fails or when the reported SQ
+ * depth is zero or not a power of two.
+ */
+static int
+acc_dma_get_qp_info(struct acc_dma_dev *hw)
+{
+#define CMD_QM_GET_QP_CTX	_IOWR('H', 10, struct acc_dma_qp_contex)
+#define CMD_QM_GET_QP_INFO	_IOWR('H', 11, struct acc_dma_qp_info)
+#define QP_ALG_TYPE		2
+	struct acc_dma_qp_contex {
+		uint16_t id;
+		uint16_t qc_type;
+	} qp_ctx;
+	struct acc_dma_qp_info {
+		uint32_t sqe_size;
+		uint16_t sq_depth;
+		uint16_t cq_depth;
+		uint64_t reserved;
+	} qp_info;
+	int ret;
+
+	memset(&qp_ctx, 0, sizeof(qp_ctx));
+	qp_ctx.qc_type = QP_ALG_TYPE;
+	ret = rte_uacce_queue_ioctl(&hw->qctx, CMD_QM_GET_QP_CTX, &qp_ctx);
+	if (ret != 0) {
+		ACC_DMA_ERR(hw, "get qm qp context fail!");
+		return -EINVAL;
+	}
+	hw->sqn = qp_ctx.id;
+
+	memset(&qp_info, 0, sizeof(qp_info));
+	ret = rte_uacce_queue_ioctl(&hw->qctx, CMD_QM_GET_QP_INFO, &qp_info);
+	if (ret != 0) {
+		ACC_DMA_ERR(hw, "get qm qp info fail!");
+		return -EINVAL;
+	}
+	/*
+	 * sq_depth_mask is derived as depth - 1, so the depth must be a
+	 * non-zero power of two. Zero is rejected explicitly because it passes
+	 * the (x & (x - 1)) test and would yield a mask of 0xFFFF.
+	 */
+	if (qp_info.sq_depth == 0 ||
+	    (qp_info.sq_depth & (qp_info.sq_depth - 1)) != 0) {
+		ACC_DMA_ERR(hw, "sq depth is not 2's power!");
+		return -EINVAL;
+	}
+	hw->sqe_size = qp_info.sqe_size;
+	hw->sq_depth = qp_info.sq_depth;
+	hw->cq_depth = qp_info.cq_depth;
+	hw->sq_depth_mask = hw->sq_depth - 1;
+
+	return 0;
+}
+
+/*
+ * Create one dmadev named "<uacce device name>-dma<queue_id>" on uacce_dev.
+ *
+ * Allocates the dmadev, allocates a UACCE queue, reads the queue geometry,
+ * maps the MMIO and DUS regions and allocates the per-SQE status array.
+ * Everything acquired so far is released in reverse order on failure.
+ *
+ * Returns 0 on success, a negative errno value otherwise.
+ */
+static int
+acc_dma_create(struct rte_uacce_device *uacce_dev, uint16_t queue_id)
+{
+	char name[RTE_DEV_NAME_MAX_LEN];
+	struct rte_dma_dev *dev;
+	struct acc_dma_dev *hw;
+	int ret;
+
+	acc_dma_gen_dev_name(uacce_dev, queue_id, name, sizeof(name));
+	dev = rte_dma_pmd_allocate(name, uacce_dev->device.numa_node,
+				   sizeof(struct acc_dma_dev));
+	if (dev == NULL) {
+		ACC_DMA_LOG(ERR, "%s allocate dmadev fail!", name);
+		return -ENOMEM;
+	}
+
+	dev->device = &uacce_dev->device;
+	dev->fp_obj->dev_private = dev->data->dev_private;
+
+	hw = dev->data->dev_private;
+	hw->data = dev->data; /* make sure ACC_DMA_DEBUG/INFO/WARN/ERR was available. */
+
+	ret = rte_uacce_queue_alloc(uacce_dev, &hw->qctx);
+	if (ret != 0) {
+		ACC_DMA_ERR(hw, "alloc queue fail!");
+		goto release_dma_pmd;
+	}
+
+	ret = acc_dma_get_qp_info(hw);
+	if (ret != 0)
+		goto free_uacce_queue;
+
+	hw->io_base = rte_uacce_queue_mmap(&hw->qctx, RTE_UACCE_QFRT_MMIO);
+	if (hw->io_base == NULL) {
+		ACC_DMA_ERR(hw, "mmap MMIO region fail!");
+		ret = -EINVAL;
+		goto free_uacce_queue;
+	}
+	/* The doorbell register sits at a fixed offset inside the MMIO region. */
+	hw->doorbell_reg = (void *)((uintptr_t)hw->io_base + ACC_DMA_DOORBELL_OFFSET);
+
+	hw->dus_base = rte_uacce_queue_mmap(&hw->qctx, RTE_UACCE_QFRT_DUS);
+	if (hw->dus_base == NULL) {
+		ACC_DMA_ERR(hw, "mmap DUS region fail!");
+		ret = -EINVAL;
+		goto unmap_mmio;
+	}
+	/*
+	 * DUS region layout as used here: the SQE array starts at the base, the
+	 * CQE array follows it directly, and the last two 32-bit words of the
+	 * region hold the CQ status and the SQ status (in that order).
+	 */
+	hw->sqe = hw->dus_base;
+	hw->cqe = (void *)((uintptr_t)hw->dus_base + hw->sqe_size * hw->sq_depth);
+	hw->sq_status = (uint32_t *)((uintptr_t)hw->dus_base +
+			uacce_dev->qfrt_sz[RTE_UACCE_QFRT_DUS] - sizeof(uint32_t));
+	hw->cq_status = hw->sq_status - 1;
+
+	/* One 16-bit status slot per SQE, zero-initialised. */
+	hw->status = rte_zmalloc_socket(NULL, sizeof(uint16_t) * hw->sq_depth,
+					RTE_CACHE_LINE_SIZE, uacce_dev->numa_node);
+	if (hw->status == NULL) {
+		ACC_DMA_ERR(hw, "malloc status region fail!");
+		ret = -ENOMEM;
+		goto unmap_dus;
+	}
+
+	dev->state = RTE_DMA_DEV_READY;
+	ACC_DMA_DEBUG(hw, "create dmadev %s success!", name);
+
+	return 0;
+
+	/* Error unwinding: each label releases one resource, newest first. */
+unmap_dus:
+	rte_uacce_queue_unmap(&hw->qctx, RTE_UACCE_QFRT_DUS);
+unmap_mmio:
+	rte_uacce_queue_unmap(&hw->qctx, RTE_UACCE_QFRT_MMIO);
+free_uacce_queue:
+	rte_uacce_queue_free(&hw->qctx);
+release_dma_pmd:
+	rte_dma_pmd_release(name);
+	return ret;
+}
+
+/*
+ * kvargs handler for the "queues" devarg.
+ *
+ * A value that is not a complete non-zero number falls back to the default
+ * queue count; a value above the available queue count is clamped to it.
+ * Always returns 0, so a bad value never aborts the probe.
+ */
+static int
+acc_dma_parse_queues(const char *key, const char *value, void *extra_args)
+{
+	struct acc_dma_config *config = extra_args;
+	uint64_t parsed;
+	char *tail;
+
+	RTE_SET_USED(key);
+
+	errno = 0;
+	parsed = strtoull(value, &tail, 0);
+	if (errno == ERANGE || value == tail || *tail != '\0' || parsed == 0) {
+		ACC_DMA_LOG(ERR, "%s invalid queues! set to default one queue!",
+			    config->dev->name);
+		config->queues = ACC_DMA_DEFAULT_QUEUES;
+		return 0;
+	}
+
+	if (parsed > config->avail_queues) {
+		ACC_DMA_LOG(WARNING, "%s exceed available queues! set to available queues %u",
+			     config->dev->name, config->avail_queues);
+		config->queues = config->avail_queues;
+		return 0;
+	}
+
+	config->queues = parsed;
+
+	return 0;
+}
+
+/*
+ * Fill config from the device: record the available queue count (capped at
+ * UINT16_MAX) and apply the optional "queues" devarg.
+ *
+ * Returns -EINVAL when the device has no available queue, 0 otherwise;
+ * missing or unparsable devargs leave the defaults untouched.
+ */
+static int
+acc_dma_parse_devargs(struct rte_uacce_device *uacce_dev, struct acc_dma_config *config)
+{
+	struct rte_kvargs *kvlist;
+	int avail;
+
+	avail = rte_uacce_avail_queues(uacce_dev);
+	if (avail <= 0) {
+		ACC_DMA_LOG(ERR, "%s don't have available queues!", uacce_dev->name);
+		return -EINVAL;
+	}
+
+	config->dev = uacce_dev;
+	if (avail > UINT16_MAX)
+		config->avail_queues = UINT16_MAX;
+	else
+		config->avail_queues = avail;
+
+	if (uacce_dev->device.devargs == NULL)
+		return 0;
+
+	kvlist = rte_kvargs_parse(uacce_dev->device.devargs->args, NULL);
+	if (kvlist != NULL) {
+		(void)rte_kvargs_process(kvlist, ACC_DMA_DEVARG_QUEUES,
+					 &acc_dma_parse_queues, config);
+		rte_kvargs_free(kvlist);
+	}
+
+	return 0;
+}
+
+/*
+ * UACCE probe callback: create config.queues dmadevs on uacce_dev.
+ *
+ * A creation failure stops the loop. The probe still succeeds when at least
+ * one dmadev was created before the failure; otherwise the error of the
+ * first creation attempt is returned.
+ */
+static int
+acc_dma_probe(struct rte_uacce_driver *dr, struct rte_uacce_device *uacce_dev)
+{
+	struct acc_dma_config config = { .queues = ACC_DMA_DEFAULT_QUEUES };
+	uint32_t created = 0;
+	int ret;
+
+	RTE_SET_USED(dr);
+
+	ret = acc_dma_parse_devargs(uacce_dev, &config);
+	if (ret != 0)
+		return ret;
+
+	while (created < config.queues) {
+		ret = acc_dma_create(uacce_dev, created);
+		if (ret != 0) {
+			ACC_DMA_LOG(ERR, "%s create dmadev No.%u failed!",
+				    uacce_dev->name, created);
+			if (created == 0)
+				return ret;
+			ACC_DMA_LOG(WARNING, "%s probed %u dmadev, can't probe more!",
+				    uacce_dev->name, created);
+			return 0;
+		}
+		created++;
+	}
+
+	return 0;
+}
+
+/*
+ * UACCE remove callback: release every dmadev whose name starts with the
+ * "<uacce device name>-dma" prefix. Always returns 0.
+ */
+static int
+acc_dma_remove(struct rte_uacce_device *uacce_dev)
+{
+	char prefix[RTE_DEV_NAME_MAX_LEN];
+	struct rte_dma_info info;
+	size_t prefix_len;
+	int i = 0;
+
+	acc_dma_gen_dev_prefix(uacce_dev, prefix, sizeof(prefix));
+	prefix_len = strlen(prefix);
+
+	RTE_DMA_FOREACH_DEV(i) {
+		/* Devices whose info cannot be read are skipped. */
+		if (rte_dma_info_get(i, &info) != 0)
+			continue;
+		if (strncmp(info.dev_name, prefix, prefix_len) != 0)
+			continue;
+		rte_dma_pmd_release(info.dev_name);
+	}
+
+	return 0;
+}
+
+/* Match table: UACCE devices with API "hisi_qm_v5" offering the "udma" algorithm. */
+static const struct rte_uacce_id acc_dma_id_table[] = {
+	{ "hisi_qm_v5", "udma" },
+	{ .dev_api = NULL, }, /* sentinel entry terminating the table. */
+};
+
+/* UACCE driver descriptor tying the match table to the probe/remove callbacks. */
+static struct rte_uacce_driver acc_dma_pmd_drv = {
+	.id_table = acc_dma_id_table,
+	.probe = acc_dma_probe,
+	.remove = acc_dma_remove,
+};
+
+RTE_PMD_REGISTER_UACCE(dma_acc, acc_dma_pmd_drv);
+RTE_PMD_REGISTER_PARAM_STRING(dma_acc,
+			ACC_DMA_DEVARG_QUEUES "=<uint16> ");
diff --git a/drivers/dma/acc/acc_dmadev.h b/drivers/dma/acc/acc_dmadev.h
new file mode 100644
index 0000000000..9a1000fa41
--- /dev/null
+++ b/drivers/dma/acc/acc_dmadev.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2025 HiSilicon Technologies Co., Ltd. All rights reserved.
+ */
+
+#ifndef ACC_DMADEV_H
+#define ACC_DMADEV_H
+
+#include <bus_uacce_driver.h>
+#include <rte_bitops.h>
+#include <rte_common.h>
+#include <rte_dmadev_pmd.h>
+
+#define ACC_DMA_DEVARG_QUEUES		"queues"
+#define ACC_DMA_DEFAULT_QUEUES		1
+
+/* Probe-time configuration assembled from the device and its devargs. */
+struct acc_dma_config {
+	uint16_t queues; /* number of dmadevs to create, from the "queues" devarg. */
+
+	/* The following fields are config contexts. */
+	struct rte_uacce_device *dev; /* device being probed, used for log messages. */
+	uint16_t avail_queues; /* upper bound applied to the "queues" devarg. */
+};
+
+#define ACC_DMA_DOORBELL_OFFSET		0x1000u
+
+struct acc_dma_sqe {};
+struct acc_dma_cqe {};
+
+/*
+ * Per-dmadev private data, one instance per UACCE queue.
+ *
+ * sqe, cqe, sq_status and cq_status point into the mapped DUS region,
+ * doorbell_reg points into the mapped MMIO region, and status is a
+ * driver-allocated array with one 16-bit slot per SQE. sqe_size, sq_depth
+ * and cq_depth are read from the kernel driver when the device is created.
+ */
+struct acc_dma_dev {
+	struct acc_dma_sqe *sqe;
+	struct acc_dma_cqe *cqe;
+	uint16_t *status;             /* the completion status array of SQEs. */
+
+	volatile void *doorbell_reg;  /**< register address for doorbell. */
+	volatile uint32_t *sq_status; /**< SQ status pointer. */
+	volatile uint32_t *cq_status; /**< CQ status pointer. */
+
+	uint16_t sqn;           /**< SQ global number, inited when created. */
+	uint16_t sq_depth_mask; /**< SQ depth - 1, the SQ depth is power of 2. */
+
+	uint16_t cq_depth;      /**< CQ depth, inited when created. */
+
+	/**
+	 * The following fields are not accessed in the I/O path, so they are
+	 * placed at the end.
+	 */
+	struct rte_dma_dev_data *data;
+	struct rte_uacce_qcontex qctx;
+	void *io_base;
+	void *dus_base;
+	uint32_t sqe_size;
+	uint16_t sq_depth;
+};
+
+#endif /* ACC_DMADEV_H */
diff --git a/drivers/dma/acc/meson.build b/drivers/dma/acc/meson.build
new file mode 100644
index 0000000000..8a1bad5281
--- /dev/null
+++ b/drivers/dma/acc/meson.build
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright (c) 2025 HiSilicon Technologies Co., Ltd. All rights reserved.
+
+if not is_linux
+    build = false
+    reason = 'only supported on Linux'
+    subdir_done()
+endif
+
+if (arch_subdir != 'x86' and arch_subdir != 'arm') or (not dpdk_conf.get('RTE_ARCH_64'))
+    build = false
+    reason = 'only supported on x86_64 and aarch64'
+    subdir_done()
+endif
+
+deps += ['bus_uacce', 'dmadev']
+sources = files(
+        'acc_dmadev.c',
+)
+
+require_iova_in_mbuf = false
diff --git a/drivers/dma/meson.build b/drivers/dma/meson.build
index 358132759a..eeab0ec361 100644
--- a/drivers/dma/meson.build
+++ b/drivers/dma/meson.build
@@ -2,6 +2,7 @@
 # Copyright 2021 HiSilicon Limited
 
 drivers = [
+        'acc',
         'cnxk',
         'dpaa',
         'dpaa2',
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH v3 2/4] dma/acc: add control path ops
  2025-09-10  8:50 ` [PATCH v3 0/4] add Hisilicon accelerator DMA driver Chengwen Feng
  2025-09-10  8:50   ` [PATCH v3 1/4] dma/acc: add probe and remove Chengwen Feng
@ 2025-09-10  8:51   ` Chengwen Feng
  2025-09-10  8:51   ` [PATCH v3 3/4] dma/acc: add data " Chengwen Feng
  2025-09-10  8:51   ` [PATCH v3 4/4] dma/acc: add doc Chengwen Feng
  3 siblings, 0 replies; 37+ messages in thread
From: Chengwen Feng @ 2025-09-10  8:51 UTC (permalink / raw)
  To: thomas; +Cc: dev, liuyonglong

This commit adds control path ops for accelerator DMA driver.

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
---
 drivers/dma/acc/acc_dmadev.c | 168 +++++++++++++++++++++++++++++++++++
 drivers/dma/acc/acc_dmadev.h |  42 +++++++++
 2 files changed, 210 insertions(+)

diff --git a/drivers/dma/acc/acc_dmadev.c b/drivers/dma/acc/acc_dmadev.c
index fb9718312e..bc0a995c7a 100644
--- a/drivers/dma/acc/acc_dmadev.c
+++ b/drivers/dma/acc/acc_dmadev.c
@@ -34,6 +34,173 @@ RTE_LOG_REGISTER_DEFAULT(acc_dma_logtype, INFO);
 #define ACC_DMA_ERR(hw, ...) \
 	ACC_DMA_DEV_LOG(hw, ERR, __VA_ARGS__)
 
+/*
+ * dev_info_get op: report the device capabilities (mem-to-mem, SVA, copy
+ * and fill), a single virtual channel, and a descriptor count whose minimum
+ * and maximum both equal the SQ depth. info_sz is ignored. Returns 0.
+ */
+static int
+acc_dma_info_get(const struct rte_dma_dev *dev,
+		 struct rte_dma_info *dev_info,
+		 uint32_t info_sz)
+{
+	const struct acc_dma_dev *hw = dev->data->dev_private;
+	const uint16_t ring_size = hw->sq_depth;
+
+	RTE_SET_USED(info_sz);
+
+	dev_info->dev_capa = RTE_DMA_CAPA_MEM_TO_MEM | RTE_DMA_CAPA_SVA |
+			     RTE_DMA_CAPA_OPS_COPY | RTE_DMA_CAPA_OPS_FILL;
+	dev_info->max_vchans = 1;
+	/* The ring size is not configurable, so both bounds are the same. */
+	dev_info->min_desc = ring_size;
+	dev_info->max_desc = ring_size;
+
+	return 0;
+}
+
+/* dev_configure op: nothing is configurable, so every argument is ignored. */
+static int
+acc_dma_configure(struct rte_dma_dev *dev,
+		  const struct rte_dma_conf *conf,
+		  uint32_t conf_sz)
+{
+	RTE_SET_USED(dev);
+	RTE_SET_USED(conf);
+	RTE_SET_USED(conf_sz);
+	return 0;
+}
+
+/*
+ * dev_start op.
+ *
+ * On the first start the SQE, CQE and status arrays are cleared, all ring
+ * bookkeeping and statistics counters are reset, and the UACCE queue is
+ * started. On later starts only the ring indexes handed to the application
+ * are rewound, because acc_dma_stop() leaves the hardware queue running.
+ *
+ * Returns 0 on success, or the error returned by rte_uacce_queue_start().
+ */
+static int
+acc_dma_start(struct rte_dma_dev *dev)
+{
+	struct acc_dma_dev *hw = dev->data->dev_private;
+	int ret;
+
+	if (hw->started) {
+		hw->ridx = 0;
+		hw->cridx = 0;
+		return 0;
+	}
+
+	memset(hw->sqe, 0, hw->sqe_size * hw->sq_depth);
+	memset(hw->cqe, 0, sizeof(struct acc_dma_cqe) * hw->cq_depth);
+	memset(hw->status, 0, sizeof(uint16_t) * hw->sq_depth);
+	hw->ridx = 0;
+	hw->cridx = 0;
+	hw->sq_head = 0;
+	hw->sq_tail = 0;
+	hw->cq_sq_head = 0;
+	/* Keep ACC_DMA_SQ_GAP_NUM + 1 SQEs out of the usable capacity. */
+	hw->avail_sqes = hw->sq_depth - ACC_DMA_SQ_GAP_NUM - 1;
+	hw->cq_head = 0;
+	hw->cqs_completed = 0;
+	hw->cqe_vld = 1;
+	hw->submitted = 0;
+	hw->completed = 0;
+	hw->errors = 0;
+	hw->invalid_lens = 0;
+	/* Reset io_errors too, matching the counter set of acc_dma_stats_reset(). */
+	hw->io_errors = 0;
+	hw->qfulls = 0;
+
+	ret = rte_uacce_queue_start(&hw->qctx);
+	if (ret == 0)
+		hw->started = true;
+
+	return ret;
+}
+
+/* dev_stop op: intentionally does nothing and always returns 0. */
+static int
+acc_dma_stop(struct rte_dma_dev *dev)
+{
+	RTE_SET_USED(dev);
+	return 0;
+}
+
+/*
+ * dev_close op: free the status array, unmap the DUS and MMIO regions and
+ * release the UACCE queue, i.e. undo acc_dma_create() in reverse order.
+ * Always returns 0.
+ */
+static int
+acc_dma_close(struct rte_dma_dev *dev)
+{
+	struct acc_dma_dev *hw = dev->data->dev_private;
+	/* The dmadev already stopped */
+	rte_free(hw->status);
+	rte_uacce_queue_unmap(&hw->qctx, RTE_UACCE_QFRT_DUS);
+	rte_uacce_queue_unmap(&hw->qctx, RTE_UACCE_QFRT_MMIO);
+	rte_uacce_queue_free(&hw->qctx);
+	return 0;
+}
+
+/* vchan_setup op: nothing to set up, so every argument is ignored. */
+static int
+acc_dma_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan,
+		    const struct rte_dma_vchan_conf *conf,
+		    uint32_t conf_sz)
+{
+	RTE_SET_USED(dev);
+	RTE_SET_USED(vchan);
+	RTE_SET_USED(conf);
+	RTE_SET_USED(conf_sz);
+	return 0;
+}
+
+/*
+ * stats_get op: copy the submitted, completed and errors counters into
+ * stats. vchan and stats_sz are ignored. Always returns 0.
+ */
+static int
+acc_dma_stats_get(const struct rte_dma_dev *dev, uint16_t vchan,
+		  struct rte_dma_stats *stats,
+		  uint32_t stats_sz)
+{
+	const struct acc_dma_dev *hw = dev->data->dev_private;
+
+	RTE_SET_USED(stats_sz);
+	RTE_SET_USED(vchan);
+
+	stats->errors = hw->errors;
+	stats->completed = hw->completed;
+	stats->submitted = hw->submitted;
+
+	return 0;
+}
+
+/*
+ * stats_reset op: zero every statistics counter kept in the private data.
+ * vchan is ignored. Always returns 0.
+ */
+static int
+acc_dma_stats_reset(struct rte_dma_dev *dev, uint16_t vchan)
+{
+	struct acc_dma_dev *hw = dev->data->dev_private;
+
+	RTE_SET_USED(vchan);
+
+	/* Counters exported through stats_get. */
+	hw->submitted = hw->completed = hw->errors = 0;
+	/* Driver-internal counters, only visible through the dump op. */
+	hw->invalid_lens = hw->io_errors = hw->qfulls = 0;
+
+	return 0;
+}
+
+/*
+ * dev_dump op: print the queue identity and geometry, the ring bookkeeping
+ * indexes and the statistics counters to f. The SQ/CQ status words are shown
+ * as "OK" when zero and "ERR" otherwise. Always returns 0.
+ */
+static int
+acc_dma_dump(const struct rte_dma_dev *dev, FILE *f)
+{
+	struct acc_dma_dev *hw = dev->data->dev_private;
+
+	/* Queue identity, status words and geometry. */
+	fprintf(f, "  sqn: %u sq_status: %s cq_status: %s\n"
+		"  sqe_size: %u sq_depth: %u sq_depth_mask: %u cq_depth: %u\n",
+		hw->sqn, (*hw->sq_status != 0) ? "ERR" : "OK",
+		(*hw->cq_status != 0) ? "ERR" : "OK",
+		hw->sqe_size, hw->sq_depth, hw->sq_depth_mask, hw->cq_depth);
+	/* Ring bookkeeping state. */
+	fprintf(f, "  ridx: %u cridx: %u\n"
+		"  sq_head: %u sq_tail: %u cq_sq_head: %u avail_sqes: %u\n"
+		"  cq_head: %u cqs_completed: %u cqe_vld: %u\n",
+		hw->ridx, hw->cridx,
+		hw->sq_head, hw->sq_tail, hw->cq_sq_head, hw->avail_sqes,
+		hw->cq_head, hw->cqs_completed, hw->cqe_vld);
+	/* Statistics counters. */
+	fprintf(f, "  submitted: %" PRIu64 " completed: %" PRIu64 " errors: %" PRIu64
+		" invalid_lens: %" PRIu64 " io_errors: %" PRIu64 " qfulls: %" PRIu64 "\n",
+		hw->submitted, hw->completed, hw->errors, hw->invalid_lens,
+		hw->io_errors, hw->qfulls);
+
+	return 0;
+}
+
+/* Control-path operations installed as dev_ops on each dmadev at creation. */
+static const struct rte_dma_dev_ops acc_dmadev_ops = {
+	.dev_info_get     = acc_dma_info_get,
+	.dev_configure    = acc_dma_configure,
+	.dev_start        = acc_dma_start,
+	.dev_stop         = acc_dma_stop,
+	.dev_close        = acc_dma_close,
+	.vchan_setup      = acc_dma_vchan_setup,
+	.stats_get        = acc_dma_stats_get,
+	.stats_reset      = acc_dma_stats_reset,
+	.dev_dump         = acc_dma_dump,
+};
+
 static void
 acc_dma_gen_dev_name(const struct rte_uacce_device *uacce_dev,
 		     uint16_t queue_id, char *dev_name, size_t size)
@@ -111,6 +278,7 @@ acc_dma_create(struct rte_uacce_device *uacce_dev, uint16_t queue_id)
 	}
 
 	dev->device = &uacce_dev->device;
+	dev->dev_ops = &acc_dmadev_ops;
 	dev->fp_obj->dev_private = dev->data->dev_private;
 
 	hw = dev->data->dev_private;
diff --git a/drivers/dma/acc/acc_dmadev.h b/drivers/dma/acc/acc_dmadev.h
index 9a1000fa41..fca0ff79dd 100644
--- a/drivers/dma/acc/acc_dmadev.h
+++ b/drivers/dma/acc/acc_dmadev.h
@@ -13,6 +13,9 @@
 #define ACC_DMA_DEVARG_QUEUES		"queues"
 #define ACC_DMA_DEFAULT_QUEUES		1
 
+#define ACC_DMA_CQ_DOORBELL_PACE	64
+#define ACC_DMA_SQ_GAP_NUM		ACC_DMA_CQ_DOORBELL_PACE
+
 struct acc_dma_config {
 	uint16_t queues;
 
@@ -38,7 +41,45 @@ struct acc_dma_dev {
 	uint16_t sqn;           /**< SQ global number, inited when created. */
 	uint16_t sq_depth_mask; /**< SQ depth - 1, the SQ depth is power of 2. */
 
+	uint16_t ridx;  /**< ring index which will assign to the next request. */
+	uint16_t cridx; /**< ring index which returned by completed APIs. */
+
+	/**
+	 * SQE array management fields:
+	 *
+	 *  -----------------------------------------------------
+	 *  | SQE0 | SQE1 | SQE2 |   ...  | SQEx | ... | SQEn-1 |
+	 *  -----------------------------------------------------
+	 *     ^             ^               ^
+	 *     |             |               |
+	 *   sq_head     cq_sq_head       sq_tail
+	 *
+	 *  sq_head: index to the oldest completed request, this field is
+	 *           updated by completed* APIs.
+	 *  sq_tail: index of the next new request, this field was updated by
+	 *           copy API.
+	 *  cq_sq_head: the index following the last one completed by hardware,
+	 *              this field is updated by completed* APIs.
+	 *
+	 *  [sq_head, cq_sq_head): the SQEs that hardware already completed.
+	 *  [cq_sq_head, sq_tail): the SQEs that hardware processing.
+	 */
+	uint16_t sq_head;
+	uint16_t sq_tail;
+	uint16_t cq_sq_head;
+	uint16_t avail_sqes;
+
 	uint16_t cq_depth;      /**< CQ depth, inited when created. */
+	uint16_t cq_head;       /**< CQ index for next scans. */
+	uint16_t cqs_completed; /**< accumulated number of completed CQs. */
+	uint8_t  cqe_vld;       /**< valid bit for CQE, will change for every round. */
+
+	uint64_t submitted;
+	uint64_t completed;
+	uint64_t errors;
+	uint64_t invalid_lens;
+	uint64_t io_errors;
+	uint64_t qfulls;
 
 	/**
 	 * The following fields are not accessed in the I/O path, so they are
@@ -50,6 +91,7 @@ struct acc_dma_dev {
 	void *dus_base;
 	uint32_t sqe_size;
 	uint16_t sq_depth;
+	bool started;
 };
 
 #endif /* ACC_DMADEV_H */
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH v3 3/4] dma/acc: add data path ops
  2025-09-10  8:50 ` [PATCH v3 0/4] add Hisilicon accelerator DMA driver Chengwen Feng
  2025-09-10  8:50   ` [PATCH v3 1/4] dma/acc: add probe and remove Chengwen Feng
  2025-09-10  8:51   ` [PATCH v3 2/4] dma/acc: add control path ops Chengwen Feng
@ 2025-09-10  8:51   ` Chengwen Feng
  2025-09-10  8:51   ` [PATCH v3 4/4] dma/acc: add doc Chengwen Feng
  3 siblings, 0 replies; 37+ messages in thread
From: Chengwen Feng @ 2025-09-10  8:51 UTC (permalink / raw)
  To: thomas; +Cc: dev, liuyonglong

This commit adds data path ops for accelerator DMA driver.

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
---
 drivers/dma/acc/acc_dmadev.c | 274 +++++++++++++++++++++++++++++++++++
 drivers/dma/acc/acc_dmadev.h |  75 +++++++++-
 2 files changed, 347 insertions(+), 2 deletions(-)

diff --git a/drivers/dma/acc/acc_dmadev.c b/drivers/dma/acc/acc_dmadev.c
index bc0a995c7a..e93354fe72 100644
--- a/drivers/dma/acc/acc_dmadev.c
+++ b/drivers/dma/acc/acc_dmadev.c
@@ -189,6 +189,274 @@ acc_dma_dump(const struct rte_dma_dev *dev, FILE *f)
 	return 0;
 }
 
+static inline void
+acc_dma_sq_doorbell(struct acc_dma_dev *hw)
+{
+	uint64_t doorbell = (uint64_t)(hw->sqn & ACC_DMA_DOORBELL_SQN_MASK) |
+			    (ACC_DMA_DOORBELL_SQ_CMD << ACC_DMA_DOORBELL_CMD_SHIFT) |
+			    (((uint64_t)hw->sq_tail) << ACC_DMA_DOORBELL_IDX_SHIFT);
+	rte_io_wmb();
+	*(volatile uint64_t *)hw->doorbell_reg = doorbell;
+}
+
+static int
+acc_dma_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t dst,
+	     uint32_t length, uint64_t flags)
+{
+	struct acc_dma_dev *hw = dev_private;
+	struct acc_dma_sqe *sqe = &hw->sqe[hw->sq_tail];
+
+	RTE_SET_USED(vchan);
+
+	if (unlikely(length > ACC_DMA_MAX_OP_SIZE)) {
+		hw->invalid_lens++;
+		return -EINVAL;
+	}
+
+	if (unlikely(*hw->sq_status != 0)) {
+		hw->io_errors++;
+		return -EIO;
+	}
+
+	if (hw->avail_sqes == 0) {
+		hw->qfulls++;
+		return -ENOSPC;
+	}
+
+	sqe->bd_type       = ACC_DMA_SQE_TYPE;
+	sqe->task_type     = ACC_DMA_TASK_TYPE;
+	sqe->task_type_ext = ACC_DMA_DATA_MEMCPY;
+	sqe->init_val      = 0;
+	sqe->addr_array    = src;
+	sqe->dst_addr      = dst;
+	sqe->data_size     = length;
+	sqe->dw0           = ACC_DMA_SVA_PREFETCH_EN;
+	sqe->wb_field      = 0;
+
+	hw->sq_tail = (hw->sq_tail + 1) & hw->sq_depth_mask;
+	hw->avail_sqes--;
+	hw->submitted++;
+
+	if (flags & RTE_DMA_OP_FLAG_SUBMIT)
+		acc_dma_sq_doorbell(hw);
+
+	return hw->ridx++;
+}
+
+static int
+acc_dma_fill(void *dev_private, uint16_t vchan, uint64_t pattern,
+	     rte_iova_t dst, uint32_t length, uint64_t flags)
+{
+	struct acc_dma_dev *hw = dev_private;
+	struct acc_dma_sqe *sqe = &hw->sqe[hw->sq_tail];
+
+	RTE_SET_USED(vchan);
+
+	if (unlikely(length > ACC_DMA_MAX_OP_SIZE)) {
+		hw->invalid_lens++;
+		return -EINVAL;
+	}
+
+	if (unlikely(*hw->sq_status != 0)) {
+		hw->io_errors++;
+		return -EIO;
+	}
+
+	if (hw->avail_sqes == 0) {
+		hw->qfulls++;
+		return -ENOSPC;
+	}
+
+	sqe->bd_type       = ACC_DMA_SQE_TYPE;
+	sqe->task_type     = ACC_DMA_TASK_TYPE;
+	sqe->task_type_ext = ACC_DMA_DATA_MEMSET;
+	sqe->init_val      = pattern;
+	sqe->addr_array    = 0;
+	sqe->dst_addr      = dst;
+	sqe->data_size     = length;
+	sqe->dw0           = ACC_DMA_SVA_PREFETCH_EN;
+	sqe->wb_field      = 0;
+
+	hw->sq_tail = (hw->sq_tail + 1) & hw->sq_depth_mask;
+	hw->avail_sqes--;
+	hw->submitted++;
+
+	if (flags & RTE_DMA_OP_FLAG_SUBMIT)
+		acc_dma_sq_doorbell(hw);
+
+	return hw->ridx++;
+}
+
+static int
+acc_dma_submit(void *dev_private, uint16_t vchan)
+{
+	struct acc_dma_dev *hw = dev_private;
+
+	RTE_SET_USED(vchan);
+
+	if (unlikely(*hw->sq_status != 0)) {
+		hw->io_errors++;
+		return -EIO;
+	}
+
+	acc_dma_sq_doorbell(hw);
+
+	return 0;
+}
+
+static inline void
+acc_dma_cq_doorbell(struct acc_dma_dev *hw)
+{
+	uint64_t doorbell = (uint64_t)(hw->sqn & ACC_DMA_DOORBELL_SQN_MASK) |
+			    (ACC_DMA_DOORBELL_CQ_CMD << ACC_DMA_DOORBELL_CMD_SHIFT) |
+			    (((uint64_t)hw->cq_head) << ACC_DMA_DOORBELL_IDX_SHIFT);
+	rte_io_wmb();
+	*(volatile uint64_t *)hw->doorbell_reg = doorbell;
+}
+
+/*
+ * Scan the completion queue (CQ) for newly written CQEs and account for the
+ * SQEs they complete.
+ *
+ * At most cq_depth CQEs are consumed per call. A CQE is considered new when
+ * its valid bit equals the expected valid bit, which is inverted every time
+ * the CQ index wraps. For each consumed CQE the SQ head index it carries is
+ * used to inspect the write-back field of the matching SQE, and an error
+ * status is recorded in hw->status[] when the SQE did not finish cleanly.
+ *
+ * The scan state (CQ head and expected valid bit) is kept in locals and only
+ * committed to hw when the scan succeeds, so an aborted scan (invalid SQ head
+ * reported by a CQE) leaves hw->cq_head and hw->cqe_vld consistent with each
+ * other.
+ */
+static inline void
+acc_dma_scan_cq(struct acc_dma_dev *hw)
+{
+	volatile struct acc_dma_cqe *cqe;
+	struct acc_dma_sqe *sqe;
+	uint16_t csq_head = hw->cq_sq_head;
+	uint16_t cq_head = hw->cq_head;
+	uint8_t cqe_vld = hw->cqe_vld;
+	uint16_t count = 0;
+	uint64_t misc;
+
+	if (unlikely(*hw->cq_status != 0)) {
+		hw->io_errors++;
+		return;
+	}
+
+	while (count < hw->cq_depth) {
+		cqe = &hw->cqe[cq_head];
+		misc = cqe->misc;
+		misc = rte_le_to_cpu_64(misc);
+		if (RTE_FIELD_GET64(ACC_DMA_CQE_VALID_B, misc) != cqe_vld)
+			break;
+
+		csq_head = RTE_FIELD_GET64(ACC_DMA_SQ_HEAD_MASK, misc);
+		if (unlikely(csq_head > hw->sq_depth_mask)) {
+			/**
+			 * Defensive programming to prevent overflow of the
+			 * status array indexed by csq_head. Only error logs
+			 * are used for prompting.
+			 */
+			ACC_DMA_ERR(hw, "invalid csq_head: %u!", csq_head);
+			count = 0;
+			break;
+		}
+		sqe = &hw->sqe[csq_head];
+		if (sqe->done_flag != ACC_DMA_TASK_DONE ||
+			sqe->err_type || sqe->ext_err_type || sqe->wtype) {
+			hw->status[csq_head] = RTE_DMA_STATUS_ERROR_UNKNOWN;
+		}
+
+		count++;
+		cq_head++;
+		if (cq_head == hw->cq_depth) {
+			/* CQ wrapped: the expected valid bit is inverted. */
+			cqe_vld = !cqe_vld;
+			cq_head = 0;
+		}
+	}
+
+	/* Nothing consumed, or the scan was aborted: leave hw state untouched. */
+	if (count == 0)
+		return;
+
+	hw->cq_head = cq_head;
+	hw->cqe_vld = cqe_vld;
+	hw->cq_sq_head = (csq_head + 1) & hw->sq_depth_mask;
+	hw->avail_sqes += count;
+	hw->cqs_completed += count;
+	/* Ring the CQ doorbell only once per ACC_DMA_CQ_DOORBELL_PACE CQEs. */
+	if (hw->cqs_completed >= ACC_DMA_CQ_DOORBELL_PACE) {
+		acc_dma_cq_doorbell(hw);
+		hw->cqs_completed = 0;
+	}
+}
+
+static inline uint16_t
+acc_dma_calc_cpls(struct acc_dma_dev *hw, const uint16_t nb_cpls)
+{
+	uint16_t cpl_num;
+
+	if (hw->cq_sq_head >= hw->sq_head)
+		cpl_num = hw->cq_sq_head - hw->sq_head;
+	else
+		cpl_num = hw->sq_depth_mask + 1 - hw->sq_head + hw->cq_sq_head;
+
+	if (cpl_num > nb_cpls)
+		cpl_num = nb_cpls;
+
+	return cpl_num;
+}
+
+static uint16_t
+acc_dma_completed(void *dev_private,
+		  uint16_t vchan, const uint16_t nb_cpls,
+		  uint16_t *last_idx, bool *has_error)
+{
+	struct acc_dma_dev *hw = dev_private;
+	uint16_t sq_head = hw->sq_head;
+	uint16_t cpl_num, i;
+
+	RTE_SET_USED(vchan);
+	acc_dma_scan_cq(hw);
+
+	cpl_num = acc_dma_calc_cpls(hw, nb_cpls);
+	for (i = 0; i < cpl_num; i++) {
+		if (hw->status[sq_head]) {
+			*has_error = true;
+			break;
+		}
+		sq_head = (sq_head + 1) & hw->sq_depth_mask;
+	}
+	*last_idx = hw->cridx + i - 1;
+	if (i > 0) {
+		hw->cridx += i;
+		hw->sq_head = sq_head;
+		hw->completed += i;
+	}
+
+	return i;
+}
+
+static uint16_t
+acc_dma_completed_status(void *dev_private,
+			 uint16_t vchan, const uint16_t nb_cpls,
+			 uint16_t *last_idx, enum rte_dma_status_code *status)
+{
+	struct acc_dma_dev *hw = dev_private;
+	uint16_t sq_head = hw->sq_head;
+	uint16_t cpl_num, i;
+
+	RTE_SET_USED(vchan);
+	acc_dma_scan_cq(hw);
+
+	cpl_num = acc_dma_calc_cpls(hw, nb_cpls);
+	for (i = 0; i < cpl_num; i++) {
+		status[i] = hw->status[sq_head];
+		hw->errors += !!status[i];
+		hw->status[sq_head] = 0;
+		sq_head = (sq_head + 1) & hw->sq_depth_mask;
+	}
+	*last_idx = hw->cridx + cpl_num - 1;
+	if (likely(cpl_num > 0)) {
+		hw->cridx += cpl_num;
+		hw->sq_head = sq_head;
+		hw->completed += cpl_num;
+	}
+
+	return cpl_num;
+}
+
+static uint16_t
+acc_dma_burst_capacity(const void *dev_private, uint16_t vchan)
+{
+	const struct acc_dma_dev *hw = dev_private;
+	RTE_SET_USED(vchan);
+	return hw->avail_sqes;
+}
+
 static const struct rte_dma_dev_ops acc_dmadev_ops = {
 	.dev_info_get     = acc_dma_info_get,
 	.dev_configure    = acc_dma_configure,
@@ -280,6 +548,12 @@ acc_dma_create(struct rte_uacce_device *uacce_dev, uint16_t queue_id)
 	dev->device = &uacce_dev->device;
 	dev->dev_ops = &acc_dmadev_ops;
 	dev->fp_obj->dev_private = dev->data->dev_private;
+	dev->fp_obj->copy = acc_dma_copy;
+	dev->fp_obj->fill = acc_dma_fill;
+	dev->fp_obj->submit = acc_dma_submit;
+	dev->fp_obj->completed = acc_dma_completed;
+	dev->fp_obj->completed_status = acc_dma_completed_status;
+	dev->fp_obj->burst_capacity = acc_dma_burst_capacity;
 
 	hw = dev->data->dev_private;
 	hw->data = dev->data; /* make sure ACC_DMA_DEBUG/INFO/WARN/ERR was available. */
diff --git a/drivers/dma/acc/acc_dmadev.h b/drivers/dma/acc/acc_dmadev.h
index fca0ff79dd..dd8c7e0cfd 100644
--- a/drivers/dma/acc/acc_dmadev.h
+++ b/drivers/dma/acc/acc_dmadev.h
@@ -25,9 +25,80 @@ struct acc_dma_config {
 };
 
 #define ACC_DMA_DOORBELL_OFFSET		0x1000u
+#define ACC_DMA_DOORBELL_SQN_MASK	0x3FFu
+#define ACC_DMA_DOORBELL_SQ_CMD		0ull
+#define ACC_DMA_DOORBELL_CQ_CMD		1ull
+#define ACC_DMA_DOORBELL_CMD_SHIFT	12
+#define ACC_DMA_DOORBELL_IDX_SHIFT	32
+
+#define ACC_DMA_TASK_TYPE		0x3
+#define ACC_DMA_SQE_TYPE		0x1
+#define ACC_DMA_SVA_PREFETCH_EN		RTE_BIT32(15)
+#define ACC_DMA_MAX_OP_SIZE		(RTE_BIT32(24) - 1)
+
+enum {
+	ACC_DMA_DATA_MEMCPY = 0,
+	ACC_DMA_DATA_MEMSET = 7,
+};
+
+enum {
+	ACC_DMA_TASK_DONE = 1,
+	ACC_DMA_TASK_ERROR,
+};
 
-struct acc_dma_sqe {};
-struct acc_dma_cqe {};
+struct acc_dma_sqe {
+	uint32_t bd_type : 6;
+	uint32_t resv1 : 2;
+	uint32_t task_type : 6;
+	uint32_t resv2 : 2;
+	uint32_t task_type_ext : 6;
+	uint32_t resv3 : 9;
+	uint32_t bd_invlid : 1;
+	uint32_t rsv4[2];
+	uint32_t low_tag;
+	uint32_t hi_tag;
+	/* The number of bytes to be copied or filled for single address. */
+	uint32_t data_size;
+	uint32_t rsv5;
+	/*
+	 * 0 ~ 13 bits: reserved,
+	 * 14 bit: single address or multi addresses,
+	 * 15 bit: sva prefetch en.
+	 */
+	uint16_t dw0;
+	/*
+	 * 0 ~5 bits: reserved,
+	 * 6 ~ 13 bits: address num,
+	 * 14 ~15 bits: reserved.
+	 */
+	uint16_t dw1;
+	uint64_t init_val;
+	uint32_t rsv6[12];
+	/* dst addr for single address task. */
+	uint64_t dst_addr;
+	uint32_t rsv7[2];
+	/* src addr for single address task, addr array for multi addresses. */
+	uint64_t addr_array;
+	union {
+		uint32_t wb_field;
+		struct {
+			uint32_t done_flag : 3;
+			uint32_t rsv8 : 1;
+			uint32_t ext_err_type : 12;
+			uint32_t err_type : 8;
+			uint32_t wtype : 8;
+		};
+	};
+	uint32_t rsv9[3];
+};
+
+#define ACC_DMA_SQ_HEAD_MASK		RTE_GENMASK64(15, 0)
+#define ACC_DMA_CQE_VALID_B		RTE_BIT64(48)
+
+struct acc_dma_cqe {
+	uint64_t rsv;
+	uint64_t misc;
+};
 
 struct acc_dma_dev {
 	struct acc_dma_sqe *sqe;
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH v3 4/4] dma/acc: add doc
  2025-09-10  8:50 ` [PATCH v3 0/4] add Hisilicon accelerator DMA driver Chengwen Feng
                     ` (2 preceding siblings ...)
  2025-09-10  8:51   ` [PATCH v3 3/4] dma/acc: add data " Chengwen Feng
@ 2025-09-10  8:51   ` Chengwen Feng
  3 siblings, 0 replies; 37+ messages in thread
From: Chengwen Feng @ 2025-09-10  8:51 UTC (permalink / raw)
  To: thomas; +Cc: dev, liuyonglong

This commit adds document for accelerator DMA driver.

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
---
 MAINTAINERS                            |  1 +
 doc/guides/dmadevs/acc.rst             | 63 ++++++++++++++++++++++++++
 doc/guides/dmadevs/index.rst           |  1 +
 doc/guides/rel_notes/release_25_11.rst |  6 +++
 4 files changed, 71 insertions(+)
 create mode 100644 doc/guides/dmadevs/acc.rst

diff --git a/MAINTAINERS b/MAINTAINERS
index 42717363a0..ca3a8a421b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1366,6 +1366,7 @@ F: doc/guides/dmadevs/hisilicon.rst
 HiSilicon Accelerator DMA
 M: Chengwen Feng <fengchengwen@huawei.com>
 F: drivers/dma/acc/
+F: doc/guides/dmadevs/acc.rst
 
 Marvell CNXK DPI DMA
 M: Vamsi Attunuru <vattunuru@marvell.com>
diff --git a/doc/guides/dmadevs/acc.rst b/doc/guides/dmadevs/acc.rst
new file mode 100644
index 0000000000..41fc24877f
--- /dev/null
+++ b/doc/guides/dmadevs/acc.rst
@@ -0,0 +1,63 @@
+..  SPDX-License-Identifier: BSD-3-Clause
+    Copyright (c) 2025 HiSilicon Technologies Co., Ltd. All rights reserved.
+
+HISILICON Accelerator DMA Driver
+================================
+
+Kunpeng SoC has an internal accelerator unit which includes zip function, and
+the zip also supports data copy and fill. This driver exposes this capability to
+DPDK application.
+
+
+Supported Kunpeng SoCs
+----------------------
+
+* Kunpeng 920
+
+
+Device Setup
+-------------
+
+In order to use the device in DPDK, the user should insmod uacce.ko, hisi_qm.ko
+and hisi_zip.ko (with module parameter uacce_mode=1). Several subdirectories
+whose names start with hisi_zip then appear in the /sys/class/uacce/ directory.
+
+Device Probing and Initialization
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+User should use following method to probe device::
+
+        $ dpdk-app -a uacce:hisi_zip-0,queues=2 ...
+
+hisi_zip-0 is the directory name in the /sys/class/uacce/ directory; queues is
+a runtime config parameter which indicates how many dmadevs are created.
+
+If the probe is successful, two dmadevs are created, named "hisi_zip-0-dma0"
+and "hisi_zip-0-dma1".
+
+.. note::
+        In the /sys/class/uacce/hisi_zip-x/ directory, user could query api and
+        algorithms, this driver can only match the device whose api is
+        hisi_qm_v5 and algorithms contain udma.
+
+Device Configuration
+~~~~~~~~~~~~~~~~~~~~~
+
+Configuration requirements:
+
+* ``ring_size`` is obtained from the UACCE API and is a fixed value.
+* Only one ``vchan`` is supported per ``dmadev``.
+* Silent mode is not supported.
+* The transfer direction must be set to ``RTE_DMA_DIR_MEM_TO_MEM``.
+
+
+Device Datapath Capability and Limitation
+-----------------------------------------
+
+Support memory copy and fill operations.
+
+.. note::
+        Currently, the maximum size of the operation data is limited to 16MB-1B
+        in the driver. The device actually supports operations in a larger data
+        size, but the driver requires complex operations in the datapath. If you
+        have such a requirement, please contact the maintainers.
diff --git a/doc/guides/dmadevs/index.rst b/doc/guides/dmadevs/index.rst
index 15ddaf5192..dcc8c189ba 100644
--- a/doc/guides/dmadevs/index.rst
+++ b/doc/guides/dmadevs/index.rst
@@ -11,6 +11,7 @@ an application through DMA API.
    :maxdepth: 1
    :numbered:
 
+   acc
    cnxk
    dpaa
    dpaa2
diff --git a/doc/guides/rel_notes/release_25_11.rst b/doc/guides/rel_notes/release_25_11.rst
index efb88bbbb0..7dc66317b4 100644
--- a/doc/guides/rel_notes/release_25_11.rst
+++ b/doc/guides/rel_notes/release_25_11.rst
@@ -60,6 +60,12 @@ New Features
   Added Ethernet link speed for 800 Gb/s as it is well standardized in IEEE,
   and some devices already support this speed.
 
+* **Added HiSilicon accelerator DMA driver.**
+
+  Kunpeng SoC has an internal accelerator unit which includes a zip function,
+  and the zip also supports data copy and fill. This driver exposes this
+  capability to DPDK applications.
+
 * **Updated NXP DPAA2 ethernet driver.**
 
   * Enabled software taildrop for ordered queues.
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH v4 0/4] add Hisilicon accelerator DMA driver
  2025-08-27  9:27 [PATCH 0/4] add Hisilicon accelerator DMA driver Chengwen Feng
                   ` (5 preceding siblings ...)
  2025-09-10  8:50 ` [PATCH v3 0/4] add Hisilicon accelerator DMA driver Chengwen Feng
@ 2025-10-13  9:11 ` Chengwen Feng
  2025-10-13  9:11   ` [PATCH v4 1/4] dma/acc: add probe and remove Chengwen Feng
                     ` (3 more replies)
  2025-10-16  1:52 ` [PATCH v5 0/4] add Hisilicon accelerator DMA driver Chengwen Feng
  2025-10-16  2:53 ` [PATCH v6 0/4] add Hisilicon accelerator DMA driver Chengwen Feng
  8 siblings, 4 replies; 37+ messages in thread
From: Chengwen Feng @ 2025-10-13  9:11 UTC (permalink / raw)
  To: thomas; +Cc: dev, liuyonglong

This patchset adds the Hisilicon accelerator DMA driver, which is based on
the UACCE bus.

Chengwen Feng (4):
  dma/acc: add probe and remove
  dma/acc: add control path ops
  dma/acc: add data path ops
  dma/acc: add doc

---
v4: start/stop support detect sq/cq-status.
    add wait requests complete in stop.
v3: fix remove wrong dmadev by add prefix match.
    use proper errcode when get avail queue fail and dmadev alloc fail.
v2: fix magic number.
    support clean write back field when submit request.

 MAINTAINERS                            |   5 +
 doc/guides/dmadevs/acc.rst             |  63 ++
 doc/guides/dmadevs/index.rst           |   1 +
 doc/guides/rel_notes/release_25_11.rst |   6 +
 drivers/dma/acc/acc_dmadev.c           | 774 +++++++++++++++++++++++++
 drivers/dma/acc/acc_dmadev.h           | 169 ++++++
 drivers/dma/acc/meson.build            |  21 +
 drivers/dma/meson.build                |   1 +
 8 files changed, 1040 insertions(+)
 create mode 100644 doc/guides/dmadevs/acc.rst
 create mode 100644 drivers/dma/acc/acc_dmadev.c
 create mode 100644 drivers/dma/acc/acc_dmadev.h
 create mode 100644 drivers/dma/acc/meson.build

-- 
2.17.1


^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH v4 1/4] dma/acc: add probe and remove
  2025-10-13  9:11 ` [PATCH v4 0/4] add Hisilicon accelerator DMA driver Chengwen Feng
@ 2025-10-13  9:11   ` Chengwen Feng
  2025-10-13  9:11   ` [PATCH v4 2/4] dma/acc: add control path ops Chengwen Feng
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 37+ messages in thread
From: Chengwen Feng @ 2025-10-13  9:11 UTC (permalink / raw)
  To: thomas; +Cc: dev, liuyonglong

This patch adds probe and remove operation for accelerator DMA driver.

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
---
 MAINTAINERS                  |   4 +
 drivers/dma/acc/acc_dmadev.c | 289 +++++++++++++++++++++++++++++++++++
 drivers/dma/acc/acc_dmadev.h |  55 +++++++
 drivers/dma/acc/meson.build  |  21 +++
 drivers/dma/meson.build      |   1 +
 5 files changed, 370 insertions(+)
 create mode 100644 drivers/dma/acc/acc_dmadev.c
 create mode 100644 drivers/dma/acc/acc_dmadev.h
 create mode 100644 drivers/dma/acc/meson.build

diff --git a/MAINTAINERS b/MAINTAINERS
index 1a2729be66..37a5e1c476 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1366,6 +1366,10 @@ M: Chengwen Feng <fengchengwen@huawei.com>
 F: drivers/dma/hisilicon/
 F: doc/guides/dmadevs/hisilicon.rst
 
+HiSilicon Accelerator DMA
+M: Chengwen Feng <fengchengwen@huawei.com>
+F: drivers/dma/acc/
+
 Marvell CNXK DPI DMA
 M: Vamsi Attunuru <vattunuru@marvell.com>
 T: git://dpdk.org/next/dpdk-next-net-mrvl
diff --git a/drivers/dma/acc/acc_dmadev.c b/drivers/dma/acc/acc_dmadev.c
new file mode 100644
index 0000000000..fb9718312e
--- /dev/null
+++ b/drivers/dma/acc/acc_dmadev.c
@@ -0,0 +1,289 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2025 HiSilicon Technologies Co., Ltd. All rights reserved.
+ */
+
+#include <errno.h>
+#include <inttypes.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include <rte_byteorder.h>
+#include <rte_eal.h>
+#include <rte_io.h>
+#include <rte_kvargs.h>
+#include <rte_log.h>
+#include <rte_malloc.h>
+
+#include <rte_dmadev_pmd.h>
+
+#include "acc_dmadev.h"
+
+RTE_LOG_REGISTER_DEFAULT(acc_dma_logtype, INFO);
+#define RTE_LOGTYPE_ACC_DMA acc_dma_logtype
+#define ACC_DMA_LOG(level, ...) \
+	RTE_LOG_LINE_PREFIX(level, ACC_DMA, "%s(): ", __func__, __VA_ARGS__)
+#define ACC_DMA_DEV_LOG(hw, level, ...) \
+	RTE_LOG_LINE_PREFIX(level, ACC_DMA, "%s %s(): ", \
+		(hw)->data->dev_name RTE_LOG_COMMA __func__, __VA_ARGS__)
+#define ACC_DMA_DEBUG(hw, ...) \
+	ACC_DMA_DEV_LOG(hw, DEBUG, __VA_ARGS__)
+#define ACC_DMA_INFO(hw, ...) \
+	ACC_DMA_DEV_LOG(hw, INFO, __VA_ARGS__)
+#define ACC_DMA_WARN(hw, ...) \
+	ACC_DMA_DEV_LOG(hw, WARNING, __VA_ARGS__)
+#define ACC_DMA_ERR(hw, ...) \
+	ACC_DMA_DEV_LOG(hw, ERR, __VA_ARGS__)
+
+static void
+acc_dma_gen_dev_name(const struct rte_uacce_device *uacce_dev,
+		     uint16_t queue_id, char *dev_name, size_t size)
+{
+	memset(dev_name, 0, size);
+	(void)snprintf(dev_name, size, "%s-dma%u", uacce_dev->device.name, queue_id);
+}
+
+static void
+acc_dma_gen_dev_prefix(const struct rte_uacce_device *uacce_dev, char *dev_name, size_t size)
+{
+	memset(dev_name, 0, size);
+	(void)snprintf(dev_name, size, "%s-dma", uacce_dev->device.name);
+}
+
+/*
+ * Query the queue pair (QP) properties through the UACCE queue ioctls and
+ * cache them in hw: the SQ number, SQE size, SQ depth (plus its mask) and
+ * CQ depth.
+ *
+ * The SQ depth must be a non-zero power of two because ring indexes are
+ * wrapped with sq_depth_mask; the CQ depth must be non-zero.
+ *
+ * Returns 0 on success, -EINVAL when an ioctl fails or the reported depths
+ * are unusable.
+ */
+static int
+acc_dma_get_qp_info(struct acc_dma_dev *hw)
+{
+#define CMD_QM_GET_QP_CTX	_IOWR('H', 10, struct acc_dma_qp_contex)
+#define CMD_QM_GET_QP_INFO	_IOWR('H', 11, struct acc_dma_qp_info)
+#define QP_ALG_TYPE		2
+	struct acc_dma_qp_contex {
+		uint16_t id;
+		uint16_t qc_type;
+	} qp_ctx;
+	struct acc_dma_qp_info {
+		uint32_t sqe_size;
+		uint16_t sq_depth;
+		uint16_t cq_depth;
+		uint64_t reserved;
+	} qp_info;
+	int ret;
+
+	memset(&qp_ctx, 0, sizeof(qp_ctx));
+	qp_ctx.qc_type = QP_ALG_TYPE;
+	ret = rte_uacce_queue_ioctl(&hw->qctx, CMD_QM_GET_QP_CTX, &qp_ctx);
+	if (ret != 0) {
+		ACC_DMA_ERR(hw, "get qm qp context fail!");
+		return -EINVAL;
+	}
+	hw->sqn = qp_ctx.id;
+
+	memset(&qp_info, 0, sizeof(qp_info));
+	ret = rte_uacce_queue_ioctl(&hw->qctx, CMD_QM_GET_QP_INFO, &qp_info);
+	if (ret != 0) {
+		ACC_DMA_ERR(hw, "get qm qp info fail!");
+		return -EINVAL;
+	}
+	/*
+	 * Zero passes the (x & (x - 1)) test, so it must be rejected
+	 * explicitly, otherwise sq_depth_mask would wrap to 0xFFFF.
+	 */
+	if (qp_info.sq_depth == 0 ||
+	    (qp_info.sq_depth & (qp_info.sq_depth - 1)) != 0) {
+		ACC_DMA_ERR(hw, "sq depth is not 2's power!");
+		return -EINVAL;
+	}
+	if (qp_info.cq_depth == 0) {
+		ACC_DMA_ERR(hw, "cq depth is zero!");
+		return -EINVAL;
+	}
+	hw->sqe_size = qp_info.sqe_size;
+	hw->sq_depth = qp_info.sq_depth;
+	hw->cq_depth = qp_info.cq_depth;
+	hw->sq_depth_mask = hw->sq_depth - 1;
+
+	return 0;
+}
+
+static int
+acc_dma_create(struct rte_uacce_device *uacce_dev, uint16_t queue_id)
+{
+	char name[RTE_DEV_NAME_MAX_LEN];
+	struct rte_dma_dev *dev;
+	struct acc_dma_dev *hw;
+	int ret;
+
+	acc_dma_gen_dev_name(uacce_dev, queue_id, name, sizeof(name));
+	dev = rte_dma_pmd_allocate(name, uacce_dev->device.numa_node,
+				   sizeof(struct acc_dma_dev));
+	if (dev == NULL) {
+		ACC_DMA_LOG(ERR, "%s allocate dmadev fail!", name);
+		return -ENOMEM;
+	}
+
+	dev->device = &uacce_dev->device;
+	dev->fp_obj->dev_private = dev->data->dev_private;
+
+	hw = dev->data->dev_private;
+	hw->data = dev->data; /* make sure ACC_DMA_DEBUG/INFO/WARN/ERR was available. */
+
+	ret = rte_uacce_queue_alloc(uacce_dev, &hw->qctx);
+	if (ret != 0) {
+		ACC_DMA_ERR(hw, "alloc queue fail!");
+		goto release_dma_pmd;
+	}
+
+	ret = acc_dma_get_qp_info(hw);
+	if (ret != 0)
+		goto free_uacce_queue;
+
+	hw->io_base = rte_uacce_queue_mmap(&hw->qctx, RTE_UACCE_QFRT_MMIO);
+	if (hw->io_base == NULL) {
+		ACC_DMA_ERR(hw, "mmap MMIO region fail!");
+		ret = -EINVAL;
+		goto free_uacce_queue;
+	}
+	hw->doorbell_reg = (void *)((uintptr_t)hw->io_base + ACC_DMA_DOORBELL_OFFSET);
+
+	hw->dus_base = rte_uacce_queue_mmap(&hw->qctx, RTE_UACCE_QFRT_DUS);
+	if (hw->dus_base == NULL) {
+		ACC_DMA_ERR(hw, "mmap DUS region fail!");
+		ret = -EINVAL;
+		goto unmap_mmio;
+	}
+	hw->sqe = hw->dus_base;
+	hw->cqe = (void *)((uintptr_t)hw->dus_base + hw->sqe_size * hw->sq_depth);
+	hw->sq_status = (uint32_t *)((uintptr_t)hw->dus_base +
+			uacce_dev->qfrt_sz[RTE_UACCE_QFRT_DUS] - sizeof(uint32_t));
+	hw->cq_status = hw->sq_status - 1;
+
+	hw->status = rte_zmalloc_socket(NULL, sizeof(uint16_t) * hw->sq_depth,
+					RTE_CACHE_LINE_SIZE, uacce_dev->numa_node);
+	if (hw->status == NULL) {
+		ACC_DMA_ERR(hw, "malloc status region fail!");
+		ret = -ENOMEM;
+		goto unmap_dus;
+	}
+
+	dev->state = RTE_DMA_DEV_READY;
+	ACC_DMA_DEBUG(hw, "create dmadev %s success!", name);
+
+	return 0;
+
+unmap_dus:
+	rte_uacce_queue_unmap(&hw->qctx, RTE_UACCE_QFRT_DUS);
+unmap_mmio:
+	rte_uacce_queue_unmap(&hw->qctx, RTE_UACCE_QFRT_MMIO);
+free_uacce_queue:
+	rte_uacce_queue_free(&hw->qctx);
+release_dma_pmd:
+	rte_dma_pmd_release(name);
+	return ret;
+}
+
+static int
+acc_dma_parse_queues(const char *key, const char *value, void *extra_args)
+{
+	struct acc_dma_config *config = extra_args;
+	uint64_t val;
+	char *end;
+
+	RTE_SET_USED(key);
+
+	errno = 0;
+	val = strtoull(value, &end, 0);
+	if (errno == ERANGE || value == end || *end != '\0' || val == 0) {
+		ACC_DMA_LOG(ERR, "%s invalid queues! set to default one queue!",
+			    config->dev->name);
+		config->queues = ACC_DMA_DEFAULT_QUEUES;
+	} else if (val > config->avail_queues) {
+		ACC_DMA_LOG(WARNING, "%s exceed available queues! set to available queues %u",
+			     config->dev->name, config->avail_queues);
+		config->queues = config->avail_queues;
+	} else {
+		config->queues = val;
+	}
+
+	return 0;
+}
+
+static int
+acc_dma_parse_devargs(struct rte_uacce_device *uacce_dev, struct acc_dma_config *config)
+{
+	struct rte_kvargs *kvlist;
+	int avail_queues;
+
+	avail_queues = rte_uacce_avail_queues(uacce_dev);
+	if (avail_queues <= 0) {
+		ACC_DMA_LOG(ERR, "%s don't have available queues!", uacce_dev->name);
+		return -EINVAL;
+	}
+	config->dev = uacce_dev;
+	config->avail_queues = avail_queues <= UINT16_MAX ? avail_queues : UINT16_MAX;
+
+	if (uacce_dev->device.devargs == NULL)
+		return 0;
+
+	kvlist = rte_kvargs_parse(uacce_dev->device.devargs->args, NULL);
+	if (kvlist == NULL)
+		return 0;
+
+	(void)rte_kvargs_process(kvlist, ACC_DMA_DEVARG_QUEUES, &acc_dma_parse_queues, config);
+
+	rte_kvargs_free(kvlist);
+
+	return 0;
+}
+
+static int
+acc_dma_probe(struct rte_uacce_driver *dr, struct rte_uacce_device *uacce_dev)
+{
+	struct acc_dma_config config = { .queues = ACC_DMA_DEFAULT_QUEUES };
+	int ret = 0;
+	uint32_t i;
+
+	RTE_SET_USED(dr);
+
+	ret = acc_dma_parse_devargs(uacce_dev, &config);
+	if (ret != 0)
+		return ret;
+
+	for (i = 0; i < config.queues; i++) {
+		ret = acc_dma_create(uacce_dev, i);
+		if (ret != 0) {
+			ACC_DMA_LOG(ERR, "%s create dmadev No.%u failed!", uacce_dev->name, i);
+			break;
+		}
+	}
+
+	if (ret != 0 && i > 0) {
+		ACC_DMA_LOG(WARNING, "%s probed %u dmadev, can't probe more!", uacce_dev->name, i);
+		ret = 0;
+	}
+
+	return ret;
+}
+
+static int
+acc_dma_remove(struct rte_uacce_device *uacce_dev)
+{
+	char name[RTE_DEV_NAME_MAX_LEN];
+	struct rte_dma_info info;
+	int i = 0;
+	int ret;
+
+	acc_dma_gen_dev_prefix(uacce_dev, name, sizeof(name));
+	RTE_DMA_FOREACH_DEV(i) {
+		ret = rte_dma_info_get(i, &info);
+		if (ret != 0)
+			continue;
+		if (strncmp(info.dev_name, name, strlen(name)) == 0)
+			rte_dma_pmd_release(info.dev_name);
+	}
+
+	return 0;
+}
+
+static const struct rte_uacce_id acc_dma_id_table[] = {
+	{ "hisi_qm_v5", "udma" },
+	{ .dev_api = NULL, },
+};
+
+static struct rte_uacce_driver acc_dma_pmd_drv = {
+	.id_table = acc_dma_id_table,
+	.probe = acc_dma_probe,
+	.remove = acc_dma_remove,
+};
+
+RTE_PMD_REGISTER_UACCE(dma_acc, acc_dma_pmd_drv);
+RTE_PMD_REGISTER_PARAM_STRING(dma_acc,
+			ACC_DMA_DEVARG_QUEUES "=<uint16> ");
diff --git a/drivers/dma/acc/acc_dmadev.h b/drivers/dma/acc/acc_dmadev.h
new file mode 100644
index 0000000000..9a1000fa41
--- /dev/null
+++ b/drivers/dma/acc/acc_dmadev.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2025 HiSilicon Technologies Co., Ltd. All rights reserved.
+ */
+
+#ifndef ACC_DMADEV_H
+#define ACC_DMADEV_H
+
+#include <bus_uacce_driver.h>
+#include <rte_bitops.h>
+#include <rte_common.h>
+#include <rte_dmadev_pmd.h>
+
+#define ACC_DMA_DEVARG_QUEUES		"queues"
+#define ACC_DMA_DEFAULT_QUEUES		1
+
+struct acc_dma_config {
+	uint16_t queues;
+
+	/* The following fields are config contexts. */
+	struct rte_uacce_device *dev;
+	uint16_t avail_queues;
+};
+
+#define ACC_DMA_DOORBELL_OFFSET		0x1000u
+
+struct acc_dma_sqe {};
+struct acc_dma_cqe {};
+
+struct acc_dma_dev {
+	struct acc_dma_sqe *sqe;
+	struct acc_dma_cqe *cqe;
+	uint16_t *status;             /* the completion status array of SQEs. */
+
+	volatile void *doorbell_reg;  /**< register address for doorbell. */
+	volatile uint32_t *sq_status; /**< SQ status pointer. */
+	volatile uint32_t *cq_status; /**< CQ status pointer. */
+
+	uint16_t sqn;           /**< SQ global number, inited when created. */
+	uint16_t sq_depth_mask; /**< SQ depth - 1, the SQ depth is power of 2. */
+
+	uint16_t cq_depth;      /**< CQ depth, inited when created. */
+
+	/**
+	 * The following fields are not accessed in the I/O path, so they are
+	 * placed at the end.
+	 */
+	struct rte_dma_dev_data *data;
+	struct rte_uacce_qcontex qctx;
+	void *io_base;
+	void *dus_base;
+	uint32_t sqe_size;
+	uint16_t sq_depth;
+};
+
+#endif /* ACC_DMADEV_H */
diff --git a/drivers/dma/acc/meson.build b/drivers/dma/acc/meson.build
new file mode 100644
index 0000000000..8a1bad5281
--- /dev/null
+++ b/drivers/dma/acc/meson.build
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright (c) 2025 HiSilicon Technologies Co., Ltd. All rights reserved.
+
+if not is_linux
+    build = false
+    reason = 'only supported on Linux'
+    subdir_done()
+endif
+
+if (arch_subdir != 'x86' and arch_subdir != 'arm') or (not dpdk_conf.get('RTE_ARCH_64'))
+    build = false
+    reason = 'only supported on x86_64 and aarch64'
+    subdir_done()
+endif
+
+deps += ['bus_uacce', 'dmadev']
+sources = files(
+        'acc_dmadev.c',
+)
+
+require_iova_in_mbuf = false
diff --git a/drivers/dma/meson.build b/drivers/dma/meson.build
index 358132759a..eeab0ec361 100644
--- a/drivers/dma/meson.build
+++ b/drivers/dma/meson.build
@@ -2,6 +2,7 @@
 # Copyright 2021 HiSilicon Limited
 
 drivers = [
+        'acc',
         'cnxk',
         'dpaa',
         'dpaa2',
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH v4 2/4] dma/acc: add control path ops
  2025-10-13  9:11 ` [PATCH v4 0/4] add Hisilicon accelerator DMA driver Chengwen Feng
  2025-10-13  9:11   ` [PATCH v4 1/4] dma/acc: add probe and remove Chengwen Feng
@ 2025-10-13  9:11   ` Chengwen Feng
  2025-10-13  9:11   ` [PATCH v4 3/4] dma/acc: add data " Chengwen Feng
  2025-10-13  9:11   ` [PATCH v4 4/4] dma/acc: add doc Chengwen Feng
  3 siblings, 0 replies; 37+ messages in thread
From: Chengwen Feng @ 2025-10-13  9:11 UTC (permalink / raw)
  To: thomas; +Cc: dev, liuyonglong

This commit adds control path ops for accelerator DMA driver.

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
---
 drivers/dma/acc/acc_dmadev.c | 185 +++++++++++++++++++++++++++++++++++
 drivers/dma/acc/acc_dmadev.h |  42 ++++++++
 2 files changed, 227 insertions(+)

diff --git a/drivers/dma/acc/acc_dmadev.c b/drivers/dma/acc/acc_dmadev.c
index fb9718312e..8a43d8b2dc 100644
--- a/drivers/dma/acc/acc_dmadev.c
+++ b/drivers/dma/acc/acc_dmadev.c
@@ -34,6 +34,190 @@ RTE_LOG_REGISTER_DEFAULT(acc_dma_logtype, INFO);
 #define ACC_DMA_ERR(hw, ...) \
 	ACC_DMA_DEV_LOG(hw, ERR, __VA_ARGS__)
 
+static int
+acc_dma_info_get(const struct rte_dma_dev *dev,
+		 struct rte_dma_info *dev_info,
+		 uint32_t info_sz)
+{
+	struct acc_dma_dev *hw = dev->data->dev_private;
+
+	RTE_SET_USED(info_sz);
+
+	dev_info->dev_capa = RTE_DMA_CAPA_MEM_TO_MEM |
+			     RTE_DMA_CAPA_SVA |
+			     RTE_DMA_CAPA_OPS_COPY |
+			     RTE_DMA_CAPA_OPS_FILL;
+	dev_info->max_vchans = 1;
+	dev_info->max_desc = hw->sq_depth;
+	dev_info->min_desc = hw->sq_depth;
+
+	return 0;
+}
+
+static int
+acc_dma_configure(struct rte_dma_dev *dev,
+		  const struct rte_dma_conf *conf,
+		  uint32_t conf_sz)
+{
+	RTE_SET_USED(dev);
+	RTE_SET_USED(conf);
+	RTE_SET_USED(conf_sz);
+	return 0;
+}
+
+/*
+ * dev_start callback.
+ *
+ * Fails with -EIO when the SQ or CQ status word is non-zero. If the queue was
+ * already started by an earlier call, only the ring indexes handed out to the
+ * application (ridx/cridx) are reset. Otherwise the SQE, CQE and status
+ * arrays are cleared, all ring bookkeeping and statistics are reset, and the
+ * UACCE queue is started.
+ *
+ * Returns 0 on success, -EIO when the device is abnormal, or the error code
+ * of rte_uacce_queue_start().
+ */
+static int
+acc_dma_start(struct rte_dma_dev *dev)
+{
+	struct acc_dma_dev *hw = dev->data->dev_private;
+	int ret;
+
+	if ((*hw->sq_status != 0) || (*hw->cq_status != 0)) {
+		ACC_DMA_ERR(hw, "detect dev is abnormal!");
+		return -EIO;
+	}
+
+	if (hw->started) {
+		hw->ridx = 0;
+		hw->cridx = 0;
+		return 0;
+	}
+
+	memset(hw->sqe, 0, hw->sqe_size * hw->sq_depth);
+	memset(hw->cqe, 0, sizeof(struct acc_dma_cqe) * hw->cq_depth);
+	memset(hw->status, 0, sizeof(uint16_t) * hw->sq_depth);
+	hw->ridx = 0;
+	hw->cridx = 0;
+	hw->sq_head = 0;
+	hw->sq_tail = 0;
+	hw->cq_sq_head = 0;
+	/* ACC_DMA_SQ_GAP_NUM + 1 SQEs are held back and never made available. */
+	hw->avail_sqes = hw->sq_depth - ACC_DMA_SQ_GAP_NUM - 1;
+	hw->cq_head = 0;
+	hw->cqs_completed = 0;
+	hw->cqe_vld = 1;
+	/* Clear every statistic, matching acc_dma_stats_reset(). */
+	hw->submitted = 0;
+	hw->completed = 0;
+	hw->errors = 0;
+	hw->invalid_lens = 0;
+	hw->io_errors = 0;
+	hw->qfulls = 0;
+
+	ret = rte_uacce_queue_start(&hw->qctx);
+	if (ret == 0)
+		hw->started = true;
+
+	return ret;
+}
+
+static int
+acc_dma_stop(struct rte_dma_dev *dev)
+{
+	struct acc_dma_dev *hw = dev->data->dev_private;
+
+	if ((*hw->sq_status != 0) || (*hw->cq_status != 0)) {
+		/* This indicates that the dev is abnormal. The correct error handling
+		 * is to close the dev (so that kernel module will perform error handling)
+		 * and apply for a new dev.
+		 * If an error code is returned here, the dev cannot be closed. Therefore,
+		 * zero is returned and an error trace is added.
+		 */
+		ACC_DMA_ERR(hw, "detect dev is abnormal!");
+		return 0;
+	}
+
+	return 0;
+}
+
+static int
+acc_dma_close(struct rte_dma_dev *dev)
+{
+	struct acc_dma_dev *hw = dev->data->dev_private;
+	/* The dmadev already stopped */
+	rte_free(hw->status);
+	rte_uacce_queue_unmap(&hw->qctx, RTE_UACCE_QFRT_DUS);
+	rte_uacce_queue_unmap(&hw->qctx, RTE_UACCE_QFRT_MMIO);
+	rte_uacce_queue_free(&hw->qctx);
+	return 0;
+}
+
+static int
+acc_dma_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan,
+		    const struct rte_dma_vchan_conf *conf,
+		    uint32_t conf_sz)
+{
+	RTE_SET_USED(dev);
+	RTE_SET_USED(vchan);
+	RTE_SET_USED(conf);
+	RTE_SET_USED(conf_sz);
+	return 0;
+}
+
+static int
+acc_dma_stats_get(const struct rte_dma_dev *dev, uint16_t vchan,
+		  struct rte_dma_stats *stats,
+		  uint32_t stats_sz)
+{
+	struct acc_dma_dev *hw = dev->data->dev_private;
+
+	RTE_SET_USED(vchan);
+	RTE_SET_USED(stats_sz);
+	stats->submitted = hw->submitted;
+	stats->completed = hw->completed;
+	stats->errors    = hw->errors;
+
+	return 0;
+}
+
+static int
+acc_dma_stats_reset(struct rte_dma_dev *dev, uint16_t vchan)
+{
+	struct acc_dma_dev *hw = dev->data->dev_private;
+
+	RTE_SET_USED(vchan);
+	hw->submitted    = 0;
+	hw->completed    = 0;
+	hw->errors       = 0;
+	hw->invalid_lens = 0;
+	hw->io_errors    = 0;
+	hw->qfulls       = 0;
+
+	return 0;
+}
+
+static int
+acc_dma_dump(const struct rte_dma_dev *dev, FILE *f)
+{
+	struct acc_dma_dev *hw = dev->data->dev_private;
+
+	fprintf(f, "  sqn: %u sq_status: %s cq_status: %s\n"
+		"  sqe_size: %u sq_depth: %u sq_depth_mask: %u cq_depth: %u\n",
+		hw->sqn, (*hw->sq_status != 0) ? "ERR" : "OK",
+		(*hw->cq_status != 0) ? "ERR" : "OK",
+		hw->sqe_size, hw->sq_depth, hw->sq_depth_mask, hw->cq_depth);
+	fprintf(f, "  ridx: %u cridx: %u\n"
+		"  sq_head: %u sq_tail: %u cq_sq_head: %u avail_sqes: %u\n"
+		"  cq_head: %u cqs_completed: %u cqe_vld: %u\n",
+		hw->ridx, hw->cridx,
+		hw->sq_head, hw->sq_tail, hw->cq_sq_head, hw->avail_sqes,
+		hw->cq_head, hw->cqs_completed, hw->cqe_vld);
+	fprintf(f, "  submitted: %" PRIu64 " completed: %" PRIu64 " errors: %" PRIu64
+		" invalid_lens: %" PRIu64 " io_errors: %" PRIu64 " qfulls: %" PRIu64 "\n",
+		hw->submitted, hw->completed, hw->errors, hw->invalid_lens,
+		hw->io_errors, hw->qfulls);
+
+	return 0;
+}
+
+static const struct rte_dma_dev_ops acc_dmadev_ops = {
+	.dev_info_get     = acc_dma_info_get,
+	.dev_configure    = acc_dma_configure,
+	.dev_start        = acc_dma_start,
+	.dev_stop         = acc_dma_stop,
+	.dev_close        = acc_dma_close,
+	.vchan_setup      = acc_dma_vchan_setup,
+	.stats_get        = acc_dma_stats_get,
+	.stats_reset      = acc_dma_stats_reset,
+	.dev_dump         = acc_dma_dump,
+};
+
 static void
 acc_dma_gen_dev_name(const struct rte_uacce_device *uacce_dev,
 		     uint16_t queue_id, char *dev_name, size_t size)
@@ -111,6 +295,7 @@ acc_dma_create(struct rte_uacce_device *uacce_dev, uint16_t queue_id)
 	}
 
 	dev->device = &uacce_dev->device;
+	dev->dev_ops = &acc_dmadev_ops;
 	dev->fp_obj->dev_private = dev->data->dev_private;
 
 	hw = dev->data->dev_private;
diff --git a/drivers/dma/acc/acc_dmadev.h b/drivers/dma/acc/acc_dmadev.h
index 9a1000fa41..6e65aad279 100644
--- a/drivers/dma/acc/acc_dmadev.h
+++ b/drivers/dma/acc/acc_dmadev.h
@@ -13,6 +13,9 @@
 #define ACC_DMA_DEVARG_QUEUES		"queues"
 #define ACC_DMA_DEFAULT_QUEUES		1
 
+#define ACC_DMA_CQ_DOORBELL_PACE	64
+#define ACC_DMA_SQ_GAP_NUM		ACC_DMA_CQ_DOORBELL_PACE
+
 struct acc_dma_config {
 	uint16_t queues;
 
@@ -38,7 +41,45 @@ struct acc_dma_dev {
 	uint16_t sqn;           /**< SQ global number, inited when created. */
 	uint16_t sq_depth_mask; /**< SQ depth - 1, the SQ depth is power of 2. */
 
+	uint16_t ridx;  /**< ring index which will assign to the next request. */
+	uint16_t cridx; /**< ring index which returned by completed APIs. */
+
+	/**
+	 * SQE array management fields:
+	 *
+	 *  -----------------------------------------------------
+	 *  | SQE0 | SQE1 | SQE2 |   ...  | SQEx | ... | SQEn-1 |
+	 *  -----------------------------------------------------
+	 *     ^             ^               ^
+	 *     |             |               |
+	 *   sq_head     cq_sq_head       sq_tail
+	 *
+	 *  sq_head: index of the oldest request not yet returned to the
+	 *           application; this field is updated by the completed* APIs.
+	 *  sq_tail: index of the next new request; this field is updated by
+	 *           the copy and fill APIs.
+	 *  cq_sq_head: index following the last SQE that hardware has
+	 *              completed; this field is updated by the completed* APIs.
+	 *
+	 *  [sq_head, cq_sq_head): the SQEs that hardware has already completed.
+	 *  [cq_sq_head, sq_tail): the SQEs that hardware is still processing.
+	 */
+	uint16_t sq_head;
+	uint16_t sq_tail;
+	uint16_t cq_sq_head;
+	uint16_t avail_sqes;
+
 	uint16_t cq_depth;      /**< CQ depth, inited when created. */
+	uint16_t cq_head;       /**< CQ index for next scans. */
+	uint16_t cqs_completed; /**< accumulated number of completed CQs. */
+	uint8_t  cqe_vld;       /**< valid bit for CQE, will change for every round. */
+
+	uint64_t submitted;
+	uint64_t completed;
+	uint64_t errors;
+	uint64_t invalid_lens;
+	uint64_t io_errors;
+	uint64_t qfulls;
 
 	/**
 	 * The following fields are not accessed in the I/O path, so they are
@@ -50,6 +91,7 @@ struct acc_dma_dev {
 	void *dus_base;
 	uint32_t sqe_size;
 	uint16_t sq_depth;
+	bool started;
 };
 
 #endif /* ACC_DMADEV_H */
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH v4 3/4] dma/acc: add data path ops
  2025-10-13  9:11 ` [PATCH v4 0/4] add Hisilicon accelerator DMA driver Chengwen Feng
  2025-10-13  9:11   ` [PATCH v4 1/4] dma/acc: add probe and remove Chengwen Feng
  2025-10-13  9:11   ` [PATCH v4 2/4] dma/acc: add control path ops Chengwen Feng
@ 2025-10-13  9:11   ` Chengwen Feng
  2025-10-13  9:11   ` [PATCH v4 4/4] dma/acc: add doc Chengwen Feng
  3 siblings, 0 replies; 37+ messages in thread
From: Chengwen Feng @ 2025-10-13  9:11 UTC (permalink / raw)
  To: thomas; +Cc: dev, liuyonglong

This commit adds data path ops for accelerator DMA driver.

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
---
 drivers/dma/acc/acc_dmadev.c | 322 +++++++++++++++++++++++++++++++++--
 drivers/dma/acc/acc_dmadev.h |  76 ++++++++-
 2 files changed, 385 insertions(+), 13 deletions(-)

diff --git a/drivers/dma/acc/acc_dmadev.c b/drivers/dma/acc/acc_dmadev.c
index 8a43d8b2dc..29a6e96013 100644
--- a/drivers/dma/acc/acc_dmadev.c
+++ b/drivers/dma/acc/acc_dmadev.c
@@ -8,6 +8,7 @@
 #include <sys/ioctl.h>
 
 #include <rte_byteorder.h>
+#include <rte_cycles.h>
 #include <rte_eal.h>
 #include <rte_io.h>
 #include <rte_kvargs.h>
@@ -79,6 +80,7 @@ acc_dma_start(struct rte_dma_dev *dev)
 	if (hw->started) {
 		hw->ridx = 0;
 		hw->cridx = 0;
+		hw->stop_proc = 0;
 		return 0;
 	}
 
@@ -94,6 +96,7 @@ acc_dma_start(struct rte_dma_dev *dev)
 	hw->cq_head = 0;
 	hw->cqs_completed = 0;
 	hw->cqe_vld = 1;
+	hw->stop_proc = 0;
 	hw->submitted = 0;
 	hw->completed = 0;
 	hw->errors = 0;
@@ -110,17 +113,33 @@ acc_dma_start(struct rte_dma_dev *dev)
 static int
 acc_dma_stop(struct rte_dma_dev *dev)
 {
+#define MAX_WAIT_MSEC	1000
+#define MAX_CPL_NUM	64
 	struct acc_dma_dev *hw = dev->data->dev_private;
-
-	if ((*hw->sq_status != 0) || (*hw->cq_status != 0)) {
-		/* This indicates that the dev is abnormal. The correct error handling
-		 * is to close the dev (so that kernel module will perform error handling)
-		 * and apply for a new dev.
-		 * If an error code is returned here, the dev cannot be closed. Therefore,
-		 * zero is returned and an error trace is added.
-		 */
-		ACC_DMA_ERR(hw, "detect dev is abnormal!");
-		return 0;
+	uint32_t wait_msec = 0;
+
+	/* Flag stop processing new requests. */
+	hw->stop_proc = 1;
+	/* Currently, there is no method to notify the hardware to stop.
+	 * Therefore, the timeout mechanism is used to wait for the dataplane
+	 * to stop.
+	 */
+	while (hw->sq_head != hw->sq_tail && wait_msec++ < MAX_WAIT_MSEC) {
+		if ((*hw->sq_status != 0) || (*hw->cq_status != 0)) {
+			/* This indicates that the dev is abnormal. The correct error handling
+			 * is to close the dev (so that kernel module will perform error handling)
+			 * and apply for a new dev.
+			 * If an error code is returned here, the dev cannot be closed. Therefore,
+			 * zero is returned and an error trace is added.
+			 */
+			ACC_DMA_ERR(hw, "detect dev is abnormal!");
+			return 0;
+		}
+		rte_delay_ms(1);
+	}
+	if (hw->sq_head != hw->sq_tail) {
+		ACC_DMA_ERR(hw, "dev is still active!");
+		return -EBUSY;
 	}
 
 	return 0;
@@ -187,10 +206,11 @@ acc_dma_dump(const struct rte_dma_dev *dev, FILE *f)
 {
 	struct acc_dma_dev *hw = dev->data->dev_private;
 
-	fprintf(f, "  sqn: %u sq_status: %s cq_status: %s\n"
+	fprintf(f, "  sqn: %u sq_status: %s cq_status: %s stop_proc: %u\n"
 		"  sqe_size: %u sq_depth: %u sq_depth_mask: %u cq_depth: %u\n",
 		hw->sqn, (*hw->sq_status != 0) ? "ERR" : "OK",
 		(*hw->cq_status != 0) ? "ERR" : "OK",
+		hw->stop_proc,
 		hw->sqe_size, hw->sq_depth, hw->sq_depth_mask, hw->cq_depth);
 	fprintf(f, "  ridx: %u cridx: %u\n"
 		"  sq_head: %u sq_tail: %u cq_sq_head: %u avail_sqes: %u\n"
@@ -206,6 +226,280 @@ acc_dma_dump(const struct rte_dma_dev *dev, FILE *f)
 	return 0;
 }
 
+/*
+ * Ring the submission-queue doorbell.
+ *
+ * Builds a 64-bit doorbell word from the queue number (masked with
+ * ACC_DMA_DOORBELL_SQN_MASK), the SQ command code (shifted by
+ * ACC_DMA_DOORBELL_CMD_SHIFT) and the current sq_tail (shifted by
+ * ACC_DMA_DOORBELL_IDX_SHIFT), then stores it to the doorbell register.
+ */
+static inline void
+acc_dma_sq_doorbell(struct acc_dma_dev *hw)
+{
+	uint64_t doorbell = (uint64_t)(hw->sqn & ACC_DMA_DOORBELL_SQN_MASK) |
+			    (ACC_DMA_DOORBELL_SQ_CMD << ACC_DMA_DOORBELL_CMD_SHIFT) |
+			    (((uint64_t)hw->sq_tail) << ACC_DMA_DOORBELL_IDX_SHIFT);
+	/* Write barrier before the register store; presumably so the SQE
+	 * contents written by copy/fill are ordered ahead of the doorbell.
+	 */
+	rte_io_wmb();
+	*(volatile uint64_t *)hw->doorbell_reg = doorbell;
+}
+
+/*
+ * Fast-path copy callback: enqueue one memory-copy request on the SQ.
+ *
+ * On success the SQE at sq_tail is filled in (source address in addr_array,
+ * destination in dst_addr, write-back word cleared), sq_tail advances modulo
+ * the SQ depth, avail_sqes is decremented and the submitted counter is
+ * incremented. The SQ doorbell is rung only when RTE_DMA_OP_FLAG_SUBMIT is
+ * set in flags. The vchan argument is ignored.
+ *
+ * Returns the ring index assigned to the request (the value of hw->ridx
+ * before it is post-incremented), or a negative errno:
+ *   -EPERM   stop_proc is set (set by acc_dma_stop()),
+ *   -EINVAL  length exceeds ACC_DMA_MAX_OP_SIZE (invalid_lens is counted),
+ *   -EIO     the SQ status word is non-zero (io_errors is counted),
+ *   -ENOSPC  no SQE is available (qfulls is counted).
+ */
+static int
+acc_dma_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t dst,
+	     uint32_t length, uint64_t flags)
+{
+	struct acc_dma_dev *hw = dev_private;
+	/* Only the address is taken here; the SQE is written after all checks. */
+	struct acc_dma_sqe *sqe = &hw->sqe[hw->sq_tail];
+
+	RTE_SET_USED(vchan);
+
+	if (unlikely(hw->stop_proc > 0))
+		return -EPERM;
+
+	if (unlikely(length > ACC_DMA_MAX_OP_SIZE)) {
+		hw->invalid_lens++;
+		return -EINVAL;
+	}
+
+	if (unlikely(*hw->sq_status != 0)) {
+		hw->io_errors++;
+		return -EIO;
+	}
+
+	if (hw->avail_sqes == 0) {
+		hw->qfulls++;
+		return -ENOSPC;
+	}
+
+	sqe->bd_type       = ACC_DMA_SQE_TYPE;
+	sqe->task_type     = ACC_DMA_TASK_TYPE;
+	sqe->task_type_ext = ACC_DMA_DATA_MEMCPY;
+	sqe->init_val      = 0;
+	sqe->addr_array    = src;
+	sqe->dst_addr      = dst;
+	sqe->data_size     = length;
+	sqe->dw0           = ACC_DMA_SVA_PREFETCH_EN;
+	/* Clear the write-back word that acc_dma_scan_cq() inspects later. */
+	sqe->wb_field      = 0;
+
+	hw->sq_tail = (hw->sq_tail + 1) & hw->sq_depth_mask;
+	hw->avail_sqes--;
+	hw->submitted++;
+
+	if (flags & RTE_DMA_OP_FLAG_SUBMIT)
+		acc_dma_sq_doorbell(hw);
+
+	return hw->ridx++;
+}
+
+/*
+ * Fast-path fill callback: enqueue one memory-fill request on the SQ.
+ *
+ * Identical to acc_dma_copy() except for the SQE contents: task_type_ext is
+ * ACC_DMA_DATA_MEMSET, the 64-bit pattern goes into init_val and addr_array
+ * is zero. The SQ doorbell is rung only when RTE_DMA_OP_FLAG_SUBMIT is set
+ * in flags. The vchan argument is ignored.
+ *
+ * Returns the ring index assigned to the request (the value of hw->ridx
+ * before it is post-incremented), or a negative errno:
+ *   -EPERM   stop_proc is set (set by acc_dma_stop()),
+ *   -EINVAL  length exceeds ACC_DMA_MAX_OP_SIZE (invalid_lens is counted),
+ *   -EIO     the SQ status word is non-zero (io_errors is counted),
+ *   -ENOSPC  no SQE is available (qfulls is counted).
+ */
+static int
+acc_dma_fill(void *dev_private, uint16_t vchan, uint64_t pattern,
+	     rte_iova_t dst, uint32_t length, uint64_t flags)
+{
+	struct acc_dma_dev *hw = dev_private;
+	/* Only the address is taken here; the SQE is written after all checks. */
+	struct acc_dma_sqe *sqe = &hw->sqe[hw->sq_tail];
+
+	RTE_SET_USED(vchan);
+
+	if (unlikely(hw->stop_proc > 0))
+		return -EPERM;
+
+	if (unlikely(length > ACC_DMA_MAX_OP_SIZE)) {
+		hw->invalid_lens++;
+		return -EINVAL;
+	}
+
+	if (unlikely(*hw->sq_status != 0)) {
+		hw->io_errors++;
+		return -EIO;
+	}
+
+	if (hw->avail_sqes == 0) {
+		hw->qfulls++;
+		return -ENOSPC;
+	}
+
+	sqe->bd_type       = ACC_DMA_SQE_TYPE;
+	sqe->task_type     = ACC_DMA_TASK_TYPE;
+	sqe->task_type_ext = ACC_DMA_DATA_MEMSET;
+	sqe->init_val      = pattern;
+	sqe->addr_array    = 0;
+	sqe->dst_addr      = dst;
+	sqe->data_size     = length;
+	sqe->dw0           = ACC_DMA_SVA_PREFETCH_EN;
+	/* Clear the write-back word that acc_dma_scan_cq() inspects later. */
+	sqe->wb_field      = 0;
+
+	hw->sq_tail = (hw->sq_tail + 1) & hw->sq_depth_mask;
+	hw->avail_sqes--;
+	hw->submitted++;
+
+	if (flags & RTE_DMA_OP_FLAG_SUBMIT)
+		acc_dma_sq_doorbell(hw);
+
+	return hw->ridx++;
+}
+
+/*
+ * Fast-path submit callback: ring the SQ doorbell with the current sq_tail
+ * so that requests enqueued without RTE_DMA_OP_FLAG_SUBMIT are handed over.
+ *
+ * The vchan argument is ignored. Unlike copy/fill, stop_proc is not checked.
+ *
+ * Returns 0, or -EIO (and counts an io_error) when the SQ status word is
+ * non-zero; the doorbell is not rung in that case.
+ */
+static int
+acc_dma_submit(void *dev_private, uint16_t vchan)
+{
+	struct acc_dma_dev *hw = dev_private;
+
+	RTE_SET_USED(vchan);
+
+	if (unlikely(*hw->sq_status != 0)) {
+		hw->io_errors++;
+		return -EIO;
+	}
+
+	acc_dma_sq_doorbell(hw);
+
+	return 0;
+}
+
+/*
+ * Ring the completion-queue doorbell.
+ *
+ * Same word layout as the SQ doorbell, but with the CQ command code and the
+ * current cq_head as the index. Called from acc_dma_scan_cq() once
+ * ACC_DMA_CQ_DOORBELL_PACE CQEs have been consumed.
+ */
+static inline void
+acc_dma_cq_doorbell(struct acc_dma_dev *hw)
+{
+	uint64_t doorbell = (uint64_t)(hw->sqn & ACC_DMA_DOORBELL_SQN_MASK) |
+			    (ACC_DMA_DOORBELL_CQ_CMD << ACC_DMA_DOORBELL_CMD_SHIFT) |
+			    (((uint64_t)hw->cq_head) << ACC_DMA_DOORBELL_IDX_SHIFT);
+	rte_io_wmb();
+	*(volatile uint64_t *)hw->doorbell_reg = doorbell;
+}
+
+/*
+ * Harvest newly written CQEs and account for the SQEs they complete.
+ *
+ * Starting at cq_head, consumes CQEs whose valid bit matches the expected
+ * value (at most cq_depth per call). The SQ head carried by each CQE selects
+ * the SQE whose write-back fields are checked: anything other than a clean
+ * ACC_DMA_TASK_DONE marks status[] for that slot as
+ * RTE_DMA_STATUS_ERROR_UNKNOWN.
+ *
+ * When at least one CQE was consumed, cq_head, the expected valid bit,
+ * cq_sq_head, avail_sqes and cqs_completed are updated together, and the CQ
+ * doorbell is rung once ACC_DMA_CQ_DOORBELL_PACE CQEs have accumulated.
+ *
+ * Nothing is scanned (an io_error is counted) when the CQ status word is
+ * non-zero.
+ */
+static inline void
+acc_dma_scan_cq(struct acc_dma_dev *hw)
+{
+	volatile struct acc_dma_cqe *cqe;
+	struct acc_dma_sqe *sqe;
+	uint16_t csq_head = hw->cq_sq_head;
+	uint16_t cq_head = hw->cq_head;
+	/*
+	 * Track the expected valid bit locally and commit it together with
+	 * cq_head, so that an aborted scan cannot leave hw->cqe_vld toggled
+	 * while hw->cq_head still points at the pre-wrap position.
+	 */
+	uint8_t cqe_vld = hw->cqe_vld;
+	uint16_t count = 0;
+	uint64_t misc;
+
+	if (unlikely(*hw->cq_status != 0)) {
+		hw->io_errors++;
+		return;
+	}
+
+	while (count < hw->cq_depth) {
+		cqe = &hw->cqe[cq_head];
+		misc = cqe->misc;
+		misc = rte_le_to_cpu_64(misc);
+		if (RTE_FIELD_GET64(ACC_DMA_CQE_VALID_B, misc) != cqe_vld)
+			break;
+
+		csq_head = RTE_FIELD_GET64(ACC_DMA_SQ_HEAD_MASK, misc);
+		if (unlikely(csq_head > hw->sq_depth_mask)) {
+			/**
+			 * Defensive check: csq_head indexes hw->sqe[] and
+			 * hw->status[], so an out-of-range value must not be
+			 * used. Log it and abandon the whole scan; neither
+			 * the ring indexes nor the expected valid bit are
+			 * committed.
+			 */
+			ACC_DMA_ERR(hw, "invalid csq_head: %u!", csq_head);
+			count = 0;
+			break;
+		}
+		sqe = &hw->sqe[csq_head];
+		if (sqe->done_flag != ACC_DMA_TASK_DONE ||
+			sqe->err_type || sqe->ext_err_type || sqe->wtype) {
+			hw->status[csq_head] = RTE_DMA_STATUS_ERROR_UNKNOWN;
+		}
+
+		count++;
+		cq_head++;
+		if (cq_head == hw->cq_depth) {
+			/* The CQ wrapped: the valid bit flips for the next round. */
+			cqe_vld = !cqe_vld;
+			cq_head = 0;
+		}
+	}
+
+	if (count == 0)
+		return;
+
+	hw->cq_head = cq_head;
+	hw->cqe_vld = cqe_vld;
+	/* csq_head holds the SQ head reported by the last CQE consumed. */
+	hw->cq_sq_head = (csq_head + 1) & hw->sq_depth_mask;
+	hw->avail_sqes += count;
+	hw->cqs_completed += count;
+	if (hw->cqs_completed >= ACC_DMA_CQ_DOORBELL_PACE) {
+		acc_dma_cq_doorbell(hw);
+		hw->cqs_completed = 0;
+	}
+}
+
+/*
+ * Return how many requests can be reported to the caller: the number of
+ * slots in [sq_head, cq_sq_head), taking ring wrap-around into account,
+ * capped at nb_cpls.
+ */
+static inline uint16_t
+acc_dma_calc_cpls(struct acc_dma_dev *hw, const uint16_t nb_cpls)
+{
+	uint16_t cpl_num;
+
+	if (hw->cq_sq_head >= hw->sq_head)
+		cpl_num = hw->cq_sq_head - hw->sq_head;
+	else
+		/* cq_sq_head has wrapped: sq_depth_mask + 1 is the SQ depth. */
+		cpl_num = hw->sq_depth_mask + 1 - hw->sq_head + hw->cq_sq_head;
+
+	if (cpl_num > nb_cpls)
+		cpl_num = nb_cpls;
+
+	return cpl_num;
+}
+
+/*
+ * Fast-path completed callback: report requests that finished without error.
+ *
+ * Scans the CQ, then walks up to nb_cpls hardware-completed requests starting
+ * at sq_head and stops at the first one whose status[] entry is non-zero,
+ * setting *has_error to true. That failed request is not consumed here and
+ * its status[] entry is left untouched (acc_dma_completed_status() is the
+ * function that reads and clears status[]). The errors counter is not
+ * updated by this function, and *has_error is never written as false.
+ *
+ * *last_idx is set to cridx + n - 1 (uint16_t arithmetic), where n is the
+ * return value; with n == 0 that is the ring index preceding cridx.
+ * The vchan argument is ignored.
+ *
+ * Returns the number of requests consumed.
+ */
+static uint16_t
+acc_dma_completed(void *dev_private,
+		  uint16_t vchan, const uint16_t nb_cpls,
+		  uint16_t *last_idx, bool *has_error)
+{
+	struct acc_dma_dev *hw = dev_private;
+	uint16_t sq_head = hw->sq_head;
+	uint16_t cpl_num, i;
+
+	RTE_SET_USED(vchan);
+	acc_dma_scan_cq(hw);
+
+	cpl_num = acc_dma_calc_cpls(hw, nb_cpls);
+	for (i = 0; i < cpl_num; i++) {
+		if (hw->status[sq_head]) {
+			*has_error = true;
+			break;
+		}
+		sq_head = (sq_head + 1) & hw->sq_depth_mask;
+	}
+	*last_idx = hw->cridx + i - 1;
+	if (i > 0) {
+		hw->cridx += i;
+		hw->sq_head = sq_head;
+		hw->completed += i;
+	}
+
+	return i;
+}
+
+/*
+ * Fast-path completed_status callback: report completed requests together
+ * with their individual status codes.
+ *
+ * Scans the CQ, then for up to nb_cpls hardware-completed requests starting
+ * at sq_head copies status[] into the caller's array, counts every non-zero
+ * status in the errors counter and clears the status[] entry. Failed
+ * requests are consumed like successful ones.
+ *
+ * *last_idx is set to cridx + n - 1 (uint16_t arithmetic), where n is the
+ * return value; with n == 0 that is the ring index preceding cridx.
+ * The vchan argument is ignored.
+ *
+ * Returns the number of requests consumed; status[0..n-1] are valid.
+ */
+static uint16_t
+acc_dma_completed_status(void *dev_private,
+			 uint16_t vchan, const uint16_t nb_cpls,
+			 uint16_t *last_idx, enum rte_dma_status_code *status)
+{
+	struct acc_dma_dev *hw = dev_private;
+	uint16_t sq_head = hw->sq_head;
+	uint16_t cpl_num, i;
+
+	RTE_SET_USED(vchan);
+	acc_dma_scan_cq(hw);
+
+	cpl_num = acc_dma_calc_cpls(hw, nb_cpls);
+	for (i = 0; i < cpl_num; i++) {
+		status[i] = hw->status[sq_head];
+		hw->errors += !!status[i];
+		hw->status[sq_head] = 0;
+		sq_head = (sq_head + 1) & hw->sq_depth_mask;
+	}
+	*last_idx = hw->cridx + cpl_num - 1;
+	if (likely(cpl_num > 0)) {
+		hw->cridx += cpl_num;
+		hw->sq_head = sq_head;
+		hw->completed += cpl_num;
+	}
+
+	return cpl_num;
+}
+
+/*
+ * Fast-path burst_capacity callback: return the number of SQEs currently
+ * available for new requests (avail_sqes). The value only grows when one of
+ * the completed* callbacks scans the CQ. The vchan argument is ignored.
+ */
+static uint16_t
+acc_dma_burst_capacity(const void *dev_private, uint16_t vchan)
+{
+	const struct acc_dma_dev *hw = dev_private;
+	RTE_SET_USED(vchan);
+	return hw->avail_sqes;
+}
+
 static const struct rte_dma_dev_ops acc_dmadev_ops = {
 	.dev_info_get     = acc_dma_info_get,
 	.dev_configure    = acc_dma_configure,
@@ -297,6 +591,12 @@ acc_dma_create(struct rte_uacce_device *uacce_dev, uint16_t queue_id)
 	dev->device = &uacce_dev->device;
 	dev->dev_ops = &acc_dmadev_ops;
 	dev->fp_obj->dev_private = dev->data->dev_private;
+	dev->fp_obj->copy = acc_dma_copy;
+	dev->fp_obj->fill = acc_dma_fill;
+	dev->fp_obj->submit = acc_dma_submit;
+	dev->fp_obj->completed = acc_dma_completed;
+	dev->fp_obj->completed_status = acc_dma_completed_status;
+	dev->fp_obj->burst_capacity = acc_dma_burst_capacity;
 
 	hw = dev->data->dev_private;
 	hw->data = dev->data; /* make sure ACC_DMA_DEBUG/INFO/WARN/ERR was available. */
diff --git a/drivers/dma/acc/acc_dmadev.h b/drivers/dma/acc/acc_dmadev.h
index 6e65aad279..fa1d76c870 100644
--- a/drivers/dma/acc/acc_dmadev.h
+++ b/drivers/dma/acc/acc_dmadev.h
@@ -25,9 +25,80 @@ struct acc_dma_config {
 };
 
 #define ACC_DMA_DOORBELL_OFFSET		0x1000u
+#define ACC_DMA_DOORBELL_SQN_MASK	0x3FFu
+#define ACC_DMA_DOORBELL_SQ_CMD		0ull
+#define ACC_DMA_DOORBELL_CQ_CMD		1ull
+#define ACC_DMA_DOORBELL_CMD_SHIFT	12
+#define ACC_DMA_DOORBELL_IDX_SHIFT	32
+
+#define ACC_DMA_TASK_TYPE		0x3
+#define ACC_DMA_SQE_TYPE		0x1
+#define ACC_DMA_SVA_PREFETCH_EN		RTE_BIT32(15)
+#define ACC_DMA_MAX_OP_SIZE		(RTE_BIT32(24) - 1)
+
+/* Operation selectors written to acc_dma_sqe.task_type_ext by copy/fill. */
+enum {
+	ACC_DMA_DATA_MEMCPY = 0,
+	ACC_DMA_DATA_MEMSET = 7,
+};
+
+/*
+ * Values of acc_dma_sqe.done_flag. acc_dma_scan_cq() treats only
+ * ACC_DMA_TASK_DONE as success; any other value is reported as an error.
+ */
+enum {
+	ACC_DMA_TASK_DONE = 1,
+	ACC_DMA_TASK_ERROR,
+};
 
-struct acc_dma_sqe {};
-struct acc_dma_cqe {};
+/*
+ * Submission queue entry, filled by acc_dma_copy()/acc_dma_fill() and
+ * partially written back by the device on completion.
+ *
+ * NOTE(review): the member sizes add up to 128 bytes assuming the bit-fields
+ * pack into single 32-bit words - confirm against hw->sqe_size.
+ */
+struct acc_dma_sqe {
+	/* Set to ACC_DMA_SQE_TYPE by the driver. */
+	uint32_t bd_type : 6;
+	uint32_t resv1 : 2;
+	/* Set to ACC_DMA_TASK_TYPE by the driver. */
+	uint32_t task_type : 6;
+	uint32_t resv2 : 2;
+	/* ACC_DMA_DATA_MEMCPY or ACC_DMA_DATA_MEMSET. */
+	uint32_t task_type_ext : 6;
+	uint32_t resv3 : 9;
+	/* Not written by the visible datapath code (spelling kept as-is). */
+	uint32_t bd_invlid : 1;
+	uint32_t rsv4[2];
+	uint32_t low_tag;
+	uint32_t hi_tag;
+	/* The number of bytes to be copied or filled for single address. */
+	uint32_t data_size;
+	uint32_t rsv5;
+	/*
+	 * 0 ~ 13 bits: reserved,
+	 * 14 bit: single address or multi addresses,
+	 * 15 bit: sva prefetch en.
+	 */
+	uint16_t dw0;
+	/*
+	 * 0 ~ 5 bits: reserved,
+	 * 6 ~ 13 bits: address num,
+	 * 14 ~ 15 bits: reserved.
+	 */
+	uint16_t dw1;
+	/* Fill pattern for memset tasks; written as 0 for copy tasks. */
+	uint64_t init_val;
+	uint32_t rsv6[12];
+	/* dst addr for single address task. */
+	uint64_t dst_addr;
+	uint32_t rsv7[2];
+	/* src addr for single address task, addr array for multi addresses. */
+	uint64_t addr_array;
+	/*
+	 * Write-back word: cleared through wb_field before submission, then
+	 * read field by field in acc_dma_scan_cq() to detect failed tasks.
+	 */
+	union {
+		uint32_t wb_field;
+		struct {
+			uint32_t done_flag : 3;
+			uint32_t rsv8 : 1;
+			uint32_t ext_err_type : 12;
+			uint32_t err_type : 8;
+			uint32_t wtype : 8;
+		};
+	};
+	uint32_t rsv9[3];
+};
+
+/* Fields of acc_dma_cqe.misc (after little-endian to CPU conversion). */
+/* Bits 15:0: SQ index of the request this CQE completes. */
+#define ACC_DMA_SQ_HEAD_MASK		RTE_GENMASK64(15, 0)
+/* Bit 48: valid bit, compared with hw->cqe_vld, which flips on every CQ wrap. */
+#define ACC_DMA_CQE_VALID_B		RTE_BIT64(48)
+
+/* Completion queue entry; only the misc word is read by the driver. */
+struct acc_dma_cqe {
+	uint64_t rsv;
+	uint64_t misc;
+};
 
 struct acc_dma_dev {
 	struct acc_dma_sqe *sqe;
@@ -73,6 +144,7 @@ struct acc_dma_dev {
 	uint16_t cq_head;       /**< CQ index for next scans. */
 	uint16_t cqs_completed; /**< accumulated number of completed CQs. */
 	uint8_t  cqe_vld;       /**< valid bit for CQE, will change for every round. */
+	volatile uint8_t stop_proc; /**< whether stop processing new requests. */
 
 	uint64_t submitted;
 	uint64_t completed;
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH v4 4/4] dma/acc: add doc
  2025-10-13  9:11 ` [PATCH v4 0/4] add Hisilicon accelerator DMA driver Chengwen Feng
                     ` (2 preceding siblings ...)
  2025-10-13  9:11   ` [PATCH v4 3/4] dma/acc: add data " Chengwen Feng
@ 2025-10-13  9:11   ` Chengwen Feng
  2025-10-15 10:05     ` Thomas Monjalon
  3 siblings, 1 reply; 37+ messages in thread
From: Chengwen Feng @ 2025-10-13  9:11 UTC (permalink / raw)
  To: thomas; +Cc: dev, liuyonglong

This commit adds document for accelerator DMA driver.

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
---
 MAINTAINERS                            |  1 +
 doc/guides/dmadevs/acc.rst             | 63 ++++++++++++++++++++++++++
 doc/guides/dmadevs/index.rst           |  1 +
 doc/guides/rel_notes/release_25_11.rst |  6 +++
 4 files changed, 71 insertions(+)
 create mode 100644 doc/guides/dmadevs/acc.rst

diff --git a/MAINTAINERS b/MAINTAINERS
index 37a5e1c476..00437e80bf 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1369,6 +1369,7 @@ F: doc/guides/dmadevs/hisilicon.rst
 HiSilicon Accelerator DMA
 M: Chengwen Feng <fengchengwen@huawei.com>
 F: drivers/dma/acc/
+F: doc/guides/dmadevs/acc.rst
 
 Marvell CNXK DPI DMA
 M: Vamsi Attunuru <vattunuru@marvell.com>
diff --git a/doc/guides/dmadevs/acc.rst b/doc/guides/dmadevs/acc.rst
new file mode 100644
index 0000000000..41fc24877f
--- /dev/null
+++ b/doc/guides/dmadevs/acc.rst
@@ -0,0 +1,63 @@
+..  SPDX-License-Identifier: BSD-3-Clause
+    Copyright (c) 2025 HiSilicon Technologies Co., Ltd. All rights reserved.
+
+HISILICON Accelerator DMA Driver
+================================
+
+Kunpeng SoC has an internal accelerator unit which includes zip function, and
+the zip also supports data copy and fill. This driver exposes this capability to
+DPDK application.
+
+
+Supported Kunpeng SoCs
+----------------------
+
+* Kunpeng 920
+
+
+Device Setup
+-------------
+
+To use the device in DPDK, the user should insmod uacce.ko, hisi_qm.ko and
+hisi_zip.ko (with module parameter uacce_mode=1). Several subdirectories whose
+names start with hisi_zip will then appear in the /sys/class/uacce/ directory.
+
+Device Probing and Initialization
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Use the following method to probe the device::
+
+        $ dpdk-app -a uacce:hisi_zip-0,queues=2 ...
+
+``hisi_zip-0`` is the directory name under /sys/class/uacce/, and ``queues`` is
+a runtime configuration parameter which indicates how many dmadevs are created.
+
+If the probe is successful, two dmadevs are created, named "hisi_zip-0-dma0"
+and "hisi_zip-0-dma1".
+
+.. note::
+        In the /sys/class/uacce/hisi_zip-x/ directory, the user can query ``api``
+        and ``algorithms``. This driver only matches a device whose api is
+        hisi_qm_v5 and whose algorithms contain udma.
+
+Device Configuration
+~~~~~~~~~~~~~~~~~~~~~
+
+Configuration requirements:
+
+* ``ring_size`` is obtained from the UACCE API and is a fixed value.
+* Only one ``vchan`` is supported per ``dmadev``.
+* Silent mode is not supported.
+* The transfer direction must be set to ``RTE_DMA_DIR_MEM_TO_MEM``.
+
+
+Device Datapath Capability and Limitation
+-----------------------------------------
+
+Support memory copy and fill operations.
+
+.. note::
+        Currently, the driver limits the size of a single operation to 16MB-1B.
+        The device actually supports larger operations, but supporting them
+        would require complex handling in the driver datapath. If you have
+        such a requirement, please contact the maintainers.
diff --git a/doc/guides/dmadevs/index.rst b/doc/guides/dmadevs/index.rst
index 15ddaf5192..dcc8c189ba 100644
--- a/doc/guides/dmadevs/index.rst
+++ b/doc/guides/dmadevs/index.rst
@@ -11,6 +11,7 @@ an application through DMA API.
    :maxdepth: 1
    :numbered:
 
+   acc
    cnxk
    dpaa
    dpaa2
diff --git a/doc/guides/rel_notes/release_25_11.rst b/doc/guides/rel_notes/release_25_11.rst
index c3b94e1896..8d19eff2bc 100644
--- a/doc/guides/rel_notes/release_25_11.rst
+++ b/doc/guides/rel_notes/release_25_11.rst
@@ -60,6 +60,12 @@ New Features
   Added Ethernet link speed for 800 Gb/s as it is well standardized in IEEE,
   and some devices already support this speed.
 
+* **Added HiSilicon accelerator DMA driver.**
+
+  Kunpeng SoC has an internal accelerator unit which includes a zip function,
+  and the zip also supports data copy and fill. This driver exposes this
+  capability to DPDK applications.
+
 * **Updated NXP DPAA2 ethernet driver.**
 
   * Enabled software taildrop for ordered queues.
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* Re: [PATCH v4 4/4] dma/acc: add doc
  2025-10-13  9:11   ` [PATCH v4 4/4] dma/acc: add doc Chengwen Feng
@ 2025-10-15 10:05     ` Thomas Monjalon
  2025-10-15 11:10       ` fengchengwen
  0 siblings, 1 reply; 37+ messages in thread
From: Thomas Monjalon @ 2025-10-15 10:05 UTC (permalink / raw)
  To: Chengwen Feng; +Cc: dev, liuyonglong

13/10/2025 11:11, Chengwen Feng:
> +HISILICON Accelerator DMA Driver
> +================================
> +
> +Kunpeng SoC has an internal accelerator unit which includes zip function, and
> +the zip also supports data copy and fill. This driver exposes this capability to
> +DPDK application.

How is it different of drivers/dma/hisilicon/ ?

"
Kunpeng SoC has an internal DMA unit which can be used by application
to accelerate data copies.
"

If it is the same hardware (Kunpeng),
I recommend having a single driver, or a single directory.
Also we already have a driver named "acc" in baseband.



^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH v4 4/4] dma/acc: add doc
  2025-10-15 10:05     ` Thomas Monjalon
@ 2025-10-15 11:10       ` fengchengwen
  2025-10-15 12:01         ` Thomas Monjalon
  0 siblings, 1 reply; 37+ messages in thread
From: fengchengwen @ 2025-10-15 11:10 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: dev, liuyonglong

On 2025/10/15 18:05, Thomas Monjalon wrote:
> 13/10/2025 11:11, Chengwen Feng:
>> +HISILICON Accelerator DMA Driver
>> +================================
>> +
>> +Kunpeng SoC has an internal accelerator unit which includes zip function, and
>> +the zip also supports data copy and fill. This driver exposes this capability to
>> +DPDK application.
> How is it different of drivers/dma/hisilicon/ ?
>
> "
> Kunpeng SoC has an internal DMA unit which can be used by application
> to accelerate data copies.
> "
>
> If it is the same hardware (Kunpeng),
> I recommend having a single driver, or a single directory.
> Also we already have a driver named "acc" in baseband.


they belong to different hardware IP of Kunpeng platform, /dma/hisilicon 
is the PCIE subsystem DMA's driver, while this acc is the 
Accelerator(ZIP) subsystem DMA's driver.  they don't exist in the same 
specific Kunpeng Soc.

a single driver will add complex.

as for a single directory, should we do for all vendor?  I see there 
are  dpaa and dpaa2, ioat and idxd both belong to vendor platform.

if there are no rule to have same name with different subsystem driver, 
I prefer keep it.


>
>

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH v4 4/4] dma/acc: add doc
  2025-10-15 11:10       ` fengchengwen
@ 2025-10-15 12:01         ` Thomas Monjalon
  2025-10-15 12:55           ` fengchengwen
  0 siblings, 1 reply; 37+ messages in thread
From: Thomas Monjalon @ 2025-10-15 12:01 UTC (permalink / raw)
  To: fengchengwen; +Cc: dev, liuyonglong

15/10/2025 13:10, fengchengwen:
> On 2025/10/15 18:05, Thomas Monjalon wrote:
> > 13/10/2025 11:11, Chengwen Feng:
> >> +HISILICON Accelerator DMA Driver
> >> +================================
> >> +
> >> +Kunpeng SoC has an internal accelerator unit which includes zip function, and
> >> +the zip also supports data copy and fill. This driver exposes this capability to
> >> +DPDK application.
> > How is it different of drivers/dma/hisilicon/ ?
> >
> > "
> > Kunpeng SoC has an internal DMA unit which can be used by application
> > to accelerate data copies.
> > "
> >
> > If it is the same hardware (Kunpeng),
> > I recommend having a single driver, or a single directory.
> > Also we already have a driver named "acc" in baseband.
> 
> 
> they belong to different hardware IP of Kunpeng platform, /dma/hisilicon 
> is the PCIE subsystem DMA's driver, while this acc is the 
> Accelerator(ZIP) subsystem DMA's driver.  they don't exist in the same 
> specific Kunpeng Soc.
> 
> a single driver will add complex.
> 
> as for a single directory, should we do for all vendor?  I see there 
> are  dpaa and dpaa2, ioat and idxd both belong to vendor platform.

When it is really different driver, it is OK to keep them separate.

> if there are no rule to have same name with different subsystem driver, 
> I prefer keep it.

OK but why it does not contain HiSilicon or Kunpeng in its name?
"acc" is a really short name, and already used.
Please find something else.



^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH v4 4/4] dma/acc: add doc
  2025-10-15 12:01         ` Thomas Monjalon
@ 2025-10-15 12:55           ` fengchengwen
  2025-10-15 14:09             ` Thomas Monjalon
  0 siblings, 1 reply; 37+ messages in thread
From: fengchengwen @ 2025-10-15 12:55 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: dev, liuyonglong

On 2025/10/15 20:01, Thomas Monjalon wrote:
> 15/10/2025 13:10, fengchengwen:
>> On 2025/10/15 18:05, Thomas Monjalon wrote:
>>> 13/10/2025 11:11, Chengwen Feng:
>>>> +HISILICON Accelerator DMA Driver
>>>> +================================
>>>> +
>>>> +Kunpeng SoC has an internal accelerator unit which includes zip function, and
>>>> +the zip also supports data copy and fill. This driver exposes this capability to
>>>> +DPDK application.
>>> How is it different of drivers/dma/hisilicon/ ?
>>>
>>> "
>>> Kunpeng SoC has an internal DMA unit which can be used by application
>>> to accelerate data copies.
>>> "
>>>
>>> If it is the same hardware (Kunpeng),
>>> I recommend having a single driver, or a single directory.
>>> Also we already have a driver named "acc" in baseband.
>>
>> they belong to different hardware IP of Kunpeng platform, /dma/hisilicon
>> is the PCIE subsystem DMA's driver, while this acc is the
>> Accelerator(ZIP) subsystem DMA's driver.  they don't exist in the same
>> specific Kunpeng Soc.
>>
>> a single driver will add complex.
>>
>> as for a single directory, should we do for all vendor?  I see there
>> are  dpaa and dpaa2, ioat and idxd both belong to vendor platform.
> When it is really different driver, it is OK to keep them separate.
>
>> if there are no rule to have same name with different subsystem driver,
>> I prefer keep it.
> OK but why it does not contain HiSilicon or Kunpeng in its name?
> "acc" is a really short name, and already used.
> Please find something else.

how about hacc and kacc ?   Hisilicon / Kunpeng accelerator DMA

I will send v5 with one of them

>
>

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH v4 4/4] dma/acc: add doc
  2025-10-15 12:55           ` fengchengwen
@ 2025-10-15 14:09             ` Thomas Monjalon
  2025-10-25  1:07               ` fengchengwen
  0 siblings, 1 reply; 37+ messages in thread
From: Thomas Monjalon @ 2025-10-15 14:09 UTC (permalink / raw)
  To: fengchengwen; +Cc: dev, liuyonglong

15/10/2025 14:55, fengchengwen:
> On 2025/10/15 20:01, Thomas Monjalon wrote:
> > 15/10/2025 13:10, fengchengwen:
> >> On 2025/10/15 18:05, Thomas Monjalon wrote:
> >>> 13/10/2025 11:11, Chengwen Feng:
> >>>> +HISILICON Accelerator DMA Driver
> >>>> +================================
> >>>> +
> >>>> +Kunpeng SoC has an internal accelerator unit which includes zip function, and
> >>>> +the zip also supports data copy and fill. This driver exposes this capability to
> >>>> +DPDK application.
> >>> How is it different of drivers/dma/hisilicon/ ?
> >>>
> >>> "
> >>> Kunpeng SoC has an internal DMA unit which can be used by application
> >>> to accelerate data copies.
> >>> "
> >>>
> >>> If it is the same hardware (Kunpeng),
> >>> I recommend having a single driver, or a single directory.
> >>> Also we already have a driver named "acc" in baseband.
> >>
> >> they belong to different hardware IP of Kunpeng platform, /dma/hisilicon
> >> is the PCIE subsystem DMA's driver, while this acc is the
> >> Accelerator(ZIP) subsystem DMA's driver.  they don't exist in the same
> >> specific Kunpeng Soc.
> >>
> >> a single driver will add complex.
> >>
> >> as for a single directory, should we do for all vendor?  I see there
> >> are  dpaa and dpaa2, ioat and idxd both belong to vendor platform.
> > When it is really different driver, it is OK to keep them separate.
> >
> >> if there are no rule to have same name with different subsystem driver,
> >> I prefer keep it.
> > OK but why it does not contain HiSilicon or Kunpeng in its name?
> > "acc" is a really short name, and already used.
> > Please find something else.
> 
> how about hacc and kacc ?   Hisilicon / Kunpeng accelerator DMA

You already have one DMA driver in drivers/dma/hisilicon/
Can you build around that? like hisilicon_something?
hisil_acc?



^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH v5 0/4] add Hisilicon accelerator DMA driver
  2025-08-27  9:27 [PATCH 0/4] add Hisilicon accelerator DMA driver Chengwen Feng
                   ` (6 preceding siblings ...)
  2025-10-13  9:11 ` [PATCH v4 0/4] add Hisilicon accelerator DMA driver Chengwen Feng
@ 2025-10-16  1:52 ` Chengwen Feng
  2025-10-16  1:52   ` [PATCH v5 1/4] dma/hisi_acc: add probe and remove Chengwen Feng
                     ` (3 more replies)
  2025-10-16  2:53 ` [PATCH v6 0/4] add Hisilicon accelerator DMA driver Chengwen Feng
  8 siblings, 4 replies; 37+ messages in thread
From: Chengwen Feng @ 2025-10-16  1:52 UTC (permalink / raw)
  To: thomas; +Cc: dev, liuyonglong

This patchset adds Hisilicon accelerator DMA driver which based on
UACCE bus.

Chengwen Feng (4):
  dma/hisi_acc: add probe and remove
  dma/hisi_acc: add control path ops
  dma/hisi_acc: add data path ops
  dma/hisi_acc: add doc

---
v5: rename driver name to hisi_acc
v4: start/stop support detect sq/cq-status.
    add wait requests complete in stop.
v3: fix remove wrong dmadev by add prefix match.
    use proper errcode when get avail queue fail and dmadev alloc fail.
v2: fix magic number.
    support clean write back field when submit request.

 MAINTAINERS                            |   5 +
 doc/guides/dmadevs/hisi_acc.rst        |  63 ++
 doc/guides/dmadevs/index.rst           |   1 +
 doc/guides/rel_notes/release_25_11.rst |   6 +
 drivers/dma/hisi_acc/hisi_acc_dmadev.c | 774 +++++++++++++++++++++++++
 drivers/dma/hisi_acc/hisi_acc_dmadev.h | 169 ++++++
 drivers/dma/hisi_acc/meson.build       |  21 +
 drivers/dma/meson.build                |   1 +
 8 files changed, 1040 insertions(+)
 create mode 100644 doc/guides/dmadevs/hisi_acc.rst
 create mode 100644 drivers/dma/hisi_acc/hisi_acc_dmadev.c
 create mode 100644 drivers/dma/hisi_acc/hisi_acc_dmadev.h
 create mode 100644 drivers/dma/hisi_acc/meson.build

-- 
2.17.1


^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH v5 1/4] dma/hisi_acc: add probe and remove
  2025-10-16  1:52 ` [PATCH v5 0/4] add Hisilicon accelerator DMA driver Chengwen Feng
@ 2025-10-16  1:52   ` Chengwen Feng
  2025-10-16  1:52   ` [PATCH v5 2/4] dma/hisi_acc: add control path ops Chengwen Feng
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 37+ messages in thread
From: Chengwen Feng @ 2025-10-16  1:52 UTC (permalink / raw)
  To: thomas; +Cc: dev, liuyonglong

This patch adds the probe and remove operations for the Hisilicon
accelerator DMA driver.

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
---
 MAINTAINERS                            |   4 +
 drivers/dma/hisi_acc/hisi_acc_dmadev.c | 289 +++++++++++++++++++++++++
 drivers/dma/hisi_acc/hisi_acc_dmadev.h |  55 +++++
 drivers/dma/hisi_acc/meson.build       |  21 ++
 drivers/dma/meson.build                |   1 +
 5 files changed, 370 insertions(+)
 create mode 100644 drivers/dma/hisi_acc/hisi_acc_dmadev.c
 create mode 100644 drivers/dma/hisi_acc/hisi_acc_dmadev.h
 create mode 100644 drivers/dma/hisi_acc/meson.build

diff --git a/MAINTAINERS b/MAINTAINERS
index 1a2729be66..b5e3479203 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1366,6 +1366,10 @@ M: Chengwen Feng <fengchengwen@huawei.com>
 F: drivers/dma/hisilicon/
 F: doc/guides/dmadevs/hisilicon.rst
 
+HiSilicon Accelerator DMA
+M: Chengwen Feng <fengchengwen@huawei.com>
+F: drivers/dma/hisi_acc/
+
 Marvell CNXK DPI DMA
 M: Vamsi Attunuru <vattunuru@marvell.com>
 T: git://dpdk.org/next/dpdk-next-net-mrvl
diff --git a/drivers/dma/hisi_acc/hisi_acc_dmadev.c b/drivers/dma/hisi_acc/hisi_acc_dmadev.c
new file mode 100644
index 0000000000..db6b8e8ba8
--- /dev/null
+++ b/drivers/dma/hisi_acc/hisi_acc_dmadev.c
@@ -0,0 +1,289 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2025 HiSilicon Technologies Co., Ltd. All rights reserved.
+ */
+
+#include <errno.h>
+#include <inttypes.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include <rte_byteorder.h>
+#include <rte_eal.h>
+#include <rte_io.h>
+#include <rte_kvargs.h>
+#include <rte_log.h>
+#include <rte_malloc.h>
+
+#include <rte_dmadev_pmd.h>
+
+#include "hisi_acc_dmadev.h"
+
+RTE_LOG_REGISTER_DEFAULT(hacc_dma_logtype, INFO);
+#define RTE_LOGTYPE_HACC_DMA hacc_dma_logtype
+#define HACC_DMA_LOG(level, ...) \
+	RTE_LOG_LINE_PREFIX(level, HACC_DMA, "%s(): ", __func__, __VA_ARGS__)
+#define HACC_DMA_DEV_LOG(hw, level, ...) \
+	RTE_LOG_LINE_PREFIX(level, HACC_DMA, "%s %s(): ", \
+		(hw)->data->dev_name RTE_LOG_COMMA __func__, __VA_ARGS__)
+#define HACC_DMA_DEBUG(hw, ...) \
+	HACC_DMA_DEV_LOG(hw, DEBUG, __VA_ARGS__)
+#define HACC_DMA_INFO(hw, ...) \
+	HACC_DMA_DEV_LOG(hw, INFO, __VA_ARGS__)
+#define HACC_DMA_WARN(hw, ...) \
+	HACC_DMA_DEV_LOG(hw, WARNING, __VA_ARGS__)
+#define HACC_DMA_ERR(hw, ...) \
+	HACC_DMA_DEV_LOG(hw, ERR, __VA_ARGS__)
+
+static void
+hacc_dma_gen_dev_name(const struct rte_uacce_device *uacce_dev,
+		      uint16_t queue_id, char *dev_name, size_t size)
+{
+	memset(dev_name, 0, size);
+	(void)snprintf(dev_name, size, "%s-dma%u", uacce_dev->device.name, queue_id);
+}
+
+static void
+hacc_dma_gen_dev_prefix(const struct rte_uacce_device *uacce_dev, char *dev_name, size_t size)
+{
+	memset(dev_name, 0, size);
+	(void)snprintf(dev_name, size, "%s-dma", uacce_dev->device.name);
+}
+
+static int
+hacc_dma_get_qp_info(struct hacc_dma_dev *hw)
+{
+#define CMD_QM_GET_QP_CTX	_IOWR('H', 10, struct hacc_dma_qp_contex)
+#define CMD_QM_GET_QP_INFO	_IOWR('H', 11, struct hacc_dma_qp_info)
+#define QP_ALG_TYPE		2
+	struct hacc_dma_qp_contex {
+		uint16_t id;
+		uint16_t qc_type;
+	} qp_ctx;
+	struct hacc_dma_qp_info {
+		uint32_t sqe_size;
+		uint16_t sq_depth;
+		uint16_t cq_depth;
+		uint64_t reserved;
+	} qp_info;
+	int ret;
+
+	memset(&qp_ctx, 0, sizeof(qp_ctx));
+	qp_ctx.qc_type = QP_ALG_TYPE;
+	ret = rte_uacce_queue_ioctl(&hw->qctx, CMD_QM_GET_QP_CTX, &qp_ctx);
+	if (ret != 0) {
+		HACC_DMA_ERR(hw, "get qm qp context fail!");
+		return -EINVAL;
+	}
+	hw->sqn = qp_ctx.id;
+
+	memset(&qp_info, 0, sizeof(qp_info));
+	ret = rte_uacce_queue_ioctl(&hw->qctx, CMD_QM_GET_QP_INFO, &qp_info);
+	if (ret != 0) {
+		HACC_DMA_ERR(hw, "get qm qp info fail!");
+		return -EINVAL;
+	}
+	if ((qp_info.sq_depth & (qp_info.sq_depth - 1)) != 0) {
+		HACC_DMA_ERR(hw, "sq depth is not 2's power!");
+		return -EINVAL;
+	}
+	hw->sqe_size = qp_info.sqe_size;
+	hw->sq_depth = qp_info.sq_depth;
+	hw->cq_depth = qp_info.cq_depth;
+	hw->sq_depth_mask = hw->sq_depth - 1;
+
+	return 0;
+}
+
+static int
+hacc_dma_create(struct rte_uacce_device *uacce_dev, uint16_t queue_id)
+{
+	char name[RTE_DEV_NAME_MAX_LEN];
+	struct rte_dma_dev *dev;
+	struct hacc_dma_dev *hw;
+	int ret;
+
+	hacc_dma_gen_dev_name(uacce_dev, queue_id, name, sizeof(name));
+	dev = rte_dma_pmd_allocate(name, uacce_dev->device.numa_node,
+				   sizeof(struct hacc_dma_dev));
+	if (dev == NULL) {
+		HACC_DMA_LOG(ERR, "%s allocate dmadev fail!", name);
+		return -ENOMEM;
+	}
+
+	dev->device = &uacce_dev->device;
+	dev->fp_obj->dev_private = dev->data->dev_private;
+
+	hw = dev->data->dev_private;
+	hw->data = dev->data; /* make sure ACC_DMA_DEBUG/INFO/WARN/ERR was available. */
+
+	ret = rte_uacce_queue_alloc(uacce_dev, &hw->qctx);
+	if (ret != 0) {
+		HACC_DMA_ERR(hw, "alloc queue fail!");
+		goto release_dma_pmd;
+	}
+
+	ret = hacc_dma_get_qp_info(hw);
+	if (ret != 0)
+		goto free_uacce_queue;
+
+	hw->io_base = rte_uacce_queue_mmap(&hw->qctx, RTE_UACCE_QFRT_MMIO);
+	if (hw->io_base == NULL) {
+		HACC_DMA_ERR(hw, "mmap MMIO region fail!");
+		ret = -EINVAL;
+		goto free_uacce_queue;
+	}
+	hw->doorbell_reg = (void *)((uintptr_t)hw->io_base + HACC_DMA_DOORBELL_OFFSET);
+
+	hw->dus_base = rte_uacce_queue_mmap(&hw->qctx, RTE_UACCE_QFRT_DUS);
+	if (hw->dus_base == NULL) {
+		HACC_DMA_ERR(hw, "mmap DUS region fail!");
+		ret = -EINVAL;
+		goto unmap_mmio;
+	}
+	hw->sqe = hw->dus_base;
+	hw->cqe = (void *)((uintptr_t)hw->dus_base + hw->sqe_size * hw->sq_depth);
+	hw->sq_status = (uint32_t *)((uintptr_t)hw->dus_base +
+			uacce_dev->qfrt_sz[RTE_UACCE_QFRT_DUS] - sizeof(uint32_t));
+	hw->cq_status = hw->sq_status - 1;
+
+	hw->status = rte_zmalloc_socket(NULL, sizeof(uint16_t) * hw->sq_depth,
+					RTE_CACHE_LINE_SIZE, uacce_dev->numa_node);
+	if (hw->status == NULL) {
+		HACC_DMA_ERR(hw, "malloc status region fail!");
+		ret = -ENOMEM;
+		goto unmap_dus;
+	}
+
+	dev->state = RTE_DMA_DEV_READY;
+	HACC_DMA_DEBUG(hw, "create dmadev %s success!", name);
+
+	return 0;
+
+unmap_dus:
+	rte_uacce_queue_unmap(&hw->qctx, RTE_UACCE_QFRT_DUS);
+unmap_mmio:
+	rte_uacce_queue_unmap(&hw->qctx, RTE_UACCE_QFRT_MMIO);
+free_uacce_queue:
+	rte_uacce_queue_free(&hw->qctx);
+release_dma_pmd:
+	rte_dma_pmd_release(name);
+	return ret;
+}
+
+static int
+hacc_dma_parse_queues(const char *key, const char *value, void *extra_args)
+{
+	struct hacc_dma_config *config = extra_args;
+	uint64_t val;
+	char *end;
+
+	RTE_SET_USED(key);
+
+	errno = 0;
+	val = strtoull(value, &end, 0);
+	if (errno == ERANGE || value == end || *end != '\0' || val == 0) {
+		HACC_DMA_LOG(ERR, "%s invalid queues! set to default one queue!",
+			    config->dev->name);
+		config->queues = HACC_DMA_DEFAULT_QUEUES;
+	} else if (val > config->avail_queues) {
+		HACC_DMA_LOG(WARNING, "%s exceed available queues! set to available queues %u",
+			     config->dev->name, config->avail_queues);
+		config->queues = config->avail_queues;
+	} else {
+		config->queues = val;
+	}
+
+	return 0;
+}
+
+static int
+hacc_dma_parse_devargs(struct rte_uacce_device *uacce_dev, struct hacc_dma_config *config)
+{
+	struct rte_kvargs *kvlist;
+	int avail_queues;
+
+	avail_queues = rte_uacce_avail_queues(uacce_dev);
+	if (avail_queues <= 0) {
+		HACC_DMA_LOG(ERR, "%s don't have available queues!", uacce_dev->name);
+		return -EINVAL;
+	}
+	config->dev = uacce_dev;
+	config->avail_queues = avail_queues <= UINT16_MAX ? avail_queues : UINT16_MAX;
+
+	if (uacce_dev->device.devargs == NULL)
+		return 0;
+
+	kvlist = rte_kvargs_parse(uacce_dev->device.devargs->args, NULL);
+	if (kvlist == NULL)
+		return 0;
+
+	(void)rte_kvargs_process(kvlist, HACC_DMA_DEVARG_QUEUES, &hacc_dma_parse_queues, config);
+
+	rte_kvargs_free(kvlist);
+
+	return 0;
+}
+
+static int
+hacc_dma_probe(struct rte_uacce_driver *dr, struct rte_uacce_device *uacce_dev)
+{
+	struct hacc_dma_config config = { .queues = HACC_DMA_DEFAULT_QUEUES };
+	int ret = 0;
+	uint32_t i;
+
+	RTE_SET_USED(dr);
+
+	ret = hacc_dma_parse_devargs(uacce_dev, &config);
+	if (ret != 0)
+		return ret;
+
+	for (i = 0; i < config.queues; i++) {
+		ret = hacc_dma_create(uacce_dev, i);
+		if (ret != 0) {
+			HACC_DMA_LOG(ERR, "%s create dmadev No.%u failed!", uacce_dev->name, i);
+			break;
+		}
+	}
+
+	if (ret != 0 && i > 0) {
+		HACC_DMA_LOG(WARNING, "%s probed %u dmadev, can't probe more!", uacce_dev->name, i);
+		ret = 0;
+	}
+
+	return ret;
+}
+
+static int
+hacc_dma_remove(struct rte_uacce_device *uacce_dev)
+{
+	char name[RTE_DEV_NAME_MAX_LEN];
+	struct rte_dma_info info;
+	int i = 0;
+	int ret;
+
+	hacc_dma_gen_dev_prefix(uacce_dev, name, sizeof(name));
+	RTE_DMA_FOREACH_DEV(i) {
+		ret = rte_dma_info_get(i, &info);
+		if (ret != 0)
+			continue;
+		if (strncmp(info.dev_name, name, strlen(name)) == 0)
+			rte_dma_pmd_release(info.dev_name);
+	}
+
+	return 0;
+}
+
+static const struct rte_uacce_id hacc_dma_id_table[] = {
+	{ "hisi_qm_v5", "udma" },
+	{ .dev_api = NULL, },
+};
+
+static struct rte_uacce_driver hacc_dma_pmd_drv = {
+	.id_table = hacc_dma_id_table,
+	.probe    = hacc_dma_probe,
+	.remove   = hacc_dma_remove,
+};
+
+RTE_PMD_REGISTER_UACCE(dma_hisi_acc, hacc_dma_pmd_drv);
+RTE_PMD_REGISTER_PARAM_STRING(dma_hisi_acc,
+			HACC_DMA_DEVARG_QUEUES "=<uint16> ");
diff --git a/drivers/dma/hisi_acc/hisi_acc_dmadev.h b/drivers/dma/hisi_acc/hisi_acc_dmadev.h
new file mode 100644
index 0000000000..3a790bb445
--- /dev/null
+++ b/drivers/dma/hisi_acc/hisi_acc_dmadev.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2025 HiSilicon Technologies Co., Ltd. All rights reserved.
+ */
+
+#ifndef HISI_ACC_DMADEV_H
+#define HISI_ACC_DMADEV_H
+
+#include <bus_uacce_driver.h>
+#include <rte_bitops.h>
+#include <rte_common.h>
+#include <rte_dmadev_pmd.h>
+
+#define HACC_DMA_DEVARG_QUEUES		"queues"
+#define HACC_DMA_DEFAULT_QUEUES		1
+
+struct hacc_dma_config {
+	uint16_t queues;
+
+	/* The following fields are config contexts. */
+	struct rte_uacce_device *dev;
+	uint16_t avail_queues;
+};
+
+#define HACC_DMA_DOORBELL_OFFSET		0x1000u
+
+struct hacc_dma_sqe {};
+struct hacc_dma_cqe {};
+
+struct hacc_dma_dev {
+	struct hacc_dma_sqe *sqe;
+	struct hacc_dma_cqe *cqe;
+	uint16_t *status;             /* the completion status array of SQEs. */
+
+	volatile void *doorbell_reg;  /**< register address for doorbell. */
+	volatile uint32_t *sq_status; /**< SQ status pointer. */
+	volatile uint32_t *cq_status; /**< CQ status pointer. */
+
+	uint16_t sqn;           /**< SQ global number, inited when created. */
+	uint16_t sq_depth_mask; /**< SQ depth - 1, the SQ depth is power of 2. */
+
+	uint16_t cq_depth;      /**< CQ depth, inited when created. */
+
+	/**
+	 * The following fields are not accessed in the I/O path, so they are
+	 * placed at the end.
+	 */
+	struct rte_dma_dev_data *data;
+	struct rte_uacce_qcontex qctx;
+	void *io_base;
+	void *dus_base;
+	uint32_t sqe_size;
+	uint16_t sq_depth;
+};
+
+#endif /* HISI_ACC_DMADEV_H */
diff --git a/drivers/dma/hisi_acc/meson.build b/drivers/dma/hisi_acc/meson.build
new file mode 100644
index 0000000000..f5a75777fa
--- /dev/null
+++ b/drivers/dma/hisi_acc/meson.build
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright (c) 2025 HiSilicon Technologies Co., Ltd. All rights reserved.
+
+if not is_linux
+    build = false
+    reason = 'only supported on Linux'
+    subdir_done()
+endif
+
+if (arch_subdir != 'x86' and arch_subdir != 'arm') or (not dpdk_conf.get('RTE_ARCH_64'))
+    build = false
+    reason = 'only supported on x86_64 and aarch64'
+    subdir_done()
+endif
+
+deps += ['bus_uacce', 'dmadev']
+sources = files(
+        'hisi_acc_dmadev.c',
+)
+
+require_iova_in_mbuf = false
diff --git a/drivers/dma/meson.build b/drivers/dma/meson.build
index 358132759a..194e08e0b1 100644
--- a/drivers/dma/meson.build
+++ b/drivers/dma/meson.build
@@ -5,6 +5,7 @@ drivers = [
         'cnxk',
         'dpaa',
         'dpaa2',
+        'hisi_acc',
         'hisilicon',
         'idxd',
         'ioat',
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH v5 2/4] dma/hisi_acc: add control path ops
  2025-10-16  1:52 ` [PATCH v5 0/4] add Hisilicon accelerator DMA driver Chengwen Feng
  2025-10-16  1:52   ` [PATCH v5 1/4] dma/hisi_acc: add probe and remove Chengwen Feng
@ 2025-10-16  1:52   ` Chengwen Feng
  2025-10-16  1:52   ` [PATCH v5 3/4] dma/hisi_acc: add data " Chengwen Feng
  2025-10-16  1:52   ` [PATCH v5 4/4] dma/hisi_acc: add doc Chengwen Feng
  3 siblings, 0 replies; 37+ messages in thread
From: Chengwen Feng @ 2025-10-16  1:52 UTC (permalink / raw)
  To: thomas; +Cc: dev, liuyonglong

This commit adds the control path ops for the Hisilicon accelerator
DMA driver.

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
---
 drivers/dma/hisi_acc/hisi_acc_dmadev.c | 185 +++++++++++++++++++++++++
 drivers/dma/hisi_acc/hisi_acc_dmadev.h |  42 ++++++
 2 files changed, 227 insertions(+)

diff --git a/drivers/dma/hisi_acc/hisi_acc_dmadev.c b/drivers/dma/hisi_acc/hisi_acc_dmadev.c
index db6b8e8ba8..00ccbf8426 100644
--- a/drivers/dma/hisi_acc/hisi_acc_dmadev.c
+++ b/drivers/dma/hisi_acc/hisi_acc_dmadev.c
@@ -34,6 +34,190 @@ RTE_LOG_REGISTER_DEFAULT(hacc_dma_logtype, INFO);
 #define HACC_DMA_ERR(hw, ...) \
 	HACC_DMA_DEV_LOG(hw, ERR, __VA_ARGS__)
 
+static int
+hacc_dma_info_get(const struct rte_dma_dev *dev,
+		  struct rte_dma_info *dev_info,
+		  uint32_t info_sz)
+{
+	struct hacc_dma_dev *hw = dev->data->dev_private;
+
+	RTE_SET_USED(info_sz);
+
+	dev_info->dev_capa = RTE_DMA_CAPA_MEM_TO_MEM |
+			     RTE_DMA_CAPA_SVA |
+			     RTE_DMA_CAPA_OPS_COPY |
+			     RTE_DMA_CAPA_OPS_FILL;
+	dev_info->max_vchans = 1;
+	dev_info->max_desc = hw->sq_depth;
+	dev_info->min_desc = hw->sq_depth;
+
+	return 0;
+}
+
+static int
+hacc_dma_configure(struct rte_dma_dev *dev,
+		   const struct rte_dma_conf *conf,
+		   uint32_t conf_sz)
+{
+	RTE_SET_USED(dev);
+	RTE_SET_USED(conf);
+	RTE_SET_USED(conf_sz);
+	return 0;
+}
+
+static int
+hacc_dma_start(struct rte_dma_dev *dev)
+{
+	struct hacc_dma_dev *hw = dev->data->dev_private;
+	int ret;
+
+	if ((*hw->sq_status != 0) || (*hw->cq_status != 0)) {
+		HACC_DMA_ERR(hw, "detect dev is abnormal!");
+		return -EIO;
+	}
+
+	if (hw->started) {
+		hw->ridx = 0;
+		hw->cridx = 0;
+		return 0;
+	}
+
+	memset(hw->sqe, 0, hw->sqe_size * hw->sq_depth);
+	memset(hw->cqe, 0, sizeof(struct hacc_dma_cqe) * hw->cq_depth);
+	memset(hw->status, 0, sizeof(uint16_t) * hw->sq_depth);
+	hw->ridx = 0;
+	hw->cridx = 0;
+	hw->sq_head = 0;
+	hw->sq_tail = 0;
+	hw->cq_sq_head = 0;
+	hw->avail_sqes = hw->sq_depth - HACC_DMA_SQ_GAP_NUM - 1;
+	hw->cq_head = 0;
+	hw->cqs_completed = 0;
+	hw->cqe_vld = 1;
+	hw->submitted = 0;
+	hw->completed = 0;
+	hw->errors = 0;
+	hw->invalid_lens = 0;
+	hw->qfulls = 0;
+
+	ret = rte_uacce_queue_start(&hw->qctx);
+	if (ret == 0)
+		hw->started = true;
+
+	return ret;
+}
+
+static int
+hacc_dma_stop(struct rte_dma_dev *dev)
+{
+	struct hacc_dma_dev *hw = dev->data->dev_private;
+
+	if ((*hw->sq_status != 0) || (*hw->cq_status != 0)) {
+		/* This indicates that the dev is abnormal. The correct error handling
+		 * is to close the dev (so that kernel module will perform error handling)
+		 * and apply for a new dev.
+		 * If an error code is returned here, the dev cannot be closed. Therefore,
+		 * zero is returned and an error trace is added.
+		 */
+		HACC_DMA_ERR(hw, "detect dev is abnormal!");
+		return 0;
+	}
+
+	return 0;
+}
+
+static int
+hacc_dma_close(struct rte_dma_dev *dev)
+{
+	struct hacc_dma_dev *hw = dev->data->dev_private;
+	/* The dmadev already stopped */
+	rte_free(hw->status);
+	rte_uacce_queue_unmap(&hw->qctx, RTE_UACCE_QFRT_DUS);
+	rte_uacce_queue_unmap(&hw->qctx, RTE_UACCE_QFRT_MMIO);
+	rte_uacce_queue_free(&hw->qctx);
+	return 0;
+}
+
+static int
+hacc_dma_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan,
+		     const struct rte_dma_vchan_conf *conf,
+		     uint32_t conf_sz)
+{
+	RTE_SET_USED(dev);
+	RTE_SET_USED(vchan);
+	RTE_SET_USED(conf);
+	RTE_SET_USED(conf_sz);
+	return 0;
+}
+
+static int
+hacc_dma_stats_get(const struct rte_dma_dev *dev, uint16_t vchan,
+		   struct rte_dma_stats *stats,
+		   uint32_t stats_sz)
+{
+	struct hacc_dma_dev *hw = dev->data->dev_private;
+
+	RTE_SET_USED(vchan);
+	RTE_SET_USED(stats_sz);
+	stats->submitted = hw->submitted;
+	stats->completed = hw->completed;
+	stats->errors    = hw->errors;
+
+	return 0;
+}
+
+static int
+hacc_dma_stats_reset(struct rte_dma_dev *dev, uint16_t vchan)
+{
+	struct hacc_dma_dev *hw = dev->data->dev_private;
+
+	RTE_SET_USED(vchan);
+	hw->submitted    = 0;
+	hw->completed    = 0;
+	hw->errors       = 0;
+	hw->invalid_lens = 0;
+	hw->io_errors    = 0;
+	hw->qfulls       = 0;
+
+	return 0;
+}
+
+static int
+hacc_dma_dump(const struct rte_dma_dev *dev, FILE *f)
+{
+	struct hacc_dma_dev *hw = dev->data->dev_private;
+
+	fprintf(f, "  sqn: %u sq_status: %s cq_status: %s\n"
+		"  sqe_size: %u sq_depth: %u sq_depth_mask: %u cq_depth: %u\n",
+		hw->sqn, (*hw->sq_status != 0) ? "ERR" : "OK",
+		(*hw->cq_status != 0) ? "ERR" : "OK",
+		hw->sqe_size, hw->sq_depth, hw->sq_depth_mask, hw->cq_depth);
+	fprintf(f, "  ridx: %u cridx: %u\n"
+		"  sq_head: %u sq_tail: %u cq_sq_head: %u avail_sqes: %u\n"
+		"  cq_head: %u cqs_completed: %u cqe_vld: %u\n",
+		hw->ridx, hw->cridx,
+		hw->sq_head, hw->sq_tail, hw->cq_sq_head, hw->avail_sqes,
+		hw->cq_head, hw->cqs_completed, hw->cqe_vld);
+	fprintf(f, "  submitted: %" PRIu64 " completed: %" PRIu64 " errors: %" PRIu64
+		" invalid_lens: %" PRIu64 " io_errors: %" PRIu64 " qfulls: %" PRIu64 "\n",
+		hw->submitted, hw->completed, hw->errors, hw->invalid_lens,
+		hw->io_errors, hw->qfulls);
+
+	return 0;
+}
+
+static const struct rte_dma_dev_ops hacc_dmadev_ops = {
+	.dev_info_get     = hacc_dma_info_get,
+	.dev_configure    = hacc_dma_configure,
+	.dev_start        = hacc_dma_start,
+	.dev_stop         = hacc_dma_stop,
+	.dev_close        = hacc_dma_close,
+	.vchan_setup      = hacc_dma_vchan_setup,
+	.stats_get        = hacc_dma_stats_get,
+	.stats_reset      = hacc_dma_stats_reset,
+	.dev_dump         = hacc_dma_dump,
+};
+
 static void
 hacc_dma_gen_dev_name(const struct rte_uacce_device *uacce_dev,
 		      uint16_t queue_id, char *dev_name, size_t size)
@@ -111,6 +295,7 @@ hacc_dma_create(struct rte_uacce_device *uacce_dev, uint16_t queue_id)
 	}
 
 	dev->device = &uacce_dev->device;
+	dev->dev_ops = &hacc_dmadev_ops;
 	dev->fp_obj->dev_private = dev->data->dev_private;
 
 	hw = dev->data->dev_private;
diff --git a/drivers/dma/hisi_acc/hisi_acc_dmadev.h b/drivers/dma/hisi_acc/hisi_acc_dmadev.h
index 3a790bb445..bc94b9d5db 100644
--- a/drivers/dma/hisi_acc/hisi_acc_dmadev.h
+++ b/drivers/dma/hisi_acc/hisi_acc_dmadev.h
@@ -13,6 +13,9 @@
 #define HACC_DMA_DEVARG_QUEUES		"queues"
 #define HACC_DMA_DEFAULT_QUEUES		1
 
+#define HACC_DMA_CQ_DOORBELL_PACE	64
+#define HACC_DMA_SQ_GAP_NUM		HACC_DMA_CQ_DOORBELL_PACE
+
 struct hacc_dma_config {
 	uint16_t queues;
 
@@ -38,7 +41,45 @@ struct hacc_dma_dev {
 	uint16_t sqn;           /**< SQ global number, inited when created. */
 	uint16_t sq_depth_mask; /**< SQ depth - 1, the SQ depth is power of 2. */
 
+	uint16_t ridx;  /**< ring index which will assign to the next request. */
+	uint16_t cridx; /**< ring index which returned by completed APIs. */
+
+	/**
+	 * SQE array management fields:
+	 *
+	 *  -----------------------------------------------------
+	 *  | SQE0 | SQE1 | SQE2 |   ...  | SQEx | ... | SQEn-1 |
+	 *  -----------------------------------------------------
+	 *     ^             ^               ^
+	 *     |             |               |
+	 *   sq_head     cq_sq_head       sq_tail
+	 *
+	 *  sq_head: next index to the oldest completed request, this filed was
+	 *           updated by completed* APIs.
+	 *  sq_tail: index of the next new request, this field was updated by
+	 *           copy or fill API.
+	 *  cq_sq_head: next index of index that has been completed by hardware,
+	 *              this filed was updated by completed* APIs.
+	 *
+	 *  [sq_head, cq_sq_head): the SQEs that hardware already completed.
+	 *  [cq_sq_head, sq_tail): the SQEs that hardware processing.
+	 */
+	uint16_t sq_head;
+	uint16_t sq_tail;
+	uint16_t cq_sq_head;
+	uint16_t avail_sqes;
+
 	uint16_t cq_depth;      /**< CQ depth, inited when created. */
+	uint16_t cq_head;       /**< CQ index for next scans. */
+	uint16_t cqs_completed; /**< accumulated number of completed CQs. */
+	uint8_t  cqe_vld;       /**< valid bit for CQE, will change for every round. */
+
+	uint64_t submitted;
+	uint64_t completed;
+	uint64_t errors;
+	uint64_t invalid_lens;
+	uint64_t io_errors;
+	uint64_t qfulls;
 
 	/**
 	 * The following fields are not accessed in the I/O path, so they are
@@ -50,6 +91,7 @@ struct hacc_dma_dev {
 	void *dus_base;
 	uint32_t sqe_size;
 	uint16_t sq_depth;
+	bool started;
 };
 
 #endif /* HISI_ACC_DMADEV_H */
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH v5 3/4] dma/hisi_acc: add data path ops
  2025-10-16  1:52 ` [PATCH v5 0/4] add Hisilicon accelerator DMA driver Chengwen Feng
  2025-10-16  1:52   ` [PATCH v5 1/4] dma/hisi_acc: add probe and remove Chengwen Feng
  2025-10-16  1:52   ` [PATCH v5 2/4] dma/hisi_acc: add control path ops Chengwen Feng
@ 2025-10-16  1:52   ` Chengwen Feng
  2025-10-16  1:52   ` [PATCH v5 4/4] dma/hisi_acc: add doc Chengwen Feng
  3 siblings, 0 replies; 37+ messages in thread
From: Chengwen Feng @ 2025-10-16  1:52 UTC (permalink / raw)
  To: thomas; +Cc: dev, liuyonglong

This commit adds the data path ops for the Hisilicon accelerator
DMA driver.

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
---
 drivers/dma/hisi_acc/hisi_acc_dmadev.c | 322 ++++++++++++++++++++++++-
 drivers/dma/hisi_acc/hisi_acc_dmadev.h |  76 +++++-
 2 files changed, 385 insertions(+), 13 deletions(-)

diff --git a/drivers/dma/hisi_acc/hisi_acc_dmadev.c b/drivers/dma/hisi_acc/hisi_acc_dmadev.c
index 00ccbf8426..b9767d4a08 100644
--- a/drivers/dma/hisi_acc/hisi_acc_dmadev.c
+++ b/drivers/dma/hisi_acc/hisi_acc_dmadev.c
@@ -8,6 +8,7 @@
 #include <sys/ioctl.h>
 
 #include <rte_byteorder.h>
+#include <rte_cycles.h>
 #include <rte_eal.h>
 #include <rte_io.h>
 #include <rte_kvargs.h>
@@ -79,6 +80,7 @@ hacc_dma_start(struct rte_dma_dev *dev)
 	if (hw->started) {
 		hw->ridx = 0;
 		hw->cridx = 0;
+		hw->stop_proc = 0;
 		return 0;
 	}
 
@@ -94,6 +96,7 @@ hacc_dma_start(struct rte_dma_dev *dev)
 	hw->cq_head = 0;
 	hw->cqs_completed = 0;
 	hw->cqe_vld = 1;
+	hw->stop_proc = 0;
 	hw->submitted = 0;
 	hw->completed = 0;
 	hw->errors = 0;
@@ -110,17 +113,33 @@ hacc_dma_start(struct rte_dma_dev *dev)
 static int
 hacc_dma_stop(struct rte_dma_dev *dev)
 {
+#define MAX_WAIT_MSEC	1000
+#define MAX_CPL_NUM	64
 	struct hacc_dma_dev *hw = dev->data->dev_private;
-
-	if ((*hw->sq_status != 0) || (*hw->cq_status != 0)) {
-		/* This indicates that the dev is abnormal. The correct error handling
-		 * is to close the dev (so that kernel module will perform error handling)
-		 * and apply for a new dev.
-		 * If an error code is returned here, the dev cannot be closed. Therefore,
-		 * zero is returned and an error trace is added.
-		 */
-		HACC_DMA_ERR(hw, "detect dev is abnormal!");
-		return 0;
+	uint32_t wait_msec = 0;
+
+	/* Flag stop processing new requests. */
+	hw->stop_proc = 1;
+	/* Currently, there is no method to notify the hardware to stop.
+	 * Therefore, the timeout mechanism is used to wait for the dataplane
+	 * to stop.
+	 */
+	while (hw->sq_head != hw->sq_tail && wait_msec++ < MAX_WAIT_MSEC) {
+		if ((*hw->sq_status != 0) || (*hw->cq_status != 0)) {
+			/* This indicates that the dev is abnormal. The correct error handling
+			 * is to close the dev (so that kernel module will perform error handling)
+			 * and apply for a new dev.
+			 * If an error code is returned here, the dev cannot be closed. Therefore,
+			 * zero is returned and an error trace is added.
+			 */
+			HACC_DMA_ERR(hw, "detect dev is abnormal!");
+			return 0;
+		}
+		rte_delay_ms(1);
+	}
+	if (hw->sq_head != hw->sq_tail) {
+		HACC_DMA_ERR(hw, "dev is still active!");
+		return -EBUSY;
 	}
 
 	return 0;
@@ -187,10 +206,11 @@ hacc_dma_dump(const struct rte_dma_dev *dev, FILE *f)
 {
 	struct hacc_dma_dev *hw = dev->data->dev_private;
 
-	fprintf(f, "  sqn: %u sq_status: %s cq_status: %s\n"
+	fprintf(f, "  sqn: %u sq_status: %s cq_status: %s stop_proc: %u\n"
 		"  sqe_size: %u sq_depth: %u sq_depth_mask: %u cq_depth: %u\n",
 		hw->sqn, (*hw->sq_status != 0) ? "ERR" : "OK",
 		(*hw->cq_status != 0) ? "ERR" : "OK",
+		hw->stop_proc,
 		hw->sqe_size, hw->sq_depth, hw->sq_depth_mask, hw->cq_depth);
 	fprintf(f, "  ridx: %u cridx: %u\n"
 		"  sq_head: %u sq_tail: %u cq_sq_head: %u avail_sqes: %u\n"
@@ -206,6 +226,280 @@ hacc_dma_dump(const struct rte_dma_dev *dev, FILE *f)
 	return 0;
 }
 
+static inline void
+hacc_dma_sq_doorbell(struct hacc_dma_dev *hw)
+{
+	uint64_t doorbell = (uint64_t)(hw->sqn & HACC_DMA_DOORBELL_SQN_MASK) |
+			    (HACC_DMA_DOORBELL_SQ_CMD << HACC_DMA_DOORBELL_CMD_SHIFT) |
+			    (((uint64_t)hw->sq_tail) << HACC_DMA_DOORBELL_IDX_SHIFT);
+	rte_io_wmb();
+	*(volatile uint64_t *)hw->doorbell_reg = doorbell;
+}
+
+static int
+hacc_dma_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t dst,
+	      uint32_t length, uint64_t flags)
+{
+	struct hacc_dma_dev *hw = dev_private;
+	struct hacc_dma_sqe *sqe = &hw->sqe[hw->sq_tail];
+
+	RTE_SET_USED(vchan);
+
+	if (unlikely(hw->stop_proc > 0))
+		return -EPERM;
+
+	if (unlikely(length > HACC_DMA_MAX_OP_SIZE)) {
+		hw->invalid_lens++;
+		return -EINVAL;
+	}
+
+	if (unlikely(*hw->sq_status != 0)) {
+		hw->io_errors++;
+		return -EIO;
+	}
+
+	if (hw->avail_sqes == 0) {
+		hw->qfulls++;
+		return -ENOSPC;
+	}
+
+	sqe->bd_type       = HACC_DMA_SQE_TYPE;
+	sqe->task_type     = HACC_DMA_TASK_TYPE;
+	sqe->task_type_ext = HACC_DMA_DATA_MEMCPY;
+	sqe->init_val      = 0;
+	sqe->addr_array    = src;
+	sqe->dst_addr      = dst;
+	sqe->data_size     = length;
+	sqe->dw0           = HACC_DMA_SVA_PREFETCH_EN;
+	sqe->wb_field      = 0;
+
+	hw->sq_tail = (hw->sq_tail + 1) & hw->sq_depth_mask;
+	hw->avail_sqes--;
+	hw->submitted++;
+
+	if (flags & RTE_DMA_OP_FLAG_SUBMIT)
+		hacc_dma_sq_doorbell(hw);
+
+	return hw->ridx++;
+}
+
+static int
+hacc_dma_fill(void *dev_private, uint16_t vchan, uint64_t pattern,
+	      rte_iova_t dst, uint32_t length, uint64_t flags)
+{
+	struct hacc_dma_dev *hw = dev_private;
+	struct hacc_dma_sqe *sqe = &hw->sqe[hw->sq_tail];
+
+	RTE_SET_USED(vchan);
+
+	if (unlikely(hw->stop_proc > 0))
+		return -EPERM;
+
+	if (unlikely(length > HACC_DMA_MAX_OP_SIZE)) {
+		hw->invalid_lens++;
+		return -EINVAL;
+	}
+
+	if (unlikely(*hw->sq_status != 0)) {
+		hw->io_errors++;
+		return -EIO;
+	}
+
+	if (hw->avail_sqes == 0) {
+		hw->qfulls++;
+		return -ENOSPC;
+	}
+
+	sqe->bd_type       = HACC_DMA_SQE_TYPE;
+	sqe->task_type     = HACC_DMA_TASK_TYPE;
+	sqe->task_type_ext = HACC_DMA_DATA_MEMSET;
+	sqe->init_val      = pattern;
+	sqe->addr_array    = 0;
+	sqe->dst_addr      = dst;
+	sqe->data_size     = length;
+	sqe->dw0           = HACC_DMA_SVA_PREFETCH_EN;
+	sqe->wb_field      = 0;
+
+	hw->sq_tail = (hw->sq_tail + 1) & hw->sq_depth_mask;
+	hw->avail_sqes--;
+	hw->submitted++;
+
+	if (flags & RTE_DMA_OP_FLAG_SUBMIT)
+		hacc_dma_sq_doorbell(hw);
+
+	return hw->ridx++;
+}
+
+static int
+hacc_dma_submit(void *dev_private, uint16_t vchan)
+{
+	struct hacc_dma_dev *hw = dev_private;
+
+	RTE_SET_USED(vchan);
+
+	if (unlikely(*hw->sq_status != 0)) {
+		hw->io_errors++;
+		return -EIO;
+	}
+
+	hacc_dma_sq_doorbell(hw);
+
+	return 0;
+}
+
+static inline void
+hacc_dma_cq_doorbell(struct hacc_dma_dev *hw)
+{
+	uint64_t doorbell = (uint64_t)(hw->sqn & HACC_DMA_DOORBELL_SQN_MASK) |
+			    (HACC_DMA_DOORBELL_CQ_CMD << HACC_DMA_DOORBELL_CMD_SHIFT) |
+			    (((uint64_t)hw->cq_head) << HACC_DMA_DOORBELL_IDX_SHIFT);
+	rte_io_wmb();
+	*(volatile uint64_t *)hw->doorbell_reg = doorbell;
+}
+
+static inline void
+hacc_dma_scan_cq(struct hacc_dma_dev *hw)
+{
+	volatile struct hacc_dma_cqe *cqe;
+	struct hacc_dma_sqe *sqe;
+	uint16_t csq_head = hw->cq_sq_head;
+	uint16_t cq_head = hw->cq_head;
+	uint16_t count = 0;
+	uint64_t misc;
+
+	if (unlikely(*hw->cq_status != 0)) {
+		hw->io_errors++;
+		return;
+	}
+
+	while (count < hw->cq_depth) {
+		cqe = &hw->cqe[cq_head];
+		misc = cqe->misc;
+		misc = rte_le_to_cpu_64(misc);
+		if (RTE_FIELD_GET64(HACC_DMA_CQE_VALID_B, misc) != hw->cqe_vld)
+			break;
+
+		csq_head = RTE_FIELD_GET64(HACC_DMA_SQ_HEAD_MASK, misc);
+		if (unlikely(csq_head > hw->sq_depth_mask)) {
+			/**
+			 * Defensive programming to prevent overflow of the
+			 * status array indexed by csq_head. Only error logs
+			 * are used for prompting.
+			 */
+			HACC_DMA_ERR(hw, "invalid csq_head: %u!", csq_head);
+			count = 0;
+			break;
+		}
+		sqe = &hw->sqe[csq_head];
+		if (sqe->done_flag != HACC_DMA_TASK_DONE ||
+			sqe->err_type || sqe->ext_err_type || sqe->wtype) {
+			hw->status[csq_head] = RTE_DMA_STATUS_ERROR_UNKNOWN;
+		}
+
+		count++;
+		cq_head++;
+		if (cq_head == hw->cq_depth) {
+			hw->cqe_vld = !hw->cqe_vld;
+			cq_head = 0;
+		}
+	}
+
+	if (count == 0)
+		return;
+
+	hw->cq_head = cq_head;
+	hw->cq_sq_head = (csq_head + 1) & hw->sq_depth_mask;
+	hw->avail_sqes += count;
+	hw->cqs_completed += count;
+	if (hw->cqs_completed >= HACC_DMA_CQ_DOORBELL_PACE) {
+		hacc_dma_cq_doorbell(hw);
+		hw->cqs_completed = 0;
+	}
+}
+
+static inline uint16_t
+hacc_dma_calc_cpls(struct hacc_dma_dev *hw, const uint16_t nb_cpls)
+{
+	uint16_t cpl_num;
+
+	if (hw->cq_sq_head >= hw->sq_head)
+		cpl_num = hw->cq_sq_head - hw->sq_head;
+	else
+		cpl_num = hw->sq_depth_mask + 1 - hw->sq_head + hw->cq_sq_head;
+
+	if (cpl_num > nb_cpls)
+		cpl_num = nb_cpls;
+
+	return cpl_num;
+}
+
+static uint16_t
+hacc_dma_completed(void *dev_private,
+		   uint16_t vchan, const uint16_t nb_cpls,
+		   uint16_t *last_idx, bool *has_error)
+{
+	struct hacc_dma_dev *hw = dev_private;
+	uint16_t sq_head = hw->sq_head;
+	uint16_t cpl_num, i;
+
+	RTE_SET_USED(vchan);
+	hacc_dma_scan_cq(hw);
+
+	cpl_num = hacc_dma_calc_cpls(hw, nb_cpls);
+	for (i = 0; i < cpl_num; i++) {
+		if (hw->status[sq_head]) {
+			*has_error = true;
+			break;
+		}
+		sq_head = (sq_head + 1) & hw->sq_depth_mask;
+	}
+	*last_idx = hw->cridx + i - 1;
+	if (i > 0) {
+		hw->cridx += i;
+		hw->sq_head = sq_head;
+		hw->completed += i;
+	}
+
+	return i;
+}
+
+static uint16_t
+hacc_dma_completed_status(void *dev_private,
+			  uint16_t vchan, const uint16_t nb_cpls,
+			  uint16_t *last_idx, enum rte_dma_status_code *status)
+{
+	struct hacc_dma_dev *hw = dev_private;
+	uint16_t sq_head = hw->sq_head;
+	uint16_t cpl_num, i;
+
+	RTE_SET_USED(vchan);
+	hacc_dma_scan_cq(hw);
+
+	cpl_num = hacc_dma_calc_cpls(hw, nb_cpls);
+	for (i = 0; i < cpl_num; i++) {
+		status[i] = hw->status[sq_head];
+		hw->errors += !!status[i];
+		hw->status[sq_head] = 0;
+		sq_head = (sq_head + 1) & hw->sq_depth_mask;
+	}
+	*last_idx = hw->cridx + cpl_num - 1;
+	if (likely(cpl_num > 0)) {
+		hw->cridx += cpl_num;
+		hw->sq_head = sq_head;
+		hw->completed += cpl_num;
+	}
+
+	return cpl_num;
+}
+
+static uint16_t
+hacc_dma_burst_capacity(const void *dev_private, uint16_t vchan)
+{
+	const struct hacc_dma_dev *hw = dev_private;
+	RTE_SET_USED(vchan);
+	return hw->avail_sqes;
+}
+
 static const struct rte_dma_dev_ops hacc_dmadev_ops = {
 	.dev_info_get     = hacc_dma_info_get,
 	.dev_configure    = hacc_dma_configure,
@@ -297,6 +591,12 @@ hacc_dma_create(struct rte_uacce_device *uacce_dev, uint16_t queue_id)
 	dev->device = &uacce_dev->device;
 	dev->dev_ops = &hacc_dmadev_ops;
 	dev->fp_obj->dev_private = dev->data->dev_private;
+	dev->fp_obj->copy = hacc_dma_copy;
+	dev->fp_obj->fill = hacc_dma_fill;
+	dev->fp_obj->submit = hacc_dma_submit;
+	dev->fp_obj->completed = hacc_dma_completed;
+	dev->fp_obj->completed_status = hacc_dma_completed_status;
+	dev->fp_obj->burst_capacity = hacc_dma_burst_capacity;
 
 	hw = dev->data->dev_private;
 	hw->data = dev->data; /* make sure ACC_DMA_DEBUG/INFO/WARN/ERR was available. */
diff --git a/drivers/dma/hisi_acc/hisi_acc_dmadev.h b/drivers/dma/hisi_acc/hisi_acc_dmadev.h
index bc94b9d5db..cb63b30642 100644
--- a/drivers/dma/hisi_acc/hisi_acc_dmadev.h
+++ b/drivers/dma/hisi_acc/hisi_acc_dmadev.h
@@ -25,9 +25,80 @@ struct hacc_dma_config {
 };
 
 #define HACC_DMA_DOORBELL_OFFSET		0x1000u
+#define HACC_DMA_DOORBELL_SQN_MASK	0x3FFu
+#define HACC_DMA_DOORBELL_SQ_CMD		0ull
+#define HACC_DMA_DOORBELL_CQ_CMD		1ull
+#define HACC_DMA_DOORBELL_CMD_SHIFT	12
+#define HACC_DMA_DOORBELL_IDX_SHIFT	32
+
+#define HACC_DMA_TASK_TYPE		0x3
+#define HACC_DMA_SQE_TYPE		0x1
+#define HACC_DMA_SVA_PREFETCH_EN		RTE_BIT32(15)
+#define HACC_DMA_MAX_OP_SIZE		(RTE_BIT32(24) - 1)
+
+enum {
+	HACC_DMA_DATA_MEMCPY = 0,
+	HACC_DMA_DATA_MEMSET = 7,
+};
+
+enum {
+	HACC_DMA_TASK_DONE = 1,
+	HACC_DMA_TASK_ERROR,
+};
 
-struct hacc_dma_sqe {};
-struct hacc_dma_cqe {};
+struct hacc_dma_sqe { /* Submission queue entry describing one copy or fill task. */
+	uint32_t bd_type : 6;
+	uint32_t resv1 : 2;
+	uint32_t task_type : 6;
+	uint32_t resv2 : 2;
+	uint32_t task_type_ext : 6;
+	uint32_t resv3 : 9;
+	uint32_t bd_invlid : 1;
+	uint32_t rsv4[2];
+	uint32_t low_tag;
+	uint32_t hi_tag;
+	/* The number of bytes to be copied or filled for single address. */
+	uint32_t data_size;
+	uint32_t rsv5;
+	/*
+	 * 0 ~ 13 bits: reserved,
+	 * 14 bit: single address or multi addresses,
+	 * 15 bit: sva prefetch en.
+	 */
+	uint16_t dw0;
+	/*
+	 * 0 ~ 5 bits: reserved,
+	 * 6 ~ 13 bits: address num,
+	 * 14 ~ 15 bits: reserved.
+	 */
+	uint16_t dw1;
+	uint64_t init_val; /* NOTE(review): presumably the fill pattern for memset tasks - confirm. */
+	uint32_t rsv6[12];
+	/* dst addr for single address task. */
+	uint64_t dst_addr;
+	uint32_t rsv7[2];
+	/* src addr for single address task, addr array for multi addresses. */
+	uint64_t addr_array;
+	union {
+		uint32_t wb_field; /* the whole write-back word; aliases the bit-fields below. */
+		struct {
+			uint32_t done_flag : 3; /* NOTE(review): presumably HACC_DMA_TASK_DONE/ERROR - confirm. */
+			uint32_t rsv8 : 1;
+			uint32_t ext_err_type : 12;
+			uint32_t err_type : 8;
+			uint32_t wtype : 8;
+		};
+	};
+	uint32_t rsv9[3];
+};
+
+#define HACC_DMA_SQ_HEAD_MASK		RTE_GENMASK64(15, 0)
+#define HACC_DMA_CQE_VALID_B		RTE_BIT64(48)
+
+struct hacc_dma_cqe { /* Completion queue entry: two 64-bit words. */
+	uint64_t rsv;
+	uint64_t misc; /* NOTE(review): presumably decoded with HACC_DMA_SQ_HEAD_MASK and HACC_DMA_CQE_VALID_B - confirm. */
+};
 
 struct hacc_dma_dev {
 	struct hacc_dma_sqe *sqe;
@@ -73,6 +144,7 @@ struct hacc_dma_dev {
 	uint16_t cq_head;       /**< CQ index for next scans. */
 	uint16_t cqs_completed; /**< accumulated number of completed CQs. */
 	uint8_t  cqe_vld;       /**< valid bit for CQE, will change for every round. */
+	volatile uint8_t stop_proc; /**< whether stop processing new requests. */
 
 	uint64_t submitted;
 	uint64_t completed;
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH v5 4/4] dma/hisi_acc: add doc
  2025-10-16  1:52 ` [PATCH v5 0/4] add Hisilicon accelerator DMA driver Chengwen Feng
                     ` (2 preceding siblings ...)
  2025-10-16  1:52   ` [PATCH v5 3/4] dma/hisi_acc: add data " Chengwen Feng
@ 2025-10-16  1:52   ` Chengwen Feng
  3 siblings, 0 replies; 37+ messages in thread
From: Chengwen Feng @ 2025-10-16  1:52 UTC (permalink / raw)
  To: thomas; +Cc: dev, liuyonglong

This commit adds document for Hisilicon accelerator DMA driver.

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
---
 MAINTAINERS                            |  1 +
 doc/guides/dmadevs/hisi_acc.rst        | 63 ++++++++++++++++++++++++++
 doc/guides/dmadevs/index.rst           |  1 +
 doc/guides/rel_notes/release_25_11.rst |  6 +++
 4 files changed, 71 insertions(+)
 create mode 100644 doc/guides/dmadevs/hisi_acc.rst

diff --git a/MAINTAINERS b/MAINTAINERS
index b5e3479203..ce038822c8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1369,6 +1369,7 @@ F: doc/guides/dmadevs/hisilicon.rst
 HiSilicon Accelerator DMA
 M: Chengwen Feng <fengchengwen@huawei.com>
 F: drivers/dma/hisi_acc/
+F: doc/guides/dmadevs/hisi_acc.rst
 
 Marvell CNXK DPI DMA
 M: Vamsi Attunuru <vattunuru@marvell.com>
diff --git a/doc/guides/dmadevs/hisi_acc.rst b/doc/guides/dmadevs/hisi_acc.rst
new file mode 100644
index 0000000000..41fc24877f
--- /dev/null
+++ b/doc/guides/dmadevs/hisi_acc.rst
@@ -0,0 +1,63 @@
+..  SPDX-License-Identifier: BSD-3-Clause
+    Copyright (c) 2025 HiSilicon Technologies Co., Ltd. All rights reserved.
+
+HISILICON Accelerator DMA Driver
+================================
+
+Kunpeng SoC has an internal accelerator unit which includes zip function, and
+the zip also supports data copy and fill. This driver exposes this capability to
+DPDK application.
+
+
+Supported Kunpeng SoCs
+----------------------
+
+* Kunpeng 920
+
+
+Device Setup
+-------------
+
+In order to use the device in DPDK, user should insmod uacce.ko, hisi_qm.ko and
+hisi_zip.ko (with module parameter uacce_mode=1), then there will be several
+subdirectories whose names start with hisi_zip in /sys/class/uacce/ directory.
+
+Device Probing and Initialization
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+User should use following method to probe device::
+
+        $ dpdk-app -a uacce:hisi_zip-0,queues=2 ...
+
+hisi_zip-0 is the directory name in the /sys/class/uacce/ directory, queues is
+runtime config parameter which indicates how many dmadevs are created.
+
+If the probe is successful, two dmadevs are created, named "hisi_zip-0-dma0"
+and "hisi_zip-0-dma1".
+
+.. note::
+        In the /sys/class/uacce/hisi_zip-x/ directory, user could query api and
+        algorithms, this driver can only match the device whose api is
+        hisi_qm_v5 and algorithms contain udma.
+
+Device Configuration
+~~~~~~~~~~~~~~~~~~~~~
+
+Configuration requirements:
+
+* ``ring_size`` is obtained from the UACCE API and is a fixed value.
+* Only one ``vchan`` is supported per ``dmadev``.
+* Silent mode is not supported.
+* The transfer direction must be set to ``RTE_DMA_DIR_MEM_TO_MEM``.
+
+
+Device Datapath Capability and Limitation
+-----------------------------------------
+
+Support memory copy and fill operations.
+
+.. note::
+        Currently, the maximum size of the operation data is limited to 16MB-1B
+        in the driver. The device actually supports operations in a larger data
+        size, but the driver requires complex operations in the datapath. If you
+        have such requirement, please contact the maintainers.
diff --git a/doc/guides/dmadevs/index.rst b/doc/guides/dmadevs/index.rst
index 15ddaf5192..86dcdbb412 100644
--- a/doc/guides/dmadevs/index.rst
+++ b/doc/guides/dmadevs/index.rst
@@ -14,6 +14,7 @@ an application through DMA API.
    cnxk
    dpaa
    dpaa2
+   hisi_acc
    hisilicon
    idxd
    ioat
diff --git a/doc/guides/rel_notes/release_25_11.rst b/doc/guides/rel_notes/release_25_11.rst
index 3c32b26e3b..3c93c19c0d 100644
--- a/doc/guides/rel_notes/release_25_11.rst
+++ b/doc/guides/rel_notes/release_25_11.rst
@@ -71,6 +71,12 @@ New Features
   Added Ethernet link speed for 800 Gb/s as it is well standardized in IEEE,
   and some devices already support this speed.
 
+* **Add Hisilicon Accelerator DMA Driver.**
+
+  Kunpeng SoC has an internal accelerator unit which includes zip function,
+  and the zip also supports data copy and fill. This driver exposes this
+  capability to DPDK application.
+
 * **Updated NXP DPAA2 ethernet driver.**
 
   * Enabled software taildrop for ordered queues.
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH v6 0/4] add Hisilicon accelerator DMA driver
  2025-08-27  9:27 [PATCH 0/4] add Hisilicon accelerator DMA driver Chengwen Feng
                   ` (7 preceding siblings ...)
  2025-10-16  1:52 ` [PATCH v5 0/4] add Hisilicon accelerator DMA driver Chengwen Feng
@ 2025-10-16  2:53 ` Chengwen Feng
  2025-10-16  2:53   ` [PATCH v6 1/4] dma/hisi_acc: add probe and remove Chengwen Feng
                     ` (4 more replies)
  8 siblings, 5 replies; 37+ messages in thread
From: Chengwen Feng @ 2025-10-16  2:53 UTC (permalink / raw)
  To: thomas; +Cc: dev, liuyonglong

This patchset adds Hisilicon accelerator DMA driver which based on
UACCE bus.

Chengwen Feng (4):
  dma/hisi_acc: add probe and remove
  dma/hisi_acc: add control path ops
  dma/hisi_acc: add data path ops
  dma/hisi_acc: add doc

---
v6: update rst file to reflect this driver correspond accelerator
    subsystem.
v5: rename driver name to hisi_acc.
v4: start/stop support detect sq/cq-status.
    add wait requests complete in stop.
v3: fix remove wrong dmadev by add prefix match.
    use proper errcode when get avail queue fail and dmadev alloc fail.
v2: fix magic number.
    support clean write back field when submit request.

 MAINTAINERS                            |   5 +
 doc/guides/dmadevs/hisi_acc.rst        |  62 ++
 doc/guides/dmadevs/index.rst           |   1 +
 doc/guides/rel_notes/release_25_11.rst |   6 +
 drivers/dma/hisi_acc/hisi_acc_dmadev.c | 774 +++++++++++++++++++++++++
 drivers/dma/hisi_acc/hisi_acc_dmadev.h | 169 ++++++
 drivers/dma/hisi_acc/meson.build       |  21 +
 drivers/dma/meson.build                |   1 +
 8 files changed, 1039 insertions(+)
 create mode 100644 doc/guides/dmadevs/hisi_acc.rst
 create mode 100644 drivers/dma/hisi_acc/hisi_acc_dmadev.c
 create mode 100644 drivers/dma/hisi_acc/hisi_acc_dmadev.h
 create mode 100644 drivers/dma/hisi_acc/meson.build

-- 
2.17.1


^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH v6 1/4] dma/hisi_acc: add probe and remove
  2025-10-16  2:53 ` [PATCH v6 0/4] add Hisilicon accelerator DMA driver Chengwen Feng
@ 2025-10-16  2:53   ` Chengwen Feng
  2025-10-16  2:53   ` [PATCH v6 2/4] dma/hisi_acc: add control path ops Chengwen Feng
                     ` (3 subsequent siblings)
  4 siblings, 0 replies; 37+ messages in thread
From: Chengwen Feng @ 2025-10-16  2:53 UTC (permalink / raw)
  To: thomas; +Cc: dev, liuyonglong

This patch adds probe and remove operation for Hisilicon accelerator DMA
driver.

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
---
 MAINTAINERS                            |   4 +
 drivers/dma/hisi_acc/hisi_acc_dmadev.c | 289 +++++++++++++++++++++++++
 drivers/dma/hisi_acc/hisi_acc_dmadev.h |  55 +++++
 drivers/dma/hisi_acc/meson.build       |  21 ++
 drivers/dma/meson.build                |   1 +
 5 files changed, 370 insertions(+)
 create mode 100644 drivers/dma/hisi_acc/hisi_acc_dmadev.c
 create mode 100644 drivers/dma/hisi_acc/hisi_acc_dmadev.h
 create mode 100644 drivers/dma/hisi_acc/meson.build

diff --git a/MAINTAINERS b/MAINTAINERS
index 1a2729be66..b5e3479203 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1366,6 +1366,10 @@ M: Chengwen Feng <fengchengwen@huawei.com>
 F: drivers/dma/hisilicon/
 F: doc/guides/dmadevs/hisilicon.rst
 
+HiSilicon Accelerator DMA
+M: Chengwen Feng <fengchengwen@huawei.com>
+F: drivers/dma/hisi_acc/
+
 Marvell CNXK DPI DMA
 M: Vamsi Attunuru <vattunuru@marvell.com>
 T: git://dpdk.org/next/dpdk-next-net-mrvl
diff --git a/drivers/dma/hisi_acc/hisi_acc_dmadev.c b/drivers/dma/hisi_acc/hisi_acc_dmadev.c
new file mode 100644
index 0000000000..db6b8e8ba8
--- /dev/null
+++ b/drivers/dma/hisi_acc/hisi_acc_dmadev.c
@@ -0,0 +1,289 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2025 HiSilicon Technologies Co., Ltd. All rights reserved.
+ */
+
+#include <errno.h>
+#include <inttypes.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include <rte_byteorder.h>
+#include <rte_eal.h>
+#include <rte_io.h>
+#include <rte_kvargs.h>
+#include <rte_log.h>
+#include <rte_malloc.h>
+
+#include <rte_dmadev_pmd.h>
+
+#include "hisi_acc_dmadev.h"
+
+RTE_LOG_REGISTER_DEFAULT(hacc_dma_logtype, INFO); /* driver log type, registered with level INFO. */
+#define RTE_LOGTYPE_HACC_DMA hacc_dma_logtype /* lets the HACC_DMA token below name this log type. */
+#define HACC_DMA_LOG(level, ...) \
+	RTE_LOG_LINE_PREFIX(level, HACC_DMA, "%s(): ", __func__, __VA_ARGS__) /* prefix: calling function name. */
+#define HACC_DMA_DEV_LOG(hw, level, ...) \
+	RTE_LOG_LINE_PREFIX(level, HACC_DMA, "%s %s(): ", \
+		(hw)->data->dev_name RTE_LOG_COMMA __func__, __VA_ARGS__) /* prefix: dmadev name and function; needs hw->data set. */
+#define HACC_DMA_DEBUG(hw, ...) \
+	HACC_DMA_DEV_LOG(hw, DEBUG, __VA_ARGS__)
+#define HACC_DMA_INFO(hw, ...) \
+	HACC_DMA_DEV_LOG(hw, INFO, __VA_ARGS__)
+#define HACC_DMA_WARN(hw, ...) \
+	HACC_DMA_DEV_LOG(hw, WARNING, __VA_ARGS__)
+#define HACC_DMA_ERR(hw, ...) \
+	HACC_DMA_DEV_LOG(hw, ERR, __VA_ARGS__)
+
+/* Write the dmadev name "<uacce device name>-dma<queue_id>" into dev_name. */
+static void
+hacc_dma_gen_dev_name(const struct rte_uacce_device *uacce_dev,
+		      uint16_t queue_id, char *dev_name, size_t size)
+{
+	const char *base = uacce_dev->device.name;
+
+	/* Zero the whole buffer so every byte after the string is NUL. */
+	memset(dev_name, 0, size);
+	(void)snprintf(dev_name, size, "%s-dma%u", base, queue_id);
+}
+
+/* Write the name prefix "<uacce device name>-dma" shared by all dmadevs of uacce_dev. */
+static void
+hacc_dma_gen_dev_prefix(const struct rte_uacce_device *uacce_dev, char *dev_name, size_t size)
+{
+	const char *base = uacce_dev->device.name;
+
+	/* Zero the whole buffer so every byte after the string is NUL. */
+	memset(dev_name, 0, size);
+	(void)snprintf(dev_name, size, "%s-dma", base);
+}
+
+/*
+ * Query the queue pair context and geometry through the UACCE queue ioctls
+ * and cache them in hw (sqn, sqe_size, sq_depth, cq_depth, sq_depth_mask).
+ *
+ * Returns 0 on success, -EINVAL if an ioctl fails or if the reported SQ depth
+ * is not a non-zero power of two.
+ */
+static int
+hacc_dma_get_qp_info(struct hacc_dma_dev *hw)
+{
+#define CMD_QM_GET_QP_CTX	_IOWR('H', 10, struct hacc_dma_qp_contex)
+#define CMD_QM_GET_QP_INFO	_IOWR('H', 11, struct hacc_dma_qp_info)
+#define QP_ALG_TYPE		2
+	struct hacc_dma_qp_contex {
+		uint16_t id;
+		uint16_t qc_type;
+	} qp_ctx;
+	struct hacc_dma_qp_info {
+		uint32_t sqe_size;
+		uint16_t sq_depth;
+		uint16_t cq_depth;
+		uint64_t reserved;
+	} qp_info;
+	int ret;
+
+	memset(&qp_ctx, 0, sizeof(qp_ctx));
+	qp_ctx.qc_type = QP_ALG_TYPE;
+	ret = rte_uacce_queue_ioctl(&hw->qctx, CMD_QM_GET_QP_CTX, &qp_ctx);
+	if (ret != 0) {
+		HACC_DMA_ERR(hw, "get qm qp context fail!");
+		return -EINVAL;
+	}
+	hw->sqn = qp_ctx.id;
+
+	memset(&qp_info, 0, sizeof(qp_info));
+	ret = rte_uacce_queue_ioctl(&hw->qctx, CMD_QM_GET_QP_INFO, &qp_info);
+	if (ret != 0) {
+		HACC_DMA_ERR(hw, "get qm qp info fail!");
+		return -EINVAL;
+	}
+	/*
+	 * sq_depth_mask is derived as sq_depth - 1, so the depth must be a
+	 * non-zero power of two. Zero passes the (x & (x - 1)) test and would
+	 * produce a mask of 0xFFFF, hence it is rejected explicitly.
+	 */
+	if (qp_info.sq_depth == 0 ||
+	    (qp_info.sq_depth & (qp_info.sq_depth - 1)) != 0) {
+		HACC_DMA_ERR(hw, "sq depth is not 2's power!");
+		return -EINVAL;
+	}
+	hw->sqe_size = qp_info.sqe_size;
+	hw->sq_depth = qp_info.sq_depth;
+	hw->cq_depth = qp_info.cq_depth;
+	hw->sq_depth_mask = hw->sq_depth - 1;
+
+	return 0;
+}
+
+static int
+hacc_dma_create(struct rte_uacce_device *uacce_dev, uint16_t queue_id)
+{
+	char name[RTE_DEV_NAME_MAX_LEN];
+	struct rte_dma_dev *dev;
+	struct hacc_dma_dev *hw;
+	int ret;
+
+	hacc_dma_gen_dev_name(uacce_dev, queue_id, name, sizeof(name));
+	dev = rte_dma_pmd_allocate(name, uacce_dev->device.numa_node,
+				   sizeof(struct hacc_dma_dev));
+	if (dev == NULL) {
+		HACC_DMA_LOG(ERR, "%s allocate dmadev fail!", name);
+		return -ENOMEM;
+	}
+
+	dev->device = &uacce_dev->device;
+	dev->fp_obj->dev_private = dev->data->dev_private;
+
+	hw = dev->data->dev_private;
+	hw->data = dev->data; /* must be set first: HACC_DMA_DEBUG/INFO/WARN/ERR dereference hw->data. */
+
+	ret = rte_uacce_queue_alloc(uacce_dev, &hw->qctx);
+	if (ret != 0) {
+		HACC_DMA_ERR(hw, "alloc queue fail!");
+		goto release_dma_pmd;
+	}
+
+	ret = hacc_dma_get_qp_info(hw);
+	if (ret != 0)
+		goto free_uacce_queue;
+
+	hw->io_base = rte_uacce_queue_mmap(&hw->qctx, RTE_UACCE_QFRT_MMIO);
+	if (hw->io_base == NULL) {
+		HACC_DMA_ERR(hw, "mmap MMIO region fail!");
+		ret = -EINVAL;
+		goto free_uacce_queue;
+	}
+	hw->doorbell_reg = (void *)((uintptr_t)hw->io_base + HACC_DMA_DOORBELL_OFFSET);
+
+	hw->dus_base = rte_uacce_queue_mmap(&hw->qctx, RTE_UACCE_QFRT_DUS);
+	if (hw->dus_base == NULL) {
+		HACC_DMA_ERR(hw, "mmap DUS region fail!");
+		ret = -EINVAL;
+		goto unmap_mmio;
+	}
+	hw->sqe = hw->dus_base; /* the SQE array starts at the beginning of the DUS region. */
+	hw->cqe = (void *)((uintptr_t)hw->dus_base + hw->sqe_size * hw->sq_depth); /* CQEs follow the SQE array. */
+	hw->sq_status = (uint32_t *)((uintptr_t)hw->dus_base +
+			uacce_dev->qfrt_sz[RTE_UACCE_QFRT_DUS] - sizeof(uint32_t)); /* last 32-bit word of the DUS region. */
+	hw->cq_status = hw->sq_status - 1; /* the 32-bit word just before sq_status. */
+
+	hw->status = rte_zmalloc_socket(NULL, sizeof(uint16_t) * hw->sq_depth,
+					RTE_CACHE_LINE_SIZE, uacce_dev->numa_node);
+	if (hw->status == NULL) {
+		HACC_DMA_ERR(hw, "malloc status region fail!");
+		ret = -ENOMEM;
+		goto unmap_dus;
+	}
+
+	dev->state = RTE_DMA_DEV_READY;
+	HACC_DMA_DEBUG(hw, "create dmadev %s success!", name);
+
+	return 0;
+
+unmap_dus: /* error unwinding: release resources in reverse order of acquisition. */
+	rte_uacce_queue_unmap(&hw->qctx, RTE_UACCE_QFRT_DUS);
+unmap_mmio:
+	rte_uacce_queue_unmap(&hw->qctx, RTE_UACCE_QFRT_MMIO);
+free_uacce_queue:
+	rte_uacce_queue_free(&hw->qctx);
+release_dma_pmd:
+	rte_dma_pmd_release(name);
+	return ret;
+}
+
+/*
+ * kvargs handler for the "queues" devarg.
+ *
+ * An unparsable or zero value falls back to HACC_DMA_DEFAULT_QUEUES, a value
+ * above config->avail_queues is clamped to it; always returns 0.
+ */
+static int
+hacc_dma_parse_queues(const char *key, const char *value, void *extra_args)
+{
+	struct hacc_dma_config *config = extra_args;
+	char *tail;
+	uint64_t num;
+
+	RTE_SET_USED(key);
+
+	errno = 0;
+	num = strtoull(value, &tail, 0);
+	if (errno == ERANGE || value == tail || *tail != '\0' || num == 0) {
+		HACC_DMA_LOG(ERR, "%s invalid queues! set to default one queue!",
+			    config->dev->name);
+		config->queues = HACC_DMA_DEFAULT_QUEUES;
+		return 0;
+	}
+
+	if (num > config->avail_queues) {
+		HACC_DMA_LOG(WARNING, "%s exceed available queues! set to available queues %u",
+			     config->dev->name, config->avail_queues);
+		config->queues = config->avail_queues;
+		return 0;
+	}
+
+	config->queues = num;
+	return 0;
+}
+
+/*
+ * Fill config from the device: record the available queue count (capped at
+ * UINT16_MAX) and, when devargs are present and parsable, apply the "queues"
+ * argument. Returns -EINVAL when the device has no available queue, else 0.
+ */
+static int
+hacc_dma_parse_devargs(struct rte_uacce_device *uacce_dev, struct hacc_dma_config *config)
+{
+	int nb_avail = rte_uacce_avail_queues(uacce_dev);
+	struct rte_kvargs *kvs;
+
+	if (nb_avail <= 0) {
+		HACC_DMA_LOG(ERR, "%s don't have available queues!", uacce_dev->name);
+		return -EINVAL;
+	}
+
+	config->dev = uacce_dev;
+	if (nb_avail > UINT16_MAX)
+		config->avail_queues = UINT16_MAX;
+	else
+		config->avail_queues = nb_avail;
+
+	if (uacce_dev->device.devargs == NULL)
+		return 0;
+
+	kvs = rte_kvargs_parse(uacce_dev->device.devargs->args, NULL);
+	if (kvs != NULL) {
+		/* The handler never fails, so the result is deliberately ignored. */
+		(void)rte_kvargs_process(kvs, HACC_DMA_DEVARG_QUEUES,
+					 &hacc_dma_parse_queues, config);
+		rte_kvargs_free(kvs);
+	}
+
+	return 0;
+}
+
+/*
+ * Probe callback: create one dmadev per configured queue.
+ *
+ * Creation stops at the first failure. If at least one dmadev was created the
+ * probe still succeeds (with a warning); otherwise the error is returned.
+ */
+static int
+hacc_dma_probe(struct rte_uacce_driver *dr, struct rte_uacce_device *uacce_dev)
+{
+	struct hacc_dma_config config = { .queues = HACC_DMA_DEFAULT_QUEUES };
+	uint32_t created = 0;
+	int ret;
+
+	RTE_SET_USED(dr);
+
+	ret = hacc_dma_parse_devargs(uacce_dev, &config);
+	if (ret != 0)
+		return ret;
+
+	while (created < config.queues) {
+		ret = hacc_dma_create(uacce_dev, created);
+		if (ret != 0) {
+			HACC_DMA_LOG(ERR, "%s create dmadev No.%u failed!", uacce_dev->name, created);
+			break;
+		}
+		created++;
+	}
+
+	if (ret == 0 || created == 0)
+		return ret;
+
+	HACC_DMA_LOG(WARNING, "%s probed %u dmadev, can't probe more!", uacce_dev->name, created);
+	return 0;
+}
+
+/*
+ * Remove callback: release every dmadev whose name starts with the
+ * "<uacce device name>-dma" prefix. Always returns 0.
+ */
+static int
+hacc_dma_remove(struct rte_uacce_device *uacce_dev)
+{
+	char prefix[RTE_DEV_NAME_MAX_LEN];
+	struct rte_dma_info info;
+	size_t prefix_len;
+	int dev_id = 0;
+
+	hacc_dma_gen_dev_prefix(uacce_dev, prefix, sizeof(prefix));
+	prefix_len = strlen(prefix);
+
+	RTE_DMA_FOREACH_DEV(dev_id) {
+		if (rte_dma_info_get(dev_id, &info) != 0)
+			continue;
+		if (strncmp(info.dev_name, prefix, prefix_len) != 0)
+			continue;
+		rte_dma_pmd_release(info.dev_name);
+	}
+
+	return 0;
+}
+
+static const struct rte_uacce_id hacc_dma_id_table[] = {
+	{ "hisi_qm_v5", "udma" }, /* NOTE(review): presumably { api, algorithm } as shown under /sys/class/uacce - confirm. */
+	{ .dev_api = NULL, }, /* NOTE(review): NULL dev_api presumably terminates the table - confirm against bus_uacce. */
+};
+
+static struct rte_uacce_driver hacc_dma_pmd_drv = {
+	.id_table = hacc_dma_id_table,
+	.probe    = hacc_dma_probe,
+	.remove   = hacc_dma_remove,
+};
+
+RTE_PMD_REGISTER_UACCE(dma_hisi_acc, hacc_dma_pmd_drv);
+RTE_PMD_REGISTER_PARAM_STRING(dma_hisi_acc,
+			HACC_DMA_DEVARG_QUEUES "=<uint16> "); /* advertises the "queues" devarg. */
diff --git a/drivers/dma/hisi_acc/hisi_acc_dmadev.h b/drivers/dma/hisi_acc/hisi_acc_dmadev.h
new file mode 100644
index 0000000000..3a790bb445
--- /dev/null
+++ b/drivers/dma/hisi_acc/hisi_acc_dmadev.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2025 HiSilicon Technologies Co., Ltd. All rights reserved.
+ */
+
+#ifndef HISI_ACC_DMADEV_H
+#define HISI_ACC_DMADEV_H
+
+#include <bus_uacce_driver.h>
+#include <rte_bitops.h>
+#include <rte_common.h>
+#include <rte_dmadev_pmd.h>
+
+#define HACC_DMA_DEVARG_QUEUES		"queues"
+#define HACC_DMA_DEFAULT_QUEUES		1
+
+struct hacc_dma_config { /* Probe-time configuration of one UACCE device. */
+	uint16_t queues; /* number of dmadevs to create, from the "queues" devarg. */
+
+	/* The following fields are config contexts. */
+	struct rte_uacce_device *dev; /* device being probed; used when logging parse problems. */
+	uint16_t avail_queues; /* upper bound for queues, capped at UINT16_MAX. */
+};
+
+#define HACC_DMA_DOORBELL_OFFSET		0x1000u
+
+struct hacc_dma_sqe {};
+struct hacc_dma_cqe {};
+
+struct hacc_dma_dev {
+	struct hacc_dma_sqe *sqe;
+	struct hacc_dma_cqe *cqe;     /**< CQE array, placed after the SQE array in the DUS region. */
+	uint16_t *status;             /**< the completion status array of SQEs. */
+
+	volatile void *doorbell_reg;  /**< register address for doorbell. */
+	volatile uint32_t *sq_status; /**< SQ status pointer. */
+	volatile uint32_t *cq_status; /**< CQ status pointer. */
+
+	uint16_t sqn;           /**< SQ global number, inited when created. */
+	uint16_t sq_depth_mask; /**< SQ depth - 1, the SQ depth is power of 2. */
+
+	uint16_t cq_depth;      /**< CQ depth, inited when created. */
+
+	/**
+	 * The following fields are not accessed in the I/O path, so they are
+	 * placed at the end.
+	 */
+	struct rte_dma_dev_data *data; /**< owning dmadev data; read by the device log macros. */
+	struct rte_uacce_qcontex qctx; /**< UACCE queue context backing this dmadev. */
+	void *io_base;                 /**< mapped MMIO region (RTE_UACCE_QFRT_MMIO). */
+	void *dus_base;
+	uint32_t sqe_size;
+	uint16_t sq_depth;
+};
+
+#endif /* HISI_ACC_DMADEV_H */
diff --git a/drivers/dma/hisi_acc/meson.build b/drivers/dma/hisi_acc/meson.build
new file mode 100644
index 0000000000..f5a75777fa
--- /dev/null
+++ b/drivers/dma/hisi_acc/meson.build
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright (c) 2025 HiSilicon Technologies Co., Ltd. All rights reserved.
+
+if not is_linux
+    build = false
+    reason = 'only supported on Linux'
+    subdir_done()
+endif
+
+if (arch_subdir != 'x86' and arch_subdir != 'arm') or (not dpdk_conf.get('RTE_ARCH_64'))
+    build = false
+    reason = 'only supported on x86_64 and aarch64'
+    subdir_done()
+endif
+
+deps += ['bus_uacce', 'dmadev']
+sources = files(
+        'hisi_acc_dmadev.c',
+)
+
+require_iova_in_mbuf = false
diff --git a/drivers/dma/meson.build b/drivers/dma/meson.build
index 358132759a..194e08e0b1 100644
--- a/drivers/dma/meson.build
+++ b/drivers/dma/meson.build
@@ -5,6 +5,7 @@ drivers = [
         'cnxk',
         'dpaa',
         'dpaa2',
+        'hisi_acc',
         'hisilicon',
         'idxd',
         'ioat',
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH v6 2/4] dma/hisi_acc: add control path ops
  2025-10-16  2:53 ` [PATCH v6 0/4] add Hisilicon accelerator DMA driver Chengwen Feng
  2025-10-16  2:53   ` [PATCH v6 1/4] dma/hisi_acc: add probe and remove Chengwen Feng
@ 2025-10-16  2:53   ` Chengwen Feng
  2025-10-16  2:53   ` [PATCH v6 3/4] dma/hisi_acc: add data " Chengwen Feng
                     ` (2 subsequent siblings)
  4 siblings, 0 replies; 37+ messages in thread
From: Chengwen Feng @ 2025-10-16  2:53 UTC (permalink / raw)
  To: thomas; +Cc: dev, liuyonglong

This commit adds control path ops for Hisilicon accelerator DMA driver.

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
---
 drivers/dma/hisi_acc/hisi_acc_dmadev.c | 185 +++++++++++++++++++++++++
 drivers/dma/hisi_acc/hisi_acc_dmadev.h |  42 ++++++
 2 files changed, 227 insertions(+)

diff --git a/drivers/dma/hisi_acc/hisi_acc_dmadev.c b/drivers/dma/hisi_acc/hisi_acc_dmadev.c
index db6b8e8ba8..00ccbf8426 100644
--- a/drivers/dma/hisi_acc/hisi_acc_dmadev.c
+++ b/drivers/dma/hisi_acc/hisi_acc_dmadev.c
@@ -34,6 +34,190 @@ RTE_LOG_REGISTER_DEFAULT(hacc_dma_logtype, INFO);
 #define HACC_DMA_ERR(hw, ...) \
 	HACC_DMA_DEV_LOG(hw, ERR, __VA_ARGS__)
 
+/*
+ * Report device capabilities: mem-to-mem copy and fill with SVA, a single
+ * vchan, and a fixed descriptor count equal to the SQ depth.
+ */
+static int
+hacc_dma_info_get(const struct rte_dma_dev *dev,
+		  struct rte_dma_info *dev_info,
+		  uint32_t info_sz)
+{
+	const struct hacc_dma_dev *hw = dev->data->dev_private;
+	const uint16_t depth = hw->sq_depth;
+
+	RTE_SET_USED(info_sz);
+
+	dev_info->max_vchans = 1;
+	/* The ring size is fixed, so min and max are the same value. */
+	dev_info->min_desc = depth;
+	dev_info->max_desc = depth;
+	dev_info->dev_capa = RTE_DMA_CAPA_MEM_TO_MEM | RTE_DMA_CAPA_SVA |
+			     RTE_DMA_CAPA_OPS_COPY | RTE_DMA_CAPA_OPS_FILL;
+
+	return 0;
+}
+
+/* Device configuration hook: nothing to apply, every configuration is accepted. */
+static int
+hacc_dma_configure(struct rte_dma_dev *dev,
+		   const struct rte_dma_conf *conf,
+		   uint32_t conf_sz)
+{
+	RTE_SET_USED(conf_sz);
+	RTE_SET_USED(conf);
+	RTE_SET_USED(dev);
+
+	return 0;
+}
+
+/*
+ * Start the dmadev.
+ *
+ * Fails with -EIO when either queue status word is non-zero. If the queue was
+ * already started only the ring indexes are reset; otherwise the rings, the
+ * software state and all counters are cleared and the UACCE queue is started.
+ */
+static int
+hacc_dma_start(struct rte_dma_dev *dev)
+{
+	struct hacc_dma_dev *hw = dev->data->dev_private;
+	int ret;
+
+	if ((*hw->sq_status != 0) || (*hw->cq_status != 0)) {
+		HACC_DMA_ERR(hw, "detect dev is abnormal!");
+		return -EIO;
+	}
+
+	if (hw->started) {
+		hw->ridx = 0;
+		hw->cridx = 0;
+		return 0;
+	}
+
+	memset(hw->sqe, 0, hw->sqe_size * hw->sq_depth);
+	memset(hw->cqe, 0, sizeof(struct hacc_dma_cqe) * hw->cq_depth);
+	memset(hw->status, 0, sizeof(uint16_t) * hw->sq_depth);
+	hw->ridx = 0;
+	hw->cridx = 0;
+	hw->sq_head = 0;
+	hw->sq_tail = 0;
+	hw->cq_sq_head = 0;
+	hw->avail_sqes = hw->sq_depth - HACC_DMA_SQ_GAP_NUM - 1;
+	hw->cq_head = 0;
+	hw->cqs_completed = 0;
+	hw->cqe_vld = 1;
+	hw->submitted = 0;
+	hw->completed = 0;
+	hw->errors = 0;
+	hw->invalid_lens = 0;
+	hw->io_errors = 0; /* keep in step with hacc_dma_stats_reset(). */
+	hw->qfulls = 0;
+
+	ret = rte_uacce_queue_start(&hw->qctx);
+	if (ret == 0)
+		hw->started = true;
+
+	return ret;
+}
+
+/*
+ * Stop the dmadev. Always returns 0.
+ *
+ * A non-zero SQ/CQ status word means the dev is abnormal. The correct recovery
+ * is to close the dev (so that the kernel module performs error handling) and
+ * apply for a new one. Returning an error here would prevent the close, so the
+ * condition is only logged.
+ */
+static int
+hacc_dma_stop(struct rte_dma_dev *dev)
+{
+	struct hacc_dma_dev *hw = dev->data->dev_private;
+
+	if (*hw->sq_status != 0 || *hw->cq_status != 0)
+		HACC_DMA_ERR(hw, "detect dev is abnormal!");
+
+	return 0;
+}
+
+/*
+ * Close the dmadev: free the status array, unmap the DUS and MMIO regions and
+ * release the UACCE queue. Always returns 0.
+ */
+static int
+hacc_dma_close(struct rte_dma_dev *dev)
+{
+	struct hacc_dma_dev *hw = dev->data->dev_private;
+	struct rte_uacce_qcontex *qctx = &hw->qctx;
+
+	/* The dmadev already stopped */
+	rte_free(hw->status);
+	rte_uacce_queue_unmap(qctx, RTE_UACCE_QFRT_DUS);
+	rte_uacce_queue_unmap(qctx, RTE_UACCE_QFRT_MMIO);
+	rte_uacce_queue_free(qctx);
+
+	return 0;
+}
+
+/* Virtual channel setup hook: nothing to apply, every configuration is accepted. */
+static int
+hacc_dma_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan,
+		     const struct rte_dma_vchan_conf *conf,
+		     uint32_t conf_sz)
+{
+	RTE_SET_USED(conf_sz);
+	RTE_SET_USED(conf);
+	RTE_SET_USED(vchan);
+	RTE_SET_USED(dev);
+
+	return 0;
+}
+
+/* Copy the submitted/completed/errors counters of the device into stats. */
+static int
+hacc_dma_stats_get(const struct rte_dma_dev *dev, uint16_t vchan,
+		   struct rte_dma_stats *stats,
+		   uint32_t stats_sz)
+{
+	const struct hacc_dma_dev *hw = dev->data->dev_private;
+
+	RTE_SET_USED(stats_sz);
+	RTE_SET_USED(vchan);
+
+	stats->errors = hw->errors;
+	stats->completed = hw->completed;
+	stats->submitted = hw->submitted;
+
+	return 0;
+}
+
+/* Zero every statistics counter kept by the driver, public and internal. */
+static int
+hacc_dma_stats_reset(struct rte_dma_dev *dev, uint16_t vchan)
+{
+	struct hacc_dma_dev *hw = dev->data->dev_private;
+
+	RTE_SET_USED(vchan);
+
+	/* Counters reported through rte_dma_stats. */
+	hw->submitted = 0;
+	hw->completed = 0;
+	hw->errors = 0;
+
+	/* Driver-private counters, only visible through the dump. */
+	hw->invalid_lens = 0;
+	hw->io_errors = 0;
+	hw->qfulls = 0;
+
+	return 0;
+}
+
+/*
+ * Print the queue geometry, ring indexes and counters of the device to f.
+ * A non-zero SQ/CQ status word is shown as "ERR", zero as "OK".
+ */
+static int
+hacc_dma_dump(const struct rte_dma_dev *dev, FILE *f)
+{
+	struct hacc_dma_dev *hw = dev->data->dev_private;
+	const char *sq_state = (*hw->sq_status != 0) ? "ERR" : "OK";
+	const char *cq_state = (*hw->cq_status != 0) ? "ERR" : "OK";
+
+	/* Queue identity and geometry. */
+	fprintf(f, "  sqn: %u sq_status: %s cq_status: %s\n"
+		"  sqe_size: %u sq_depth: %u sq_depth_mask: %u cq_depth: %u\n",
+		hw->sqn, sq_state, cq_state,
+		hw->sqe_size, hw->sq_depth, hw->sq_depth_mask, hw->cq_depth);
+
+	/* Ring bookkeeping. */
+	fprintf(f, "  ridx: %u cridx: %u\n"
+		"  sq_head: %u sq_tail: %u cq_sq_head: %u avail_sqes: %u\n"
+		"  cq_head: %u cqs_completed: %u cqe_vld: %u\n",
+		hw->ridx, hw->cridx,
+		hw->sq_head, hw->sq_tail, hw->cq_sq_head, hw->avail_sqes,
+		hw->cq_head, hw->cqs_completed, hw->cqe_vld);
+
+	/* Statistics counters. */
+	fprintf(f, "  submitted: %" PRIu64 " completed: %" PRIu64 " errors: %" PRIu64
+		" invalid_lens: %" PRIu64 " io_errors: %" PRIu64 " qfulls: %" PRIu64 "\n",
+		hw->submitted, hw->completed, hw->errors, hw->invalid_lens,
+		hw->io_errors, hw->qfulls);
+
+	return 0;
+}
+
+static const struct rte_dma_dev_ops hacc_dmadev_ops = { /* control-path callbacks of this driver. */
+	.dev_info_get     = hacc_dma_info_get,
+	.dev_configure    = hacc_dma_configure, /* no-op, always returns 0. */
+	.dev_start        = hacc_dma_start,
+	.dev_stop         = hacc_dma_stop,
+	.dev_close        = hacc_dma_close,
+	.vchan_setup      = hacc_dma_vchan_setup, /* no-op, always returns 0. */
+	.stats_get        = hacc_dma_stats_get,
+	.stats_reset      = hacc_dma_stats_reset,
+	.dev_dump         = hacc_dma_dump,
+};
+
 static void
 hacc_dma_gen_dev_name(const struct rte_uacce_device *uacce_dev,
 		      uint16_t queue_id, char *dev_name, size_t size)
@@ -111,6 +295,7 @@ hacc_dma_create(struct rte_uacce_device *uacce_dev, uint16_t queue_id)
 	}
 
 	dev->device = &uacce_dev->device;
+	dev->dev_ops = &hacc_dmadev_ops;
 	dev->fp_obj->dev_private = dev->data->dev_private;
 
 	hw = dev->data->dev_private;
diff --git a/drivers/dma/hisi_acc/hisi_acc_dmadev.h b/drivers/dma/hisi_acc/hisi_acc_dmadev.h
index 3a790bb445..bc94b9d5db 100644
--- a/drivers/dma/hisi_acc/hisi_acc_dmadev.h
+++ b/drivers/dma/hisi_acc/hisi_acc_dmadev.h
@@ -13,6 +13,9 @@
 #define HACC_DMA_DEVARG_QUEUES		"queues"
 #define HACC_DMA_DEFAULT_QUEUES		1
 
+#define HACC_DMA_CQ_DOORBELL_PACE	64
+#define HACC_DMA_SQ_GAP_NUM		HACC_DMA_CQ_DOORBELL_PACE
+
 struct hacc_dma_config {
 	uint16_t queues;
 
@@ -38,7 +41,45 @@ struct hacc_dma_dev {
 	uint16_t sqn;           /**< SQ global number, inited when created. */
 	uint16_t sq_depth_mask; /**< SQ depth - 1, the SQ depth is power of 2. */
 
+	uint16_t ridx;  /**< ring index which will assign to the next request. */
+	uint16_t cridx; /**< ring index which returned by completed APIs. */
+
+	/**
+	 * SQE array management fields:
+	 *
+	 *  -----------------------------------------------------
+	 *  | SQE0 | SQE1 | SQE2 |   ...  | SQEx | ... | SQEn-1 |
+	 *  -----------------------------------------------------
+	 *     ^             ^               ^
+	 *     |             |               |
+	 *   sq_head     cq_sq_head       sq_tail
+	 *
+	 *  sq_head: next index to the oldest completed request, this field was
+	 *           updated by completed* APIs.
+	 *  sq_tail: index of the next new request, this field was updated by
+	 *           copy or fill API.
+	 *  cq_sq_head: next index of index that has been completed by hardware,
+	 *              this field was updated by completed* APIs.
+	 *
+	 *  [sq_head, cq_sq_head): the SQEs that hardware already completed.
+	 *  [cq_sq_head, sq_tail): the SQEs that hardware processing.
+	 */
+	uint16_t sq_head;
+	uint16_t sq_tail;
+	uint16_t cq_sq_head;
+	uint16_t avail_sqes;
+
 	uint16_t cq_depth;      /**< CQ depth, inited when created. */
+	uint16_t cq_head;       /**< CQ index for next scans. */
+	uint16_t cqs_completed; /**< accumulated number of completed CQs. */
+	uint8_t  cqe_vld;       /**< valid bit for CQE, will change for every round. */
+
+	uint64_t submitted;
+	uint64_t completed;
+	uint64_t errors;
+	uint64_t invalid_lens;
+	uint64_t io_errors;
+	uint64_t qfulls;
 
 	/**
 	 * The following fields are not accessed in the I/O path, so they are
@@ -50,6 +91,7 @@ struct hacc_dma_dev {
 	void *dus_base;
 	uint32_t sqe_size;
 	uint16_t sq_depth;
+	bool started;
 };
 
 #endif /* HISI_ACC_DMADEV_H */
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH v6 3/4] dma/hisi_acc: add data path ops
  2025-10-16  2:53 ` [PATCH v6 0/4] add Hisilicon accelerator DMA driver Chengwen Feng
  2025-10-16  2:53   ` [PATCH v6 1/4] dma/hisi_acc: add probe and remove Chengwen Feng
  2025-10-16  2:53   ` [PATCH v6 2/4] dma/hisi_acc: add control path ops Chengwen Feng
@ 2025-10-16  2:53   ` Chengwen Feng
  2025-10-16  2:53   ` [PATCH v6 4/4] dma/hisi_acc: add doc Chengwen Feng
  2025-11-05 14:45   ` [PATCH v6 0/4] add Hisilicon accelerator DMA driver Thomas Monjalon
  4 siblings, 0 replies; 37+ messages in thread
From: Chengwen Feng @ 2025-10-16  2:53 UTC (permalink / raw)
  To: thomas; +Cc: dev, liuyonglong

This commit adds data path ops for Hisilicon accelerator DMA driver.

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
---
 drivers/dma/hisi_acc/hisi_acc_dmadev.c | 322 ++++++++++++++++++++++++-
 drivers/dma/hisi_acc/hisi_acc_dmadev.h |  76 +++++-
 2 files changed, 385 insertions(+), 13 deletions(-)

diff --git a/drivers/dma/hisi_acc/hisi_acc_dmadev.c b/drivers/dma/hisi_acc/hisi_acc_dmadev.c
index 00ccbf8426..b9767d4a08 100644
--- a/drivers/dma/hisi_acc/hisi_acc_dmadev.c
+++ b/drivers/dma/hisi_acc/hisi_acc_dmadev.c
@@ -8,6 +8,7 @@
 #include <sys/ioctl.h>
 
 #include <rte_byteorder.h>
+#include <rte_cycles.h>
 #include <rte_eal.h>
 #include <rte_io.h>
 #include <rte_kvargs.h>
@@ -79,6 +80,7 @@ hacc_dma_start(struct rte_dma_dev *dev)
 	if (hw->started) {
 		hw->ridx = 0;
 		hw->cridx = 0;
+		hw->stop_proc = 0;
 		return 0;
 	}
 
@@ -94,6 +96,7 @@ hacc_dma_start(struct rte_dma_dev *dev)
 	hw->cq_head = 0;
 	hw->cqs_completed = 0;
 	hw->cqe_vld = 1;
+	hw->stop_proc = 0;
 	hw->submitted = 0;
 	hw->completed = 0;
 	hw->errors = 0;
@@ -110,17 +113,33 @@ hacc_dma_start(struct rte_dma_dev *dev)
 static int
 hacc_dma_stop(struct rte_dma_dev *dev)
 {
+#define MAX_WAIT_MSEC	1000
+#define MAX_CPL_NUM	64
 	struct hacc_dma_dev *hw = dev->data->dev_private;
-
-	if ((*hw->sq_status != 0) || (*hw->cq_status != 0)) {
-		/* This indicates that the dev is abnormal. The correct error handling
-		 * is to close the dev (so that kernel module will perform error handling)
-		 * and apply for a new dev.
-		 * If an error code is returned here, the dev cannot be closed. Therefore,
-		 * zero is returned and an error trace is added.
-		 */
-		HACC_DMA_ERR(hw, "detect dev is abnormal!");
-		return 0;
+	uint32_t wait_msec = 0;
+
+	/* Flag stop processing new requests. */
+	hw->stop_proc = 1;
+	/* Currently, there is no method to notify the hardware to stop.
+	 * Therefore, the timeout mechanism is used to wait for the dataplane
+	 * to stop.
+	 */
+	while (hw->sq_head != hw->sq_tail && wait_msec++ < MAX_WAIT_MSEC) {
+		if ((*hw->sq_status != 0) || (*hw->cq_status != 0)) {
+			/* This indicates that the dev is abnormal. The correct error handling
+			 * is to close the dev (so that kernel module will perform error handling)
+			 * and apply for a new dev.
+			 * If an error code is returned here, the dev cannot be closed. Therefore,
+			 * zero is returned and an error trace is added.
+			 */
+			HACC_DMA_ERR(hw, "detect dev is abnormal!");
+			return 0;
+		}
+		rte_delay_ms(1);
+	}
+	if (hw->sq_head != hw->sq_tail) {
+		HACC_DMA_ERR(hw, "dev is still active!");
+		return -EBUSY;
 	}
 
 	return 0;
@@ -187,10 +206,11 @@ hacc_dma_dump(const struct rte_dma_dev *dev, FILE *f)
 {
 	struct hacc_dma_dev *hw = dev->data->dev_private;
 
-	fprintf(f, "  sqn: %u sq_status: %s cq_status: %s\n"
+	fprintf(f, "  sqn: %u sq_status: %s cq_status: %s stop_proc: %u\n"
 		"  sqe_size: %u sq_depth: %u sq_depth_mask: %u cq_depth: %u\n",
 		hw->sqn, (*hw->sq_status != 0) ? "ERR" : "OK",
 		(*hw->cq_status != 0) ? "ERR" : "OK",
+		hw->stop_proc,
 		hw->sqe_size, hw->sq_depth, hw->sq_depth_mask, hw->cq_depth);
 	fprintf(f, "  ridx: %u cridx: %u\n"
 		"  sq_head: %u sq_tail: %u cq_sq_head: %u avail_sqes: %u\n"
@@ -206,6 +226,280 @@ hacc_dma_dump(const struct rte_dma_dev *dev, FILE *f)
 	return 0;
 }
 
+static inline void
+hacc_dma_sq_doorbell(struct hacc_dma_dev *hw)
+{
+	uint64_t doorbell = (uint64_t)(hw->sqn & HACC_DMA_DOORBELL_SQN_MASK) |
+			    (HACC_DMA_DOORBELL_SQ_CMD << HACC_DMA_DOORBELL_CMD_SHIFT) |
+			    (((uint64_t)hw->sq_tail) << HACC_DMA_DOORBELL_IDX_SHIFT);
+	rte_io_wmb();
+	*(volatile uint64_t *)hw->doorbell_reg = doorbell;
+}
+
+static int
+hacc_dma_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t dst,
+	      uint32_t length, uint64_t flags)
+{
+	struct hacc_dma_dev *hw = dev_private;
+	struct hacc_dma_sqe *sqe = &hw->sqe[hw->sq_tail];
+
+	RTE_SET_USED(vchan);
+
+	if (unlikely(hw->stop_proc > 0))
+		return -EPERM;
+
+	if (unlikely(length > HACC_DMA_MAX_OP_SIZE)) {
+		hw->invalid_lens++;
+		return -EINVAL;
+	}
+
+	if (unlikely(*hw->sq_status != 0)) {
+		hw->io_errors++;
+		return -EIO;
+	}
+
+	if (hw->avail_sqes == 0) {
+		hw->qfulls++;
+		return -ENOSPC;
+	}
+
+	sqe->bd_type       = HACC_DMA_SQE_TYPE;
+	sqe->task_type     = HACC_DMA_TASK_TYPE;
+	sqe->task_type_ext = HACC_DMA_DATA_MEMCPY;
+	sqe->init_val      = 0;
+	sqe->addr_array    = src;
+	sqe->dst_addr      = dst;
+	sqe->data_size     = length;
+	sqe->dw0           = HACC_DMA_SVA_PREFETCH_EN;
+	sqe->wb_field      = 0;
+
+	hw->sq_tail = (hw->sq_tail + 1) & hw->sq_depth_mask;
+	hw->avail_sqes--;
+	hw->submitted++;
+
+	if (flags & RTE_DMA_OP_FLAG_SUBMIT)
+		hacc_dma_sq_doorbell(hw);
+
+	return hw->ridx++;
+}
+
+static int
+hacc_dma_fill(void *dev_private, uint16_t vchan, uint64_t pattern,
+	      rte_iova_t dst, uint32_t length, uint64_t flags)
+{
+	struct hacc_dma_dev *hw = dev_private;
+	struct hacc_dma_sqe *sqe = &hw->sqe[hw->sq_tail];
+
+	RTE_SET_USED(vchan);
+
+	if (unlikely(hw->stop_proc > 0))
+		return -EPERM;
+
+	if (unlikely(length > HACC_DMA_MAX_OP_SIZE)) {
+		hw->invalid_lens++;
+		return -EINVAL;
+	}
+
+	if (unlikely(*hw->sq_status != 0)) {
+		hw->io_errors++;
+		return -EIO;
+	}
+
+	if (hw->avail_sqes == 0) {
+		hw->qfulls++;
+		return -ENOSPC;
+	}
+
+	sqe->bd_type       = HACC_DMA_SQE_TYPE;
+	sqe->task_type     = HACC_DMA_TASK_TYPE;
+	sqe->task_type_ext = HACC_DMA_DATA_MEMSET;
+	sqe->init_val      = pattern;
+	sqe->addr_array    = 0;
+	sqe->dst_addr      = dst;
+	sqe->data_size     = length;
+	sqe->dw0           = HACC_DMA_SVA_PREFETCH_EN;
+	sqe->wb_field      = 0;
+
+	hw->sq_tail = (hw->sq_tail + 1) & hw->sq_depth_mask;
+	hw->avail_sqes--;
+	hw->submitted++;
+
+	if (flags & RTE_DMA_OP_FLAG_SUBMIT)
+		hacc_dma_sq_doorbell(hw);
+
+	return hw->ridx++;
+}
+
+static int
+hacc_dma_submit(void *dev_private, uint16_t vchan)
+{
+	struct hacc_dma_dev *hw = dev_private;
+
+	RTE_SET_USED(vchan);
+
+	if (unlikely(*hw->sq_status != 0)) {
+		hw->io_errors++;
+		return -EIO;
+	}
+
+	hacc_dma_sq_doorbell(hw);
+
+	return 0;
+}
+
+static inline void
+hacc_dma_cq_doorbell(struct hacc_dma_dev *hw)
+{
+	uint64_t doorbell = (uint64_t)(hw->sqn & HACC_DMA_DOORBELL_SQN_MASK) |
+			    (HACC_DMA_DOORBELL_CQ_CMD << HACC_DMA_DOORBELL_CMD_SHIFT) |
+			    (((uint64_t)hw->cq_head) << HACC_DMA_DOORBELL_IDX_SHIFT);
+	rte_io_wmb();
+	*(volatile uint64_t *)hw->doorbell_reg = doorbell;
+}
+
+static inline void
+hacc_dma_scan_cq(struct hacc_dma_dev *hw)
+{
+	volatile struct hacc_dma_cqe *cqe;
+	struct hacc_dma_sqe *sqe;
+	uint16_t csq_head = hw->cq_sq_head;
+	uint16_t cq_head = hw->cq_head;
+	uint16_t count = 0;
+	uint64_t misc;
+
+	if (unlikely(*hw->cq_status != 0)) {
+		hw->io_errors++;
+		return;
+	}
+
+	while (count < hw->cq_depth) {
+		cqe = &hw->cqe[cq_head];
+		misc = cqe->misc;
+		misc = rte_le_to_cpu_64(misc);
+		if (RTE_FIELD_GET64(HACC_DMA_CQE_VALID_B, misc) != hw->cqe_vld)
+			break;
+
+		csq_head = RTE_FIELD_GET64(HACC_DMA_SQ_HEAD_MASK, misc);
+		if (unlikely(csq_head > hw->sq_depth_mask)) {
+			/**
+			 * Defensive programming to prevent overflow of the
+			 * status array indexed by csq_head. Only error logs
+			 * are used for prompting.
+			 */
+			HACC_DMA_ERR(hw, "invalid csq_head: %u!", csq_head);
+			count = 0;
+			break;
+		}
+		sqe = &hw->sqe[csq_head];
+		if (sqe->done_flag != HACC_DMA_TASK_DONE ||
+			sqe->err_type || sqe->ext_err_type || sqe->wtype) {
+			hw->status[csq_head] = RTE_DMA_STATUS_ERROR_UNKNOWN;
+		}
+
+		count++;
+		cq_head++;
+		if (cq_head == hw->cq_depth) {
+			hw->cqe_vld = !hw->cqe_vld;
+			cq_head = 0;
+		}
+	}
+
+	if (count == 0)
+		return;
+
+	hw->cq_head = cq_head;
+	hw->cq_sq_head = (csq_head + 1) & hw->sq_depth_mask;
+	hw->avail_sqes += count;
+	hw->cqs_completed += count;
+	if (hw->cqs_completed >= HACC_DMA_CQ_DOORBELL_PACE) {
+		hacc_dma_cq_doorbell(hw);
+		hw->cqs_completed = 0;
+	}
+}
+
+static inline uint16_t
+hacc_dma_calc_cpls(struct hacc_dma_dev *hw, const uint16_t nb_cpls)
+{
+	uint16_t cpl_num;
+
+	if (hw->cq_sq_head >= hw->sq_head)
+		cpl_num = hw->cq_sq_head - hw->sq_head;
+	else
+		cpl_num = hw->sq_depth_mask + 1 - hw->sq_head + hw->cq_sq_head;
+
+	if (cpl_num > nb_cpls)
+		cpl_num = nb_cpls;
+
+	return cpl_num;
+}
+
+static uint16_t
+hacc_dma_completed(void *dev_private,
+		   uint16_t vchan, const uint16_t nb_cpls,
+		   uint16_t *last_idx, bool *has_error)
+{
+	struct hacc_dma_dev *hw = dev_private;
+	uint16_t sq_head = hw->sq_head;
+	uint16_t cpl_num, i;
+
+	RTE_SET_USED(vchan);
+	hacc_dma_scan_cq(hw);
+
+	cpl_num = hacc_dma_calc_cpls(hw, nb_cpls);
+	for (i = 0; i < cpl_num; i++) {
+		if (hw->status[sq_head]) {
+			*has_error = true;
+			break;
+		}
+		sq_head = (sq_head + 1) & hw->sq_depth_mask;
+	}
+	*last_idx = hw->cridx + i - 1;
+	if (i > 0) {
+		hw->cridx += i;
+		hw->sq_head = sq_head;
+		hw->completed += i;
+	}
+
+	return i;
+}
+
+static uint16_t
+hacc_dma_completed_status(void *dev_private,
+			  uint16_t vchan, const uint16_t nb_cpls,
+			  uint16_t *last_idx, enum rte_dma_status_code *status)
+{
+	struct hacc_dma_dev *hw = dev_private;
+	uint16_t sq_head = hw->sq_head;
+	uint16_t cpl_num, i;
+
+	RTE_SET_USED(vchan);
+	hacc_dma_scan_cq(hw);
+
+	cpl_num = hacc_dma_calc_cpls(hw, nb_cpls);
+	for (i = 0; i < cpl_num; i++) {
+		status[i] = hw->status[sq_head];
+		hw->errors += !!status[i];
+		hw->status[sq_head] = 0;
+		sq_head = (sq_head + 1) & hw->sq_depth_mask;
+	}
+	*last_idx = hw->cridx + cpl_num - 1;
+	if (likely(cpl_num > 0)) {
+		hw->cridx += cpl_num;
+		hw->sq_head = sq_head;
+		hw->completed += cpl_num;
+	}
+
+	return cpl_num;
+}
+
+static uint16_t
+hacc_dma_burst_capacity(const void *dev_private, uint16_t vchan)
+{
+	const struct hacc_dma_dev *hw = dev_private;
+	RTE_SET_USED(vchan);
+	return hw->avail_sqes;
+}
+
 static const struct rte_dma_dev_ops hacc_dmadev_ops = {
 	.dev_info_get     = hacc_dma_info_get,
 	.dev_configure    = hacc_dma_configure,
@@ -297,6 +591,12 @@ hacc_dma_create(struct rte_uacce_device *uacce_dev, uint16_t queue_id)
 	dev->device = &uacce_dev->device;
 	dev->dev_ops = &hacc_dmadev_ops;
 	dev->fp_obj->dev_private = dev->data->dev_private;
+	dev->fp_obj->copy = hacc_dma_copy;
+	dev->fp_obj->fill = hacc_dma_fill;
+	dev->fp_obj->submit = hacc_dma_submit;
+	dev->fp_obj->completed = hacc_dma_completed;
+	dev->fp_obj->completed_status = hacc_dma_completed_status;
+	dev->fp_obj->burst_capacity = hacc_dma_burst_capacity;
 
 	hw = dev->data->dev_private;
 	hw->data = dev->data; /* make sure ACC_DMA_DEBUG/INFO/WARN/ERR was available. */
diff --git a/drivers/dma/hisi_acc/hisi_acc_dmadev.h b/drivers/dma/hisi_acc/hisi_acc_dmadev.h
index bc94b9d5db..cb63b30642 100644
--- a/drivers/dma/hisi_acc/hisi_acc_dmadev.h
+++ b/drivers/dma/hisi_acc/hisi_acc_dmadev.h
@@ -25,9 +25,80 @@ struct hacc_dma_config {
 };
 
 #define HACC_DMA_DOORBELL_OFFSET		0x1000u
+#define HACC_DMA_DOORBELL_SQN_MASK	0x3FFu
+#define HACC_DMA_DOORBELL_SQ_CMD		0ull
+#define HACC_DMA_DOORBELL_CQ_CMD		1ull
+#define HACC_DMA_DOORBELL_CMD_SHIFT	12
+#define HACC_DMA_DOORBELL_IDX_SHIFT	32
+
+#define HACC_DMA_TASK_TYPE		0x3
+#define HACC_DMA_SQE_TYPE		0x1
+#define HACC_DMA_SVA_PREFETCH_EN		RTE_BIT32(15)
+#define HACC_DMA_MAX_OP_SIZE		(RTE_BIT32(24) - 1)
+
+enum {
+	HACC_DMA_DATA_MEMCPY = 0,
+	HACC_DMA_DATA_MEMSET = 7,
+};
+
+enum {
+	HACC_DMA_TASK_DONE = 1,
+	HACC_DMA_TASK_ERROR,
+};
 
-struct hacc_dma_sqe {};
-struct hacc_dma_cqe {};
+struct hacc_dma_sqe {
+	uint32_t bd_type : 6;
+	uint32_t resv1 : 2;
+	uint32_t task_type : 6;
+	uint32_t resv2 : 2;
+	uint32_t task_type_ext : 6;
+	uint32_t resv3 : 9;
+	uint32_t bd_invlid : 1;
+	uint32_t rsv4[2];
+	uint32_t low_tag;
+	uint32_t hi_tag;
+	/* The number of bytes to be copied or filled for single address. */
+	uint32_t data_size;
+	uint32_t rsv5;
+	/*
+	 * 0 ~ 13 bits: reserved,
+	 * 14 bit: single address or multi addresses,
+	 * 15 bit: sva prefetch en.
+	 */
+	uint16_t dw0;
+	/*
+	 * 0 ~5 bits: reserved,
+	 * 6 ~ 13 bits: address num,
+	 * 14 ~15 bits: reserved.
+	 */
+	uint16_t dw1;
+	uint64_t init_val;
+	uint32_t rsv6[12];
+	/* dst addr for single address task. */
+	uint64_t dst_addr;
+	uint32_t rsv7[2];
+	/* src addr for single address task, addr array for multi addresses. */
+	uint64_t addr_array;
+	union {
+		uint32_t wb_field;
+		struct {
+			uint32_t done_flag : 3;
+			uint32_t rsv8 : 1;
+			uint32_t ext_err_type : 12;
+			uint32_t err_type : 8;
+			uint32_t wtype : 8;
+		};
+	};
+	uint32_t rsv9[3];
+};
+
+#define HACC_DMA_SQ_HEAD_MASK		RTE_GENMASK64(15, 0)
+#define HACC_DMA_CQE_VALID_B		RTE_BIT64(48)
+
+struct hacc_dma_cqe {
+	uint64_t rsv;
+	uint64_t misc;
+};
 
 struct hacc_dma_dev {
 	struct hacc_dma_sqe *sqe;
@@ -73,6 +144,7 @@ struct hacc_dma_dev {
 	uint16_t cq_head;       /**< CQ index for next scans. */
 	uint16_t cqs_completed; /**< accumulated number of completed CQs. */
 	uint8_t  cqe_vld;       /**< valid bit for CQE, will change for every round. */
+	volatile uint8_t stop_proc; /**< whether stop processing new requests. */
 
 	uint64_t submitted;
 	uint64_t completed;
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH v6 4/4] dma/hisi_acc: add doc
  2025-10-16  2:53 ` [PATCH v6 0/4] add Hisilicon accelerator DMA driver Chengwen Feng
                     ` (2 preceding siblings ...)
  2025-10-16  2:53   ` [PATCH v6 3/4] dma/hisi_acc: add data " Chengwen Feng
@ 2025-10-16  2:53   ` Chengwen Feng
  2025-11-05 14:45   ` [PATCH v6 0/4] add Hisilicon accelerator DMA driver Thomas Monjalon
  4 siblings, 0 replies; 37+ messages in thread
From: Chengwen Feng @ 2025-10-16  2:53 UTC (permalink / raw)
  To: thomas; +Cc: dev, liuyonglong

This commit adds document for Hisilicon accelerator DMA driver.

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
---
 MAINTAINERS                            |  1 +
 doc/guides/dmadevs/hisi_acc.rst        | 62 ++++++++++++++++++++++++++
 doc/guides/dmadevs/index.rst           |  1 +
 doc/guides/rel_notes/release_25_11.rst |  6 +++
 4 files changed, 70 insertions(+)
 create mode 100644 doc/guides/dmadevs/hisi_acc.rst

diff --git a/MAINTAINERS b/MAINTAINERS
index b5e3479203..ce038822c8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1369,6 +1369,7 @@ F: doc/guides/dmadevs/hisilicon.rst
 HiSilicon Accelerator DMA
 M: Chengwen Feng <fengchengwen@huawei.com>
 F: drivers/dma/hisi_acc/
+F: doc/guides/dmadevs/hisi_acc.rst
 
 Marvell CNXK DPI DMA
 M: Vamsi Attunuru <vattunuru@marvell.com>
diff --git a/doc/guides/dmadevs/hisi_acc.rst b/doc/guides/dmadevs/hisi_acc.rst
new file mode 100644
index 0000000000..7c6f2e8122
--- /dev/null
+++ b/doc/guides/dmadevs/hisi_acc.rst
@@ -0,0 +1,62 @@
+..  SPDX-License-Identifier: BSD-3-Clause
+    Copyright (c) 2025 HiSilicon Technologies Co., Ltd. All rights reserved.
+
+HISILICON Accelerator DMA Driver
+================================
+
+The Kunpeng SoC Accelerator subsystem supports a zip function, and the zip also
+supports data copy and fill. This driver exposes this capability to DPDK applications.
+
+
+Supported Kunpeng SoCs
+----------------------
+
+* Kunpeng 920
+
+
+Device Setup
+-------------
+
+In order to use the device in DPDK, the user should load uacce.ko, hisi_qm.ko and
+hisi_zip.ko (with module parameter uacce_mode=1); several subdirectories whose
+names start with hisi_zip will then appear in the /sys/class/uacce/ directory.
+
+Device Probing and Initialization
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Use the following method to probe the device::
+
+        $ dpdk-app -a uacce:hisi_zip-0,queues=2 ...
+
+hisi_zip-0 is the directory name in the /sys/class/uacce/ directory, and queues is
+a runtime config parameter which indicates how many dmadevs are created.
+
+If the probe is successful, two dmadevs are created, named "hisi_zip-0-dma0"
+and "hisi_zip-0-dma1".
+
+.. note::
+        In the /sys/class/uacce/hisi_zip-x/ directory, the user can query api and
+        algorithms; this driver only matches devices whose api is
+        hisi_qm_v5 and whose algorithms contain udma.
+
+Device Configuration
+~~~~~~~~~~~~~~~~~~~~~
+
+Configuration requirements:
+
+* ``ring_size`` is obtained from the UACCE API and is a fixed value.
+* Only one ``vchan`` is supported per ``dmadev``.
+* Silent mode is not supported.
+* The transfer direction must be set to ``RTE_DMA_DIR_MEM_TO_MEM``.
+
+
+Device Datapath Capability and Limitation
+-----------------------------------------
+
+Support memory copy and fill operations.
+
+.. note::
+        Currently, the maximum size of the operation data is limited to 16MB-1B
+        in the driver. The device actually supports operations with a larger data
+        size, but the driver would require complex operations in the datapath to
+        support them. If you have such a requirement, please contact the maintainers.
diff --git a/doc/guides/dmadevs/index.rst b/doc/guides/dmadevs/index.rst
index 15ddaf5192..86dcdbb412 100644
--- a/doc/guides/dmadevs/index.rst
+++ b/doc/guides/dmadevs/index.rst
@@ -14,6 +14,7 @@ an application through DMA API.
    cnxk
    dpaa
    dpaa2
+   hisi_acc
    hisilicon
    idxd
    ioat
diff --git a/doc/guides/rel_notes/release_25_11.rst b/doc/guides/rel_notes/release_25_11.rst
index 3c32b26e3b..3c93c19c0d 100644
--- a/doc/guides/rel_notes/release_25_11.rst
+++ b/doc/guides/rel_notes/release_25_11.rst
@@ -71,6 +71,12 @@ New Features
   Added Ethernet link speed for 800 Gb/s as it is well standardized in IEEE,
   and some devices already support this speed.
 
+* **Added HiSilicon accelerator DMA driver.**
+
+  Kunpeng SoC has an internal accelerator unit which includes a zip function,
+  and the zip also supports data copy and fill. This driver exposes this
+  capability to DPDK applications.
+
 * **Updated NXP DPAA2 ethernet driver.**
 
   * Enabled software taildrop for ordered queues.
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* Re: [PATCH v4 4/4] dma/acc: add doc
  2025-10-15 14:09             ` Thomas Monjalon
@ 2025-10-25  1:07               ` fengchengwen
  0 siblings, 0 replies; 37+ messages in thread
From: fengchengwen @ 2025-10-25  1:07 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: dev, liuyonglong

On 10/15/2025 10:09 PM, Thomas Monjalon wrote:
> 15/10/2025 14:55, fengchengwen:
>> On 2025/10/15 20:01, Thomas Monjalon wrote:
>>> 15/10/2025 13:10, fengchengwen:
>>>> On 2025/10/15 18:05, Thomas Monjalon wrote:
>>>>> 13/10/2025 11:11, Chengwen Feng:
>>>>>> +HISILICON Accelerator DMA Driver
>>>>>> +================================
>>>>>> +
>>>>>> +Kunpeng SoC has an internal accelerator unit which includes zip function, and
>>>>>> +the zip also supports data copy and fill. This driver exposes this capability to
>>>>>> +DPDK application.
>>>>> How is it different of drivers/dma/hisilicon/ ?
>>>>>
>>>>> "
>>>>> Kunpeng SoC has an internal DMA unit which can be used by application
>>>>> to accelerate data copies.
>>>>> "
>>>>>
>>>>> If it is the same hardware (Kunpeng),
>>>>> I recommend having a single driver, or a single directory.
>>>>> Also we already have a driver named "acc" in baseband.
>>>>
>>>> they belong to different hardware IP of Kunpeng platform, /dma/hisilicon
>>>> is the PCIE subsystem DMA's driver, while this acc is the
>>>> Accelerator(ZIP) subsystem DMA's driver.  they don't exist in the same
>>>> specific Kunpeng Soc.
>>>>
>>>> a single driver will add complex.
>>>>
>>>> as for a single directory, should we do for all vendor?  I see there
>>>> are  dpaa and dpaa2, ioat and idxd both belong to vendor platform.
>>> When it is really different driver, it is OK to keep them separate.
>>>
>>>> if there are no rule to have same name with different subsystem driver,
>>>> I prefer keep it.
>>> OK but why it does not contain HiSilicon or Kunpeng in its name?
>>> "acc" is a really short name, and already used.
>>> Please find something else.
>>
>> how about hacc and kacc ?   Hisilicon / Kunpeng accelerator DMA
> 
> You already have one DMA driver in drivers/dma/hisilicon/
> Can you build around that? like hisilicon_something?
> hisil_acc?

v6 has been sent, with the driver named hisi_acc.
In addition, the original HiSilicon driver has been renamed to hisi_pciep because the hardware IP is an integrated endpoint of the PCI subsystem.

> 
> 


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH v6 0/4] add Hisilicon accelerator DMA driver
  2025-10-16  2:53 ` [PATCH v6 0/4] add Hisilicon accelerator DMA driver Chengwen Feng
                     ` (3 preceding siblings ...)
  2025-10-16  2:53   ` [PATCH v6 4/4] dma/hisi_acc: add doc Chengwen Feng
@ 2025-11-05 14:45   ` Thomas Monjalon
  4 siblings, 0 replies; 37+ messages in thread
From: Thomas Monjalon @ 2025-11-05 14:45 UTC (permalink / raw)
  To: Chengwen Feng; +Cc: dev, liuyonglong

16/10/2025 04:53, Chengwen Feng:
> This patchset adds Hisilicon accelerator DMA driver which based on
> UACCE bus.
> 
> Chengwen Feng (4):
>   dma/hisi_acc: add probe and remove
>   dma/hisi_acc: add control path ops
>   dma/hisi_acc: add data path ops
>   dma/hisi_acc: add doc

Split and squashed the doc commit with minor fixes.
Applied, thanks.




^ permalink raw reply	[flat|nested] 37+ messages in thread

end of thread, other threads:[~2025-11-05 14:45 UTC | newest]

Thread overview: 37+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-08-27  9:27 [PATCH 0/4] add Hisilicon accelerator DMA driver Chengwen Feng
2025-08-27  9:27 ` [PATCH 1/4] dma/acc: add probe and remove Chengwen Feng
2025-08-27  9:27 ` [PATCH 2/4] dma/acc: add control path ops Chengwen Feng
2025-08-27  9:27 ` [PATCH 3/4] dma/acc: add data " Chengwen Feng
2025-08-27  9:27 ` [PATCH 4/4] dma/acc: add doc Chengwen Feng
2025-09-08  2:39 ` [PATCH v2 0/4] add Hisilicon accelerator DMA driver Chengwen Feng
2025-09-08  2:39   ` [PATCH v2 1/4] dma/acc: add probe and remove Chengwen Feng
2025-09-08  2:39   ` [PATCH v2 2/4] dma/acc: add control path ops Chengwen Feng
2025-09-08  2:39   ` [PATCH v2 3/4] dma/acc: add data " Chengwen Feng
2025-09-08  2:39   ` [PATCH v2 4/4] dma/acc: add doc Chengwen Feng
2025-09-10  8:50 ` [PATCH v3 0/4] add Hisilicon accelerator DMA driver Chengwen Feng
2025-09-10  8:50   ` [PATCH v3 1/4] dma/acc: add probe and remove Chengwen Feng
2025-09-10  8:51   ` [PATCH v3 2/4] dma/acc: add control path ops Chengwen Feng
2025-09-10  8:51   ` [PATCH v3 3/4] dma/acc: add data " Chengwen Feng
2025-09-10  8:51   ` [PATCH v3 4/4] dma/acc: add doc Chengwen Feng
2025-10-13  9:11 ` [PATCH v4 0/4] add Hisilicon accelerator DMA driver Chengwen Feng
2025-10-13  9:11   ` [PATCH v4 1/4] dma/acc: add probe and remove Chengwen Feng
2025-10-13  9:11   ` [PATCH v4 2/4] dma/acc: add control path ops Chengwen Feng
2025-10-13  9:11   ` [PATCH v4 3/4] dma/acc: add data " Chengwen Feng
2025-10-13  9:11   ` [PATCH v4 4/4] dma/acc: add doc Chengwen Feng
2025-10-15 10:05     ` Thomas Monjalon
2025-10-15 11:10       ` fengchengwen
2025-10-15 12:01         ` Thomas Monjalon
2025-10-15 12:55           ` fengchengwen
2025-10-15 14:09             ` Thomas Monjalon
2025-10-25  1:07               ` fengchengwen
2025-10-16  1:52 ` [PATCH v5 0/4] add Hisilicon accelerator DMA driver Chengwen Feng
2025-10-16  1:52   ` [PATCH v5 1/4] dma/hisi_acc: add probe and remove Chengwen Feng
2025-10-16  1:52   ` [PATCH v5 2/4] dma/hisi_acc: add control path ops Chengwen Feng
2025-10-16  1:52   ` [PATCH v5 3/4] dma/hisi_acc: add data " Chengwen Feng
2025-10-16  1:52   ` [PATCH v5 4/4] dma/hisi_acc: add doc Chengwen Feng
2025-10-16  2:53 ` [PATCH v6 0/4] add Hisilicon accelerator DMA driver Chengwen Feng
2025-10-16  2:53   ` [PATCH v6 1/4] dma/hisi_acc: add probe and remove Chengwen Feng
2025-10-16  2:53   ` [PATCH v6 2/4] dma/hisi_acc: add control path ops Chengwen Feng
2025-10-16  2:53   ` [PATCH v6 3/4] dma/hisi_acc: add data " Chengwen Feng
2025-10-16  2:53   ` [PATCH v6 4/4] dma/hisi_acc: add doc Chengwen Feng
2025-11-05 14:45   ` [PATCH v6 0/4] add Hisilicon accelerator DMA driver Thomas Monjalon

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.