public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH V2] accel/amdxdna: Add carveout memory support for non-IOMMU systems
@ 2026-04-27 17:09 Lizhi Hou
  2026-05-04 19:08 ` Mario Limonciello
  0 siblings, 1 reply; 2+ messages in thread
From: Lizhi Hou @ 2026-04-27 17:09 UTC (permalink / raw)
  To: ogabbay, quic_jhugo, dri-devel, mario.limonciello,
	maciej.falkowski
  Cc: Max Zhen, linux-kernel, sonal.santan, Lizhi Hou

From: Max Zhen <max.zhen@amd.com>

Add support for allocating buffers from reserved carveout memory when
IOMMU is not available. This is useful during debugging or bring-up.

In this configuration, the device uses physical addresses and does
not support scatter-gather lists, requiring physically contiguous
buffers.

Implement carveout-backed allocation and integrate it into buffer
management to support operation in physical address mode.

Signed-off-by: Max Zhen <max.zhen@amd.com>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
---
 drivers/accel/amdxdna/Makefile          |   2 +
 drivers/accel/amdxdna/amdxdna_cbuf.c    | 280 ++++++++++++++++++++++++
 drivers/accel/amdxdna/amdxdna_cbuf.h    |  18 ++
 drivers/accel/amdxdna/amdxdna_debugfs.c | 129 +++++++++++
 drivers/accel/amdxdna/amdxdna_debugfs.h |  18 ++
 drivers/accel/amdxdna/amdxdna_gem.c     |  95 ++++++--
 drivers/accel/amdxdna/amdxdna_iommu.c   |  77 ++++---
 drivers/accel/amdxdna/amdxdna_pci_drv.c |  89 +++++---
 drivers/accel/amdxdna/amdxdna_pci_drv.h |   8 +-
 9 files changed, 636 insertions(+), 80 deletions(-)
 create mode 100644 drivers/accel/amdxdna/amdxdna_cbuf.c
 create mode 100644 drivers/accel/amdxdna/amdxdna_cbuf.h
 create mode 100644 drivers/accel/amdxdna/amdxdna_debugfs.c
 create mode 100644 drivers/accel/amdxdna/amdxdna_debugfs.h

diff --git a/drivers/accel/amdxdna/Makefile b/drivers/accel/amdxdna/Makefile
index 79369e497540..d7720c8c8a98 100644
--- a/drivers/accel/amdxdna/Makefile
+++ b/drivers/accel/amdxdna/Makefile
@@ -12,6 +12,7 @@ amdxdna-y := \
 	aie2_solver.o \
 	aie4_message.o \
 	aie4_pci.o \
+	amdxdna_cbuf.o \
 	amdxdna_ctx.o \
 	amdxdna_gem.o \
 	amdxdna_iommu.o \
@@ -28,4 +29,5 @@ amdxdna-y := \
 	npu6_regs.o
 
 amdxdna-$(CONFIG_PCI_IOV) += aie4_sriov.o
+amdxdna-$(CONFIG_DEBUG_FS) += amdxdna_debugfs.o
 obj-$(CONFIG_DRM_ACCEL_AMDXDNA) = amdxdna.o
diff --git a/drivers/accel/amdxdna/amdxdna_cbuf.c b/drivers/accel/amdxdna/amdxdna_cbuf.c
new file mode 100644
index 000000000000..a504560aae98
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_cbuf.c
@@ -0,0 +1,280 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2026, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/drm_mm.h>
+#include <drm/drm_prime.h>
+
+#include "amdxdna_cbuf.h"
+#include "amdxdna_pci_drv.h"
+
+/*
+ * Carveout memory is a chunk of memory which is physically contiguous and
+ * is reserved during early boot time. There is only one chunk of such memory
+ * per device. Once available, all BOs accessible from device should be
+ * allocated from this memory. This is a platform debug/bringup feature.
+ */
+struct amdxdna_carveout {
+	u64		addr;
+	u64		size;
+	struct drm_mm	mm;
+	struct mutex	lock; /* protect mm */
+};
+
+bool amdxdna_use_carveout(struct amdxdna_dev *xdna)
+{
+	return !!xdna->carveout;
+}
+
+void amdxdna_get_carveout_conf(struct amdxdna_dev *xdna, u64 *addr, u64 *size)
+{
+	if (amdxdna_use_carveout(xdna)) {
+		*addr = xdna->carveout->addr;
+		*size = xdna->carveout->size;
+	} else {
+		*addr = 0;
+		*size = 0;
+	}
+}
+
+int amdxdna_carveout_init(struct amdxdna_dev *xdna, u64 carveout_addr, u64 carveout_size)
+{
+	struct amdxdna_carveout *carveout;
+
+	/* Only allow carveout memory to be set up once. */
+	if (amdxdna_use_carveout(xdna)) {
+		XDNA_ERR(xdna, "Carveout memory has already been set up.");
+		return -EBUSY;
+	}
+
+	carveout = kzalloc_obj(*carveout);
+	if (!carveout)
+		return -ENOMEM;
+
+	carveout->addr = carveout_addr;
+	carveout->size = carveout_size;
+	mutex_init(&carveout->lock);
+	drm_mm_init(&carveout->mm, carveout->addr, carveout->size);
+
+	xdna->carveout = carveout;
+	XDNA_INFO(xdna, "Use carveout mem: 0x%llx@0x%llx\n", carveout->size, carveout->addr);
+	return 0;
+}
+
+void amdxdna_carveout_fini(struct amdxdna_dev *xdna)
+{
+	struct amdxdna_carveout *carveout = xdna->carveout;
+
+	if (!amdxdna_use_carveout(xdna))
+		return;
+
+	XDNA_INFO(xdna, "Cleanup carveout mem: 0x%llx@0x%llx\n", carveout->size, carveout->addr);
+	drm_mm_takedown(&carveout->mm);
+	mutex_destroy(&carveout->lock);
+	kfree(carveout);
+	xdna->carveout = NULL;
+}
+
+struct amdxdna_cbuf_priv {
+	struct amdxdna_dev *xdna;
+	struct drm_mm_node node;
+};
+
+static struct sg_table *amdxdna_cbuf_map(struct dma_buf_attachment *attach,
+					 enum dma_data_direction direction)
+{
+	struct amdxdna_cbuf_priv *cbuf = attach->dmabuf->priv;
+	struct device *dev = attach->dev;
+	struct scatterlist *sgl, *sg;
+	int ret, n_entries, i;
+	struct sg_table *sgt;
+	dma_addr_t dma_addr;
+	size_t dma_size;
+	size_t max_seg;
+
+	sgt = kzalloc_obj(*sgt);
+	if (!sgt)
+		return ERR_PTR(-ENOMEM);
+
+	max_seg = min_t(size_t, UINT_MAX, dma_max_mapping_size(dev));
+	n_entries = (cbuf->node.size + max_seg - 1) / max_seg;
+	sgl = kzalloc_objs(*sg, n_entries);
+	if (!sgl) {
+		ret = -ENOMEM;
+		goto free_sgt;
+	}
+	sg_init_table(sgl, n_entries);
+	sgt->orig_nents = n_entries;
+	sgt->nents = n_entries;
+	sgt->sgl = sgl;
+
+	dma_size = cbuf->node.size;
+	dma_addr = dma_map_resource(dev, cbuf->node.start, dma_size,
+				    direction, DMA_ATTR_SKIP_CPU_SYNC);
+	ret = dma_mapping_error(dev, dma_addr);
+	if (ret) {
+		pr_err("Failed to dma_map_resource carveout dma buf, ret %d\n", ret);
+		goto free_sgl;
+	}
+
+	for_each_sgtable_dma_sg(sgt, sg, i) {
+		size_t len = min_t(size_t, max_seg, dma_size);
+
+		sg_dma_address(sg) = dma_addr;
+		sg_dma_len(sg) = len;
+		dma_addr += len;
+		dma_size -= len;
+	}
+
+	return sgt;
+
+free_sgl:
+	kfree(sgl);
+free_sgt:
+	kfree(sgt);
+	return ERR_PTR(ret);
+}
+
+static void amdxdna_cbuf_unmap(struct dma_buf_attachment *attach,
+			       struct sg_table *sgt,
+			       enum dma_data_direction direction)
+{
+	dma_unmap_resource(attach->dev, sg_dma_address(sgt->sgl),
+			   drm_prime_get_contiguous_size(sgt), direction,
+			   DMA_ATTR_SKIP_CPU_SYNC);
+	sg_free_table(sgt);
+	kfree(sgt);
+}
+
+static void amdxdna_cbuf_release(struct dma_buf *dbuf)
+{
+	struct amdxdna_cbuf_priv *cbuf = dbuf->priv;
+	struct amdxdna_carveout *carveout;
+
+	carveout = cbuf->xdna->carveout;
+	mutex_lock(&carveout->lock);
+	drm_mm_remove_node(&cbuf->node);
+	mutex_unlock(&carveout->lock);
+
+	kfree(cbuf);
+}
+
+static vm_fault_t amdxdna_cbuf_vm_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct amdxdna_cbuf_priv *cbuf;
+	unsigned long pfn;
+	pgoff_t pgoff;
+
+	cbuf = vma->vm_private_data;
+	pgoff = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
+	pfn = (cbuf->node.start >> PAGE_SHIFT) + pgoff;
+
+	return vmf_insert_pfn(vma, vmf->address, pfn);
+}
+
+static const struct vm_operations_struct amdxdna_cbuf_vm_ops = {
+	.fault = amdxdna_cbuf_vm_fault,
+};
+
+static int amdxdna_cbuf_mmap(struct dma_buf *dbuf, struct vm_area_struct *vma)
+{
+	struct amdxdna_cbuf_priv *cbuf = dbuf->priv;
+
+	vma->vm_ops = &amdxdna_cbuf_vm_ops;
+	vma->vm_private_data = cbuf;
+	vm_flags_set(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP);
+
+	return 0;
+}
+
+static int amdxdna_cbuf_vmap(struct dma_buf *dbuf, struct iosys_map *map)
+{
+	struct amdxdna_cbuf_priv *cbuf = dbuf->priv;
+	void *kva;
+
+	kva = memremap(cbuf->node.start, cbuf->node.size, MEMREMAP_WB);
+	if (!kva) {
+		pr_err("Failed to vmap carveout dma buf\n");
+		return -ENOMEM;
+	}
+
+	iosys_map_set_vaddr(map, kva);
+	return 0;
+}
+
+static void amdxdna_cbuf_vunmap(struct dma_buf *dbuf, struct iosys_map *map)
+{
+	memunmap(map->vaddr);
+}
+
+static const struct dma_buf_ops amdxdna_cbuf_dmabuf_ops = {
+	.map_dma_buf = amdxdna_cbuf_map,
+	.unmap_dma_buf = amdxdna_cbuf_unmap,
+	.release = amdxdna_cbuf_release,
+	.mmap = amdxdna_cbuf_mmap,
+	.vmap = amdxdna_cbuf_vmap,
+	.vunmap = amdxdna_cbuf_vunmap,
+};
+
+static int amdxdna_cbuf_clear(struct dma_buf *dbuf)
+{
+	struct iosys_map vmap = IOSYS_MAP_INIT_VADDR(NULL);
+
+	dma_buf_vmap(dbuf, &vmap);
+	if (!vmap.vaddr)
+		return -EFAULT;
+
+	memset(vmap.vaddr, 0, dbuf->size);
+	dma_buf_vunmap(dbuf, &vmap);
+
+	return 0;
+}
+
+struct dma_buf *amdxdna_get_cbuf(struct drm_device *dev, size_t size, u64 alignment)
+{
+	struct amdxdna_dev *xdna = to_xdna_dev(dev);
+	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+	struct amdxdna_carveout *carveout;
+	struct amdxdna_cbuf_priv *cbuf;
+	struct dma_buf *dbuf;
+	int ret;
+
+	cbuf = kzalloc_obj(*cbuf);
+	if (!cbuf)
+		return ERR_PTR(-ENOMEM);
+	cbuf->xdna = xdna;
+
+	carveout = xdna->carveout;
+	mutex_lock(&carveout->lock);
+	ret = drm_mm_insert_node_generic(&carveout->mm, &cbuf->node, size,
+					 alignment, 0, DRM_MM_INSERT_BEST);
+	mutex_unlock(&carveout->lock);
+	if (ret)
+		goto free_cbuf;
+
+	exp_info.size = size;
+	exp_info.ops = &amdxdna_cbuf_dmabuf_ops;
+	exp_info.priv = cbuf;
+	exp_info.flags = O_RDWR;
+	dbuf = dma_buf_export(&exp_info);
+	if (IS_ERR(dbuf)) {
+		ret = PTR_ERR(dbuf);
+		goto remove_node;
+	}
+
+	ret = amdxdna_cbuf_clear(dbuf);
+	if (ret) {
+		dma_buf_put(dbuf);
+		goto out;
+	}
+	return dbuf;
+
+remove_node:
+	drm_mm_remove_node(&cbuf->node);
+free_cbuf:
+	kfree(cbuf);
+out:
+	return ERR_PTR(ret);
+}
diff --git a/drivers/accel/amdxdna/amdxdna_cbuf.h b/drivers/accel/amdxdna/amdxdna_cbuf.h
new file mode 100644
index 000000000000..8e89336ffd50
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_cbuf.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2026, Advanced Micro Devices, Inc.
+ */
+#ifndef _AMDXDNA_CBUF_H_
+#define _AMDXDNA_CBUF_H_
+
+#include "amdxdna_pci_drv.h"
+#include <drm/drm_device.h>
+#include <linux/dma-buf.h>
+
+bool amdxdna_use_carveout(struct amdxdna_dev *xdna);
+int amdxdna_carveout_init(struct amdxdna_dev *xdna, u64 carveout_addr, u64 carveout_size);
+void amdxdna_carveout_fini(struct amdxdna_dev *xdna);
+void amdxdna_get_carveout_conf(struct amdxdna_dev *xdna, u64 *addr, u64 *size);
+struct dma_buf *amdxdna_get_cbuf(struct drm_device *dev, size_t size, u64 alignment);
+
+#endif
diff --git a/drivers/accel/amdxdna/amdxdna_debugfs.c b/drivers/accel/amdxdna/amdxdna_debugfs.c
new file mode 100644
index 000000000000..a6ec17c63629
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_debugfs.c
@@ -0,0 +1,129 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2026, Advanced Micro Devices, Inc.
+ */
+
+#include "amdxdna_cbuf.h"
+#include "amdxdna_debugfs.h"
+
+#include <drm/drm_file.h>
+#include <linux/debugfs.h>
+#include <linux/pm_runtime.h>
+#include <linux/seq_file.h>
+#include <linux/string.h>
+#include <linux/uaccess.h>
+
+#define _DBGFS_FOPS(_open, _release, _write) \
+{ \
+	.owner = THIS_MODULE, \
+	.open = _open, \
+	.read = seq_read, \
+	.llseek = seq_lseek, \
+	.release = _release, \
+	.write = _write, \
+}
+
+#define AMDXDNA_DBGFS_FOPS(_name, _show, _write) \
+	static int amdxdna_dbgfs_##_name##_open(struct inode *inode, struct file *file) \
+	{ \
+		return single_open(file, _show, inode->i_private); \
+	} \
+	static int amdxdna_dbgfs_##_name##_release(struct inode *inode, struct file *file) \
+	{ \
+		return single_release(inode, file); \
+	} \
+	static const struct file_operations amdxdna_fops_##_name = \
+		_DBGFS_FOPS(amdxdna_dbgfs_##_name##_open, amdxdna_dbgfs_##_name##_release, _write)
+
+#define AMDXDNA_DBGFS_FILE(_name, _mode) { #_name, &amdxdna_fops_##_name, _mode }
+
+#define file_to_xdna(file) (((struct seq_file *)(file)->private_data)->private)
+
+static ssize_t amdxdna_carveout_write(struct file *file, const char __user *buf,
+				      size_t count, loff_t *ppos)
+{
+	struct amdxdna_dev *xdna = file_to_xdna(file);
+	char kbuf[128];
+	u64 size, addr;
+	char *sep;
+	int ret;
+
+	if (count == 0 || count >= sizeof(kbuf))
+		return -EINVAL;
+
+	if (copy_from_user(kbuf, buf, count))
+		return -EFAULT;
+	kbuf[count] = '\0';
+	strim(kbuf);
+	XDNA_DBG(xdna, "Trying to set carveout to %s", kbuf);
+
+	sep = strchr(kbuf, '@');
+	if (!sep)
+		return -EINVAL;
+	*sep = '\0';
+	sep++;
+
+	ret = kstrtou64(kbuf, 0, &size);
+	if (ret)
+		return ret;
+
+	ret = kstrtou64(sep, 0, &addr);
+	if (ret)
+		return ret;
+
+	/* Sanity check the addr and size. */
+	if (!size)
+		return -EINVAL;
+	if (!IS_ALIGNED(addr, PAGE_SIZE) || !IS_ALIGNED(size, PAGE_SIZE))
+		return -EINVAL;
+
+	guard(mutex)(&xdna->dev_lock);
+
+	ret = amdxdna_carveout_init(xdna, addr, size);
+	if (ret)
+		return ret;
+
+	return count;
+}
+
+static int amdxdna_carveout_show(struct seq_file *m, void *unused)
+{
+	struct amdxdna_dev *xdna = m->private;
+	u64 addr, size;
+
+	guard(mutex)(&xdna->dev_lock);
+	amdxdna_get_carveout_conf(xdna, &addr, &size);
+	seq_printf(m, "0x%llx@0x%llx\n", size, addr);
+	return 0;
+}
+
+/*
+ * Input/output format: <carveout_size>@<carveout_address>
+ */
+AMDXDNA_DBGFS_FOPS(carveout, amdxdna_carveout_show, amdxdna_carveout_write);
+
+static const struct {
+	const char *name;
+	const struct file_operations *fops;
+	umode_t mode;
+} amdxdna_dbgfs_files[] = {
+	AMDXDNA_DBGFS_FILE(carveout, 0600),
+};
+
+void amdxdna_debugfs_init(struct amdxdna_dev *xdna)
+{
+	struct drm_minor *minor = xdna->ddev.accel;
+	int i;
+
+	/*
+	 * It should be okay that debugfs fails to init.
+	 * We rely on DRM framework to finish debugfs.
+	 */
+	for (i = 0; i < ARRAY_SIZE(amdxdna_dbgfs_files); i++) {
+		debugfs_create_file(amdxdna_dbgfs_files[i].name,
+				    amdxdna_dbgfs_files[i].mode,
+				    minor->debugfs_root,
+				    xdna,
+				    amdxdna_dbgfs_files[i].fops);
+	}
+}
diff --git a/drivers/accel/amdxdna/amdxdna_debugfs.h b/drivers/accel/amdxdna/amdxdna_debugfs.h
new file mode 100644
index 000000000000..2abb45de3f7e
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_debugfs.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2026, Advanced Micro Devices, Inc.
+ */
+#ifndef _AMDXDNA_DEBUGFS_H_
+#define _AMDXDNA_DEBUGFS_H_
+
+#include "amdxdna_pci_drv.h"
+
+#if defined(CONFIG_DEBUG_FS)
+void amdxdna_debugfs_init(struct amdxdna_dev *xdna);
+#else
+static inline void amdxdna_debugfs_init(struct amdxdna_dev *xdna)
+{
+}
+#endif /* CONFIG_DEBUG_FS */
+
+#endif
diff --git a/drivers/accel/amdxdna/amdxdna_gem.c b/drivers/accel/amdxdna/amdxdna_gem.c
index 238ee244d4a6..ebfc472aa9e7 100644
--- a/drivers/accel/amdxdna/amdxdna_gem.c
+++ b/drivers/accel/amdxdna/amdxdna_gem.c
@@ -16,6 +16,7 @@
 #include <linux/pagemap.h>
 #include <linux/vmalloc.h>
 
+#include "amdxdna_cbuf.h"
 #include "amdxdna_ctx.h"
 #include "amdxdna_gem.h"
 #include "amdxdna_pci_drv.h"
@@ -516,10 +517,6 @@ static void amdxdna_imported_obj_free(struct amdxdna_gem_obj *abo)
 static inline bool
 amdxdna_gem_skip_bo_usage(struct amdxdna_gem_obj *abo)
 {
-	/* Do not count imported BOs since the buffer is not allocated by us. */
-	if (is_import_bo(abo))
-		return true;
-
 	/* Already counted as part of HEAP BO */
 	if (abo->type == AMDXDNA_BO_DEV)
 		return true;
@@ -571,9 +568,7 @@ static void amdxdna_gem_obj_free(struct drm_gem_object *gobj)
 	if (abo->type == AMDXDNA_BO_DEV_HEAP)
 		drm_mm_takedown(&abo->mm);
 
-	if (amdxdna_iova_on(xdna))
-		amdxdna_iommu_unmap_bo(xdna, abo);
-
+	amdxdna_dma_unmap_bo(xdna, abo);
 	amdxdna_gem_vunmap(abo);
 	mutex_destroy(&abo->lock);
 
@@ -591,18 +586,20 @@ static int amdxdna_gem_obj_open(struct drm_gem_object *gobj, struct drm_file *fi
 
 	guard(mutex)(&abo->lock);
 	abo->open_ref++;
+	if (abo->open_ref > 1)
+		return 0;
 
-	if (abo->open_ref == 1) {
-		/* Attached to the client when first opened by it. */
-		abo->client = filp->driver_priv;
-		amdxdna_gem_add_bo_usage(abo);
-	}
-	if (amdxdna_iova_on(xdna)) {
-		ret = amdxdna_iommu_map_bo(xdna, abo);
+	/* Attached to the client when first opened by it. */
+	abo->client = filp->driver_priv;
+
+	/* No need to set up dma addr mapping in PASID mode. */
+	if (!amdxdna_pasid_on(abo->client)) {
+		ret = amdxdna_dma_map_bo(xdna, abo);
 		if (ret)
 			return ret;
 	}
 
+	amdxdna_gem_add_bo_usage(abo);
 	return 0;
 }
 
@@ -620,6 +617,39 @@ static void amdxdna_gem_obj_close(struct drm_gem_object *gobj, struct drm_file *
 	}
 }
 
+static int amdxdna_gem_obj_vmap(struct drm_gem_object *obj, struct iosys_map *map)
+{
+	struct amdxdna_gem_obj *abo = to_xdna_obj(obj);
+	int ret;
+
+	iosys_map_clear(map);
+
+	dma_resv_assert_held(obj->resv);
+
+	if (is_import_bo(abo))
+		ret = dma_buf_vmap(abo->dma_buf, map);
+	else
+		ret = drm_gem_shmem_object_vmap(obj, map);
+	if (ret)
+		return ret;
+	if (!map->vaddr)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void amdxdna_gem_obj_vunmap(struct drm_gem_object *obj, struct iosys_map *map)
+{
+	struct amdxdna_gem_obj *abo = to_xdna_obj(obj);
+
+	dma_resv_assert_held(obj->resv);
+
+	if (is_import_bo(abo))
+		dma_buf_vunmap(abo->dma_buf, map);
+	else
+		drm_gem_shmem_object_vunmap(obj, map);
+}
+
 static int amdxdna_gem_dev_obj_vmap(struct drm_gem_object *obj, struct iosys_map *map)
 {
 	struct amdxdna_gem_obj *abo = to_xdna_obj(obj);
@@ -645,8 +675,8 @@ static const struct drm_gem_object_funcs amdxdna_gem_shmem_funcs = {
 	.pin = drm_gem_shmem_object_pin,
 	.unpin = drm_gem_shmem_object_unpin,
 	.get_sg_table = drm_gem_shmem_object_get_sg_table,
-	.vmap = drm_gem_shmem_object_vmap,
-	.vunmap = drm_gem_shmem_object_vunmap,
+	.vmap = amdxdna_gem_obj_vmap,
+	.vunmap = amdxdna_gem_obj_vunmap,
 	.mmap = amdxdna_gem_obj_mmap,
 	.vm_ops = &drm_gem_shmem_vm_ops,
 	.export = amdxdna_gem_prime_export,
@@ -714,6 +744,36 @@ amdxdna_gem_create_ubuf_object(struct drm_device *dev, struct amdxdna_drm_create
 	return to_xdna_obj(gobj);
 }
 
+static struct amdxdna_gem_obj *
+amdxdna_gem_create_cbuf_object(struct drm_device *dev, struct amdxdna_drm_create_bo *args)
+{
+	struct amdxdna_dev *xdna = to_xdna_dev(dev);
+	size_t size = PAGE_ALIGN(args->size);
+	struct drm_gem_object *gobj;
+	struct amdxdna_gem_obj *ret;
+	struct dma_buf *dma_buf;
+	u64 align;
+
+	if (!size) {
+		XDNA_ERR(xdna, "Invalid BO size 0x%llx", args->size);
+		return ERR_PTR(-EINVAL);
+	}
+
+	align = (args->type == AMDXDNA_BO_DEV_HEAP) ?  xdna->dev_info->dev_mem_size : 0;
+	dma_buf = amdxdna_get_cbuf(dev, size, align);
+	if (IS_ERR(dma_buf))
+		return ERR_CAST(dma_buf);
+
+	gobj = amdxdna_gem_prime_import(dev, dma_buf);
+	if (IS_ERR(gobj))
+		ret = ERR_CAST(gobj);
+	else
+		ret = to_xdna_obj(gobj);
+
+	dma_buf_put(dma_buf);
+	return ret;
+}
+
 struct drm_gem_object *
 amdxdna_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf)
 {
@@ -769,6 +829,8 @@ amdxdna_drm_create_share_bo(struct drm_device *dev,
 
 	if (args->vaddr)
 		abo = amdxdna_gem_create_ubuf_object(dev, args);
+	else if (amdxdna_use_carveout(to_xdna_dev(dev)))
+		abo = amdxdna_gem_create_cbuf_object(dev, args);
 	else
 		abo = amdxdna_gem_create_shmem_object(dev, args);
 	if (IS_ERR(abo))
@@ -884,7 +946,6 @@ int amdxdna_drm_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_f
 		 args->type, args->vaddr, args->size, args->flags);
 	switch (args->type) {
 	case AMDXDNA_BO_CMD:
-		fallthrough;
 	case AMDXDNA_BO_SHARE:
 		abo = amdxdna_drm_create_share_bo(dev, args, filp);
 		break;
diff --git a/drivers/accel/amdxdna/amdxdna_iommu.c b/drivers/accel/amdxdna/amdxdna_iommu.c
index 5a9f06183487..eff00131d0f8 100644
--- a/drivers/accel/amdxdna/amdxdna_iommu.c
+++ b/drivers/accel/amdxdna/amdxdna_iommu.c
@@ -35,14 +35,15 @@ static struct iova *amdxdna_iommu_alloc_iova(struct amdxdna_dev *xdna,
 	return iova;
 }
 
-int amdxdna_iommu_map_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo)
+int amdxdna_dma_map_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo)
 {
+	unsigned long contig_sz;
 	struct sg_table *sgt;
 	dma_addr_t dma_addr;
 	struct iova *iova;
 	ssize_t size;
 
-	if (abo->type != AMDXDNA_BO_DEV_HEAP && abo->type != AMDXDNA_BO_SHMEM)
+	if (abo->type != AMDXDNA_BO_DEV_HEAP && abo->type != AMDXDNA_BO_SHARE)
 		return 0;
 
 	sgt = drm_gem_shmem_get_pages_sgt(&abo->base);
@@ -51,47 +52,63 @@ int amdxdna_iommu_map_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo)
 		return PTR_ERR(sgt);
 	}
 
-	if (!sgt->orig_nents || !sg_page(sgt->sgl)) {
-		XDNA_ERR(xdna, "sgl is zero length or not page backed");
+	if (!sgt->orig_nents) {
+		XDNA_ERR(xdna, "sgl is zero length");
 		return -EOPNOTSUPP;
 	}
 
-	iova = amdxdna_iommu_alloc_iova(xdna, abo->mem.size, &dma_addr,
-					(abo->type == AMDXDNA_BO_DEV_HEAP));
-	if (IS_ERR(iova)) {
-		XDNA_ERR(xdna, "Alloc iova failed, ret %ld", PTR_ERR(iova));
-		return PTR_ERR(iova);
+	if (amdxdna_iova_on(xdna)) {
+		if (!sg_page(sgt->sgl)) {
+			XDNA_ERR(xdna, "sgl is not page backed");
+			return -EOPNOTSUPP;
+		}
+
+		iova = amdxdna_iommu_alloc_iova(xdna, abo->mem.size, &dma_addr,
+						(abo->type == AMDXDNA_BO_DEV_HEAP));
+		if (IS_ERR(iova)) {
+			XDNA_ERR(xdna, "Alloc iova failed, ret %ld", PTR_ERR(iova));
+			return PTR_ERR(iova);
+		}
+
+		size = iommu_map_sgtable(xdna->domain, dma_addr, sgt,
+					 IOMMU_READ | IOMMU_WRITE);
+		if (size < 0) {
+			XDNA_ERR(xdna, "iommu_map_sgtable failed: %zd", size);
+			__free_iova(&xdna->iovad, iova);
+			return size;
+		}
+		if (size < abo->mem.size) {
+			iommu_unmap(xdna->domain, dma_addr, size);
+			__free_iova(&xdna->iovad, iova);
+			return -ENXIO;
+		}
+		abo->mem.dma_addr = dma_addr;
+	} else {
+		/* Device doesn't support scatter/gather list, fail non-contiguous mapping. */
+		contig_sz = drm_prime_get_contiguous_size(sgt);
+		if (contig_sz < abo->mem.size) {
+			XDNA_ERR(xdna,
+				 "noncontiguous dma addr, contig size:%ld, expected size:%ld",
+				 contig_sz, abo->mem.size);
+			return -EINVAL;
+		}
+		abo->mem.dma_addr = sg_dma_address(sgt->sgl);
 	}
-
-	size = iommu_map_sgtable(xdna->domain, dma_addr, sgt,
-				 IOMMU_READ | IOMMU_WRITE);
-	if (size < 0) {
-		XDNA_ERR(xdna, "iommu_map_sgtable failed: %zd", size);
-		__free_iova(&xdna->iovad, iova);
-		return size;
-	}
-
-	if (size < abo->mem.size) {
-		iommu_unmap(xdna->domain, dma_addr, size);
-		__free_iova(&xdna->iovad, iova);
-		return -ENXIO;
-	}
-
-	abo->mem.dma_addr = dma_addr;
-
 	return 0;
 }
 
-void amdxdna_iommu_unmap_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo)
+void amdxdna_dma_unmap_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo)
 {
 	size_t size;
 
 	if (abo->mem.dma_addr == AMDXDNA_INVALID_ADDR)
 		return;
 
-	size = iova_align(&xdna->iovad, abo->mem.size);
-	iommu_unmap(xdna->domain, abo->mem.dma_addr, size);
-	free_iova(&xdna->iovad, iova_pfn(&xdna->iovad, abo->mem.dma_addr));
+	if (amdxdna_iova_on(xdna)) {
+		size = iova_align(&xdna->iovad, abo->mem.size);
+		iommu_unmap(xdna->domain, abo->mem.dma_addr, size);
+		free_iova(&xdna->iovad, iova_pfn(&xdna->iovad, abo->mem.dma_addr));
+	}
 	abo->mem.dma_addr = AMDXDNA_INVALID_ADDR;
 }
 
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
index 21eddfc538d0..1b08a08343cf 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
@@ -14,7 +14,9 @@
 #include <linux/iommu.h>
 #include <linux/pci.h>
 
+#include "amdxdna_cbuf.h"
 #include "amdxdna_ctx.h"
+#include "amdxdna_debugfs.h"
 #include "amdxdna_gem.h"
 #include "amdxdna_pci_drv.h"
 #include "amdxdna_pm.h"
@@ -67,11 +69,40 @@ static const struct amdxdna_device_id amdxdna_ids[] = {
 	{0}
 };
 
+static int amdxdna_sva_init(struct amdxdna_client *client)
+{
+	struct amdxdna_dev *xdna = client->xdna;
+
+	client->sva = iommu_sva_bind_device(xdna->ddev.dev, client->mm);
+	if (IS_ERR(client->sva)) {
+		XDNA_ERR(xdna, "SVA bind device failed, ret %ld", PTR_ERR(client->sva));
+		return PTR_ERR(client->sva);
+	}
+
+	client->pasid = iommu_sva_get_pasid(client->sva);
+	if (client->pasid == IOMMU_PASID_INVALID) {
+		iommu_sva_unbind_device(client->sva);
+		XDNA_ERR(xdna, "SVA get pasid failed");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+static void amdxdna_sva_fini(struct amdxdna_client *client)
+{
+	if (IS_ERR_OR_NULL(client->sva))
+		return;
+
+	iommu_sva_unbind_device(client->sva);
+	client->sva = NULL;
+	client->pasid = IOMMU_PASID_INVALID;
+}
+
 static int amdxdna_drm_open(struct drm_device *ddev, struct drm_file *filp)
 {
 	struct amdxdna_dev *xdna = to_xdna_dev(ddev);
 	struct amdxdna_client *client;
-	int ret;
 
 	client = kzalloc_obj(*client);
 	if (!client)
@@ -80,22 +111,13 @@ static int amdxdna_drm_open(struct drm_device *ddev, struct drm_file *filp)
 	client->pid = pid_nr(rcu_access_pointer(filp->pid));
 	client->xdna = xdna;
 	client->pasid = IOMMU_PASID_INVALID;
+	client->mm = current->mm;
 
 	if (!amdxdna_iova_on(xdna)) {
-		client->sva = iommu_sva_bind_device(xdna->ddev.dev, current->mm);
-		if (IS_ERR(client->sva)) {
-			ret = PTR_ERR(client->sva);
-			XDNA_ERR(xdna, "SVA bind device failed, ret %d", ret);
-			goto failed;
-		}
-		client->pasid = iommu_sva_get_pasid(client->sva);
-		if (client->pasid == IOMMU_PASID_INVALID) {
-			XDNA_ERR(xdna, "SVA get pasid failed");
-			ret = -ENODEV;
-			goto unbind_sva;
-		}
+		/* No need to fail open since user may use pa + carveout later. */
+		if (amdxdna_sva_init(client))
+			XDNA_WARN(xdna, "PASID not available for pid %d", client->pid);
 	}
-	client->mm = current->mm;
 	mmgrab(client->mm);
 	init_srcu_struct(&client->hwctx_srcu);
 	xa_init_flags(&client->hwctx_xa, XA_FLAGS_ALLOC);
@@ -110,14 +132,6 @@ static int amdxdna_drm_open(struct drm_device *ddev, struct drm_file *filp)
 
 	XDNA_DBG(xdna, "pid %d opened", client->pid);
 	return 0;
-
-unbind_sva:
-	if (!IS_ERR_OR_NULL(client->sva))
-		iommu_sva_unbind_device(client->sva);
-failed:
-	kfree(client);
-
-	return ret;
 }
 
 static void amdxdna_client_cleanup(struct amdxdna_client *client)
@@ -131,11 +145,8 @@ static void amdxdna_client_cleanup(struct amdxdna_client *client)
 		drm_gem_object_put(to_gobj(client->dev_heap));
 
 	mutex_destroy(&client->mm_lock);
-
-	if (!IS_ERR_OR_NULL(client->sva))
-		iommu_sva_unbind_device(client->sva);
 	mmdrop(client->mm);
-
+	amdxdna_sva_fini(client);
 	kfree(client);
 }
 
@@ -242,15 +253,17 @@ static void amdxdna_show_fdinfo(struct drm_printer *p, struct drm_file *filp)
 
 	/*
 	 * Note for driver specific BO memory usage stat.
-	 * Total memory alloc = amdxdna-internal-alloc + amdxdna-external-alloc
+	 * Total memory in use = amdxdna-internal-alloc + amdxdna-external-alloc, which
+	 * includes both imported and created BOs. To avoid double counts, it includes
+	 * HEAP BO, but not DEV BO. DEV BO is counted by amdxdna-heap-alloc.
 	 */
 	drm_fdinfo_print_size(p, drv_name, "heap", "alloc", heap_usage);
 	drm_fdinfo_print_size(p, drv_name, "internal", "alloc", internal_usage);
 	drm_fdinfo_print_size(p, drv_name, "external", "alloc", external_usage);
 	/*
 	 * Note for DRM standard BO memory stat.
-	 * drm-total-memory counts both DEV BO and HEAP BO
-	 * drm-shared-memory counts BO imported
+	 * drm-total-memory counts both DEV BO and HEAP BO. The DEV BO size is double counted.
+	 * drm-shared-memory counts BO shared with other processes/devices.
 	 */
 	drm_show_memory_stats(p, filp);
 }
@@ -299,25 +312,38 @@ amdxdna_get_dev_info(struct pci_dev *pdev)
 	return NULL;
 }
 
+static void amdxdna_xdna_drm_release(struct drm_device *drm, void *res)
+{
+	struct amdxdna_dev *xdna = res;
+
+	amdxdna_carveout_fini(xdna);
+}
+
 static int amdxdna_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	struct device *dev = &pdev->dev;
 	struct amdxdna_dev *xdna;
+	struct drm_device *ddev;
 	int ret;
 
 	xdna = devm_drm_dev_alloc(dev, &amdxdna_drm_drv, typeof(*xdna), ddev);
 	if (IS_ERR(xdna))
 		return PTR_ERR(xdna);
+	ddev = &xdna->ddev;
 
 	xdna->dev_info = amdxdna_get_dev_info(pdev);
 	if (!xdna->dev_info)
 		return -ENODEV;
 
-	drmm_mutex_init(&xdna->ddev, &xdna->dev_lock);
+	drmm_mutex_init(ddev, &xdna->dev_lock);
 	init_rwsem(&xdna->notifier_lock);
 	INIT_LIST_HEAD(&xdna->client_list);
 	pci_set_drvdata(pdev, xdna);
 
+	ret = drmm_add_action(ddev, amdxdna_xdna_drm_release, xdna);
+	if (ret)
+		return ret;
+
 	if (IS_ENABLED(CONFIG_LOCKDEP)) {
 		fs_reclaim_acquire(GFP_KERNEL);
 		might_lock(&xdna->notifier_lock);
@@ -348,12 +374,13 @@ static int amdxdna_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		goto failed_dev_fini;
 	}
 
-	ret = drm_dev_register(&xdna->ddev, 0);
+	ret = drm_dev_register(ddev, 0);
 	if (ret) {
 		XDNA_ERR(xdna, "DRM register failed, ret %d", ret);
 		goto failed_sysfs_fini;
 	}
 
+	amdxdna_debugfs_init(xdna);
 	return 0;
 
 failed_sysfs_fini:
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h
index bdd0dc83f92e..b1548cf16f59 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
@@ -104,6 +104,8 @@ struct amdxdna_fw_ver {
 	u32 build;
 };
 
+struct amdxdna_carveout;
+
 struct amdxdna_dev {
 	struct drm_device		ddev;
 	struct amdxdna_dev_hdl		*dev_handle;
@@ -121,6 +123,8 @@ struct amdxdna_dev {
 	struct iova_domain		iovad;
 	/* Accurate board name queried from firmware, or default_vbnv as fallback */
 	const char			*vbnv;
+
+	struct amdxdna_carveout		*carveout;
 };
 
 /*
@@ -172,11 +176,11 @@ void amdxdna_sysfs_fini(struct amdxdna_dev *xdna);
 
 int amdxdna_iommu_init(struct amdxdna_dev *xdna);
 void amdxdna_iommu_fini(struct amdxdna_dev *xdna);
-int amdxdna_iommu_map_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo);
-void amdxdna_iommu_unmap_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo);
 void *amdxdna_iommu_alloc(struct amdxdna_dev *xdna, size_t size, dma_addr_t *dma_addr);
 void amdxdna_iommu_free(struct amdxdna_dev *xdna, size_t size,
 			void *cpu_addr, dma_addr_t dma_addr);
+int amdxdna_dma_map_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo);
+void amdxdna_dma_unmap_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo);
 
 static inline bool amdxdna_iova_on(struct amdxdna_dev *xdna)
 {
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2026-05-04 19:08 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-04-27 17:09 [PATCH V2] accel/amdxdna: Add carveout memory support for non-IOMMU systems Lizhi Hou
2026-05-04 19:08 ` Mario Limonciello

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox