From: Mario Limonciello <superm1@kernel.org>
To: Lizhi Hou <lizhi.hou@amd.com>,
ogabbay@kernel.org, quic_jhugo@quicinc.com,
dri-devel@lists.freedesktop.org,
maciej.falkowski@linux.intel.com
Cc: Max Zhen <max.zhen@amd.com>,
linux-kernel@vger.kernel.org, sonal.santan@amd.com
Subject: Re: [PATCH V2] accel/amdxdna: Add carveout memory support for non-IOMMU systems
Date: Mon, 4 May 2026 14:08:20 -0500 [thread overview]
Message-ID: <88835cfd-dfcb-48d7-9f85-d2956df1082e@kernel.org> (raw)
In-Reply-To: <20260427170949.2666601-1-lizhi.hou@amd.com>
On 4/27/26 12:09, Lizhi Hou wrote:
> From: Max Zhen <max.zhen@amd.com>
>
> Add support for allocating buffers from reserved carveout memory when
> IOMMU is not available. This is useful during debugging or bring-up.
>
> In this configuration, the device uses physical addresses and does
> not support scatter-gather lists, requiring physically contiguous
> buffers.
>
> Implement carveout-backed allocation and integrate it into buffer
> management to support operation in physical address mode.
>
> Signed-off-by: Max Zhen <max.zhen@amd.com>
> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
> ---
> drivers/accel/amdxdna/Makefile | 2 +
> drivers/accel/amdxdna/amdxdna_cbuf.c | 280 ++++++++++++++++++++++++
> drivers/accel/amdxdna/amdxdna_cbuf.h | 18 ++
> drivers/accel/amdxdna/amdxdna_debugfs.c | 129 +++++++++++
> drivers/accel/amdxdna/amdxdna_debugfs.h | 18 ++
> drivers/accel/amdxdna/amdxdna_gem.c | 95 ++++++--
> drivers/accel/amdxdna/amdxdna_iommu.c | 77 ++++---
> drivers/accel/amdxdna/amdxdna_pci_drv.c | 89 +++++---
> drivers/accel/amdxdna/amdxdna_pci_drv.h | 8 +-
> 9 files changed, 636 insertions(+), 80 deletions(-)
> create mode 100644 drivers/accel/amdxdna/amdxdna_cbuf.c
> create mode 100644 drivers/accel/amdxdna/amdxdna_cbuf.h
> create mode 100644 drivers/accel/amdxdna/amdxdna_debugfs.c
> create mode 100644 drivers/accel/amdxdna/amdxdna_debugfs.h
>
> diff --git a/drivers/accel/amdxdna/Makefile b/drivers/accel/amdxdna/Makefile
> index 79369e497540..d7720c8c8a98 100644
> --- a/drivers/accel/amdxdna/Makefile
> +++ b/drivers/accel/amdxdna/Makefile
> @@ -12,6 +12,7 @@ amdxdna-y := \
> aie2_solver.o \
> aie4_message.o \
> aie4_pci.o \
> + amdxdna_cbuf.o \
> amdxdna_ctx.o \
> amdxdna_gem.o \
> amdxdna_iommu.o \
> @@ -28,4 +29,5 @@ amdxdna-y := \
> npu6_regs.o
>
> amdxdna-$(CONFIG_PCI_IOV) += aie4_sriov.o
> +amdxdna-$(CONFIG_DEBUG_FS) += amdxdna_debugfs.o
> obj-$(CONFIG_DRM_ACCEL_AMDXDNA) = amdxdna.o
> diff --git a/drivers/accel/amdxdna/amdxdna_cbuf.c b/drivers/accel/amdxdna/amdxdna_cbuf.c
> new file mode 100644
> index 000000000000..a504560aae98
> --- /dev/null
> +++ b/drivers/accel/amdxdna/amdxdna_cbuf.c
> @@ -0,0 +1,280 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2026, Advanced Micro Devices, Inc.
> + */
> +
> +#include <drm/drm_mm.h>
> +#include <drm/drm_prime.h>
> +
> +#include "amdxdna_cbuf.h"
> +#include "amdxdna_pci_drv.h"
> +
> +/*
> + * Carveout memory is a chunk of memory which is physically contiguous and
> + * is reserved during early boot time. There is only one chunk of such memory
> + * per device. Once available, all BOs accessible from device should be
> + * allocated from this memory. This is a platform debug/bringup feature.
> + */
> +struct amdxdna_carveout {
> + u64 addr; /* start of the region, as handed to drm_mm_init() */
> + u64 size; /* length of the region, as handed to drm_mm_init() */
> + struct drm_mm mm; /* range allocator the buffer nodes are inserted into */
> + struct mutex lock; /* protect mm */
> +};
> +
> +/* Tell whether a carveout region is currently attached to @xdna. */
> +bool amdxdna_use_carveout(struct amdxdna_dev *xdna)
> +{
> +	return xdna->carveout != NULL;
> +}
> +
> +/*
> + * Report the carveout region of @xdna through @addr and @size.
> + * Both are set to 0 when no carveout is attached.
> + */
> +void amdxdna_get_carveout_conf(struct amdxdna_dev *xdna, u64 *addr, u64 *size)
> +{
> +	if (!amdxdna_use_carveout(xdna)) {
> +		*addr = 0;
> +		*size = 0;
> +		return;
> +	}
> +
> +	*addr = xdna->carveout->addr;
> +	*size = xdna->carveout->size;
> +}
> +
> +/*
> + * Attach a carveout region to @xdna and set up the range allocator for it.
> + *
> + * @carveout_addr: start of the region
> + * @carveout_size: length of the region
> + *
> + * Return: 0 on success, -EBUSY if a carveout is already attached, -EINVAL if
> + * the range is empty or its end wraps around, -ENOMEM on allocation failure.
> + */
> +int amdxdna_carveout_init(struct amdxdna_dev *xdna, u64 carveout_addr, u64 carveout_size)
> +{
> +	struct amdxdna_carveout *carveout;
> +
> +	/* Only allow carveout memory to be set up once. */
> +	if (amdxdna_use_carveout(xdna)) {
> +		XDNA_ERR(xdna, "Carveout memory has already been set up.");
> +		return -EBUSY;
> +	}
> +
> +	/* drm_mm_init() requires a non-empty range whose end does not wrap. */
> +	if (!carveout_size || carveout_size > U64_MAX - carveout_addr) {
> +		XDNA_ERR(xdna, "Invalid carveout range: 0x%llx@0x%llx",
> +			 carveout_size, carveout_addr);
> +		return -EINVAL;
> +	}
> +
> +	carveout = kzalloc_obj(*carveout);
> +	if (!carveout)
> +		return -ENOMEM;
> +
> +	carveout->addr = carveout_addr;
> +	carveout->size = carveout_size;
> +	mutex_init(&carveout->lock);
> +	drm_mm_init(&carveout->mm, carveout->addr, carveout->size);
> +
> +	xdna->carveout = carveout;
> +	XDNA_INFO(xdna, "Use carveout mem: 0x%llx@0x%llx\n", carveout->size, carveout->addr);
> +	return 0;
> +}
> +
> +/* Tear down and free the carveout of @xdna; does nothing when none is attached. */
> +void amdxdna_carveout_fini(struct amdxdna_dev *xdna)
> +{
> +	struct amdxdna_carveout *region = xdna->carveout;
> +
> +	if (!region)
> +		return;
> +
> +	XDNA_INFO(xdna, "Cleanup carveout mem: 0x%llx@0x%llx\n", region->size, region->addr);
> +
> +	drm_mm_takedown(&region->mm);
> +	mutex_destroy(&region->lock);
> +	kfree(region);
> +
> +	xdna->carveout = NULL;
> +}
> +
> +struct amdxdna_cbuf_priv {
> + struct amdxdna_dev *xdna; /* device whose carveout backs this buffer */
> + struct drm_mm_node node; /* range allocated from the carveout's drm_mm */
> +};
> +
> +/*
> + * dma-buf map_dma_buf callback: map the whole carveout range of the buffer for
> + * the attached device with dma_map_resource() and describe the result in a
> + * scatter-gather table.
> + *
> + * The range is split into entries no larger than the device's maximum mapping
> + * size (capped at UINT_MAX). Only the DMA address and DMA length of each entry
> + * are filled in.
> + *
> + * The table is built with sg_alloc_table() so that the sg_free_table() call in
> + * amdxdna_cbuf_unmap() releases it correctly regardless of how many entries
> + * it holds.
> + *
> + * Return: the new table, or an ERR_PTR() on failure.
> + */
> +static struct sg_table *amdxdna_cbuf_map(struct dma_buf_attachment *attach,
> +					 enum dma_data_direction direction)
> +{
> +	struct amdxdna_cbuf_priv *cbuf = attach->dmabuf->priv;
> +	struct device *dev = attach->dev;
> +	struct scatterlist *sg;
> +	int ret, n_entries, i;
> +	struct sg_table *sgt;
> +	dma_addr_t dma_addr;
> +	size_t dma_size;
> +	size_t max_seg;
> +
> +	sgt = kzalloc_obj(*sgt);
> +	if (!sgt)
> +		return ERR_PTR(-ENOMEM);
> +
> +	max_seg = min_t(size_t, UINT_MAX, dma_max_mapping_size(dev));
> +	n_entries = (cbuf->node.size + max_seg - 1) / max_seg;
> +
> +	/* On failure sg_alloc_table() has already released what it allocated. */
> +	ret = sg_alloc_table(sgt, n_entries, GFP_KERNEL);
> +	if (ret)
> +		goto free_sgt;
> +
> +	dma_size = cbuf->node.size;
> +	dma_addr = dma_map_resource(dev, cbuf->node.start, dma_size,
> +				    direction, DMA_ATTR_SKIP_CPU_SYNC);
> +	ret = dma_mapping_error(dev, dma_addr);
> +	if (ret) {
> +		pr_err("Failed to dma_map_resource carveout dma buf, ret %d\n", ret);
> +		goto free_table;
> +	}
> +
> +	/* Slice the single contiguous mapping into max_seg sized entries. */
> +	for_each_sgtable_dma_sg(sgt, sg, i) {
> +		size_t len = min_t(size_t, max_seg, dma_size);
> +
> +		sg_dma_address(sg) = dma_addr;
> +		sg_dma_len(sg) = len;
> +		dma_addr += len;
> +		dma_size -= len;
> +	}
> +
> +	return sgt;
> +
> +free_table:
> +	sg_free_table(sgt);
> +free_sgt:
> +	kfree(sgt);
> +	return ERR_PTR(ret);
> +}
> +
> +/*
> + * dma-buf unmap_dma_buf callback: undo the dma_map_resource() mapping that
> + * @sgt describes, then free the table.
> + */
> +static void amdxdna_cbuf_unmap(struct dma_buf_attachment *attach,
> +			       struct sg_table *sgt,
> +			       enum dma_data_direction direction)
> +{
> +	dma_addr_t base = sg_dma_address(sgt->sgl);
> +	unsigned long len = drm_prime_get_contiguous_size(sgt);
> +
> +	dma_unmap_resource(attach->dev, base, len, direction, DMA_ATTR_SKIP_CPU_SYNC);
> +
> +	sg_free_table(sgt);
> +	kfree(sgt);
> +}
> +
> +/*
> + * dma-buf release callback: give the buffer's range back to the carveout
> + * allocator and free the per-buffer data.
> + */
> +static void amdxdna_cbuf_release(struct dma_buf *dbuf)
> +{
> +	struct amdxdna_cbuf_priv *priv = dbuf->priv;
> +	struct mutex *mm_lock = &priv->xdna->carveout->lock;
> +
> +	mutex_lock(mm_lock);
> +	drm_mm_remove_node(&priv->node);
> +	mutex_unlock(mm_lock);
> +
> +	kfree(priv);
> +}
> +
> +/*
> + * Page fault handler for user mappings of a carveout buffer: insert the PFN of
> + * the carveout page that backs the faulting address.
> + *
> + * vmf->pgoff is the page index of the fault within the buffer, including the
> + * vma->vm_pgoff the mapping was created with, so a mapping that starts at a
> + * non-zero offset into the buffer resolves to the right page.
> + *
> + * Return: result of vmf_insert_pfn(), or VM_FAULT_SIGBUS when the fault lies
> + * beyond the end of the buffer.
> + */
> +static vm_fault_t amdxdna_cbuf_vm_fault(struct vm_fault *vmf)
> +{
> +	struct vm_area_struct *vma = vmf->vma;
> +	struct amdxdna_cbuf_priv *cbuf = vma->vm_private_data;
> +	pgoff_t pgoff = vmf->pgoff;
> +	unsigned long pfn;
> +
> +	if (pgoff >= cbuf->node.size >> PAGE_SHIFT)
> +		return VM_FAULT_SIGBUS;
> +
> +	pfn = (cbuf->node.start >> PAGE_SHIFT) + pgoff;
> +
> +	return vmf_insert_pfn(vma, vmf->address, pfn);
> +}
> +
> +static const struct vm_operations_struct amdxdna_cbuf_vm_ops = {
> + .fault = amdxdna_cbuf_vm_fault, /* inserts the carveout PFN for the faulting address */
> +};
> +
> +/*
> + * dma-buf mmap callback: mark @vma as a PFN mapping and install the fault
> + * handler; pages are inserted lazily by amdxdna_cbuf_vm_fault().
> + */
> +static int amdxdna_cbuf_mmap(struct dma_buf *dbuf, struct vm_area_struct *vma)
> +{
> +	vm_flags_set(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP);
> +	vma->vm_private_data = dbuf->priv;
> +	vma->vm_ops = &amdxdna_cbuf_vm_ops;
> +
> +	return 0;
> +}
> +
> +/*
> + * dma-buf vmap callback: memremap() the buffer's carveout range write-back
> + * and return the kernel address through @map.
> + */
> +static int amdxdna_cbuf_vmap(struct dma_buf *dbuf, struct iosys_map *map)
> +{
> +	struct amdxdna_cbuf_priv *priv = dbuf->priv;
> +	void *kva = memremap(priv->node.start, priv->node.size, MEMREMAP_WB);
> +
> +	if (kva) {
> +		iosys_map_set_vaddr(map, kva);
> +		return 0;
> +	}
> +
> +	pr_err("Failed to vmap carveout dma buf\n");
> +	return -ENOMEM;
> +}
> +
> +/* dma-buf vunmap callback: drop the kernel mapping recorded in @map. */
> +static void amdxdna_cbuf_vunmap(struct dma_buf *dbuf, struct iosys_map *map)
> +{
> +	void *kva = map->vaddr;
> +
> +	memunmap(kva);
> +}
> +
> +static const struct dma_buf_ops amdxdna_cbuf_dmabuf_ops = {
> + .map_dma_buf = amdxdna_cbuf_map, /* dma_map_resource() the range, return an sg_table */
> + .unmap_dma_buf = amdxdna_cbuf_unmap, /* undo amdxdna_cbuf_map() */
> + .release = amdxdna_cbuf_release, /* return the range to the carveout allocator */
> + .mmap = amdxdna_cbuf_mmap, /* PFN mapping, populated on fault */
> + .vmap = amdxdna_cbuf_vmap, /* memremap() with MEMREMAP_WB */
> + .vunmap = amdxdna_cbuf_vunmap, /* memunmap() */
> +};
> +
> +/*
> + * Zero the whole buffer through a temporary kernel mapping.
> + *
> + * The caller does not hold the dma-buf reservation lock, so the _unlocked
> + * vmap/vunmap helpers are used; they take the lock themselves.
> + *
> + * Return: 0 on success, or the error reported by dma_buf_vmap_unlocked().
> + */
> +static int amdxdna_cbuf_clear(struct dma_buf *dbuf)
> +{
> +	struct iosys_map vmap;
> +	int ret;
> +
> +	ret = dma_buf_vmap_unlocked(dbuf, &vmap);
> +	if (ret)
> +		return ret;
> +
> +	memset(vmap.vaddr, 0, dbuf->size);
> +	dma_buf_vunmap_unlocked(dbuf, &vmap);
> +
> +	return 0;
> +}
> +
> +/*
> + * Allocate @size bytes from the device's carveout, aligned to @alignment, and
> + * export the range as a zero-filled dma-buf.
> + *
> + * Once dma_buf_export() has succeeded the dma-buf owns the allocation: its
> + * release callback removes the node and frees the private data, so later
> + * failures only drop the dma-buf reference.
> + *
> + * Return: the exported dma-buf, or an ERR_PTR() on failure.
> + */
> +struct dma_buf *amdxdna_get_cbuf(struct drm_device *dev, size_t size, u64 alignment)
> +{
> +	struct amdxdna_dev *xdna = to_xdna_dev(dev);
> +	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
> +	struct amdxdna_carveout *carveout;
> +	struct amdxdna_cbuf_priv *cbuf;
> +	struct dma_buf *dbuf;
> +	int ret;
> +
> +	cbuf = kzalloc_obj(*cbuf);
> +	if (!cbuf)
> +		return ERR_PTR(-ENOMEM);
> +	cbuf->xdna = xdna;
> +
> +	carveout = xdna->carveout;
> +	mutex_lock(&carveout->lock);
> +	ret = drm_mm_insert_node_generic(&carveout->mm, &cbuf->node, size,
> +					 alignment, 0, DRM_MM_INSERT_BEST);
> +	mutex_unlock(&carveout->lock);
> +	if (ret)
> +		goto free_cbuf;
> +
> +	exp_info.size = size;
> +	exp_info.ops = &amdxdna_cbuf_dmabuf_ops;
> +	exp_info.priv = cbuf;
> +	exp_info.flags = O_RDWR;
> +	dbuf = dma_buf_export(&exp_info);
> +	if (IS_ERR(dbuf)) {
> +		ret = PTR_ERR(dbuf);
> +		goto remove_node;
> +	}
> +
> +	ret = amdxdna_cbuf_clear(dbuf);
> +	if (ret) {
> +		/* The release callback cleans up the node and cbuf. */
> +		dma_buf_put(dbuf);
> +		return ERR_PTR(ret);
> +	}
> +	return dbuf;
> +
> +remove_node:
> +	/* The allocator is shared; removal needs the same lock as insertion. */
> +	mutex_lock(&carveout->lock);
> +	drm_mm_remove_node(&cbuf->node);
> +	mutex_unlock(&carveout->lock);
> +free_cbuf:
> +	kfree(cbuf);
> +	return ERR_PTR(ret);
> +}
> diff --git a/drivers/accel/amdxdna/amdxdna_cbuf.h b/drivers/accel/amdxdna/amdxdna_cbuf.h
> new file mode 100644
> index 000000000000..8e89336ffd50
> --- /dev/null
> +++ b/drivers/accel/amdxdna/amdxdna_cbuf.h
> @@ -0,0 +1,18 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright (C) 2026, Advanced Micro Devices, Inc.
> + */
> +#ifndef _AMDXDNA_CBUF_H_
> +#define _AMDXDNA_CBUF_H_
> +
> +#include "amdxdna_pci_drv.h"
> +#include <drm/drm_device.h>
> +#include <linux/dma-buf.h>
> +
> +bool amdxdna_use_carveout(struct amdxdna_dev *xdna);
> +int amdxdna_carveout_init(struct amdxdna_dev *xdna, u64 carveout_addr, u64 carveout_size);
> +void amdxdna_carveout_fini(struct amdxdna_dev *xdna);
> +void amdxdna_get_carveout_conf(struct amdxdna_dev *xdna, u64 *addr, u64 *size);
> +struct dma_buf *amdxdna_get_cbuf(struct drm_device *dev, size_t size, u64 alignment);
> +
> +#endif
> diff --git a/drivers/accel/amdxdna/amdxdna_debugfs.c b/drivers/accel/amdxdna/amdxdna_debugfs.c
> new file mode 100644
> index 000000000000..a6ec17c63629
> --- /dev/null
> +++ b/drivers/accel/amdxdna/amdxdna_debugfs.c
> @@ -0,0 +1,129 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2026, Advanced Micro Devices, Inc.
> + */
> +
> +#include "amdxdna_cbuf.h"
> +#include "amdxdna_debugfs.h"
> +
> +#include <drm/drm_file.h>
> +#include <linux/debugfs.h>
> +#include <linux/pm_runtime.h>
> +#include <linux/seq_file.h>
> +#include <linux/string.h>
> +#include <linux/uaccess.h>
> +/* file_operations initializer: seq_file read/llseek plus the given open, release and write. */
> +#define _DBGFS_FOPS(_open, _release, _write) \
> +{ \
> + .owner = THIS_MODULE, \
> + .open = _open, \
> + .read = seq_read, \
> + .llseek = seq_lseek, \
> + .release = _release, \
> + .write = _write, \
> +}
> +/* Define single_open()/single_release() wrappers around _show and the amdxdna_fops_<_name> table. */
> +#define AMDXDNA_DBGFS_FOPS(_name, _show, _write) \
> + static int amdxdna_dbgfs_##_name##_open(struct inode *inode, struct file *file) \
> + { \
> + return single_open(file, _show, inode->i_private); \
> + } \
> + static int amdxdna_dbgfs_##_name##_release(struct inode *inode, struct file *file) \
> + { \
> + return single_release(inode, file); \
> + } \
> + static const struct file_operations amdxdna_fops_##_name = \
> + _DBGFS_FOPS(amdxdna_dbgfs_##_name##_open, amdxdna_dbgfs_##_name##_release, _write)
> +/* Table entry initializer: file name, pointer to its amdxdna_fops_<_name>, and mode. */
> +#define AMDXDNA_DBGFS_FILE(_name, _mode) { #_name, &amdxdna_fops_##_name, _mode }
> +/* Read the ->private pointer of the seq_file stored in file->private_data. */
> +#define file_to_xdna(file) (((struct seq_file *)(file)->private_data)->private)
> +
> +/*
> + * Write handler of the "carveout" debugfs file.
> + *
> + * Parses "<size>@<address>" from user space, requires a non-zero size with
> + * both values page aligned, and registers the range as the device carveout
> + * while holding dev_lock.
> + *
> + * Return: @count on success or a negative errno.
> + */
> +static ssize_t amdxdna_carveout_write(struct file *file, const char __user *buf,
> +				      size_t count, loff_t *ppos)
> +{
> +	struct amdxdna_dev *xdna = file_to_xdna(file);
> +	u64 mem_size, mem_addr;
> +	char input[128];
> +	char *at;
> +	int err;
> +
> +	/* Leave room for the terminating NUL. */
> +	if (!count || count >= sizeof(input))
> +		return -EINVAL;
> +
> +	if (copy_from_user(input, buf, count))
> +		return -EFAULT;
> +	input[count] = '\0';
> +	strim(input);
> +	XDNA_DBG(xdna, "Trying to set carveout to %s", input);
> +
> +	/* Split the string in place at the '@' separator. */
> +	at = strchr(input, '@');
> +	if (!at)
> +		return -EINVAL;
> +	*at++ = '\0';
> +
> +	err = kstrtou64(input, 0, &mem_size);
> +	if (err)
> +		return err;
> +
> +	err = kstrtou64(at, 0, &mem_addr);
> +	if (err)
> +		return err;
> +
> +	/* Sanity check the addr and size. */
> +	if (!mem_size)
> +		return -EINVAL;
> +	if (!IS_ALIGNED(mem_addr, PAGE_SIZE) || !IS_ALIGNED(mem_size, PAGE_SIZE))
> +		return -EINVAL;
> +
> +	guard(mutex)(&xdna->dev_lock);
> +
> +	err = amdxdna_carveout_init(xdna, mem_addr, mem_size);
> +	if (err)
> +		return err;
> +
> +	return count;
> +}
> +
> +/* Show handler of the "carveout" debugfs file: prints "<size>@<address>". */
> +static int amdxdna_carveout_show(struct seq_file *m, void *unused)
> +{
> +	struct amdxdna_dev *xdna = m->private;
> +	u64 mem_addr;
> +	u64 mem_size;
> +
> +	guard(mutex)(&xdna->dev_lock);
> +
> +	amdxdna_get_carveout_conf(xdna, &mem_addr, &mem_size);
> +	seq_printf(m, "0x%llx@0x%llx\n", mem_size, mem_addr);
> +
> +	return 0;
> +}
> +
> +/*
> + * File operations of the "carveout" debugfs file.
> + * Input/output format: <carveout_size>@<carveout_address>
> + */
> +AMDXDNA_DBGFS_FOPS(carveout, amdxdna_carveout_show, amdxdna_carveout_write);
> +
> +static const struct {
> + const char *name; /* file name passed to debugfs_create_file() */
> + const struct file_operations *fops; /* handlers of the file */
> + umode_t mode; /* mode passed to debugfs_create_file() */
> +} amdxdna_dbgfs_files[] = {
> + AMDXDNA_DBGFS_FILE(carveout, 0600),
> +};
> +
> +/* Create every file of amdxdna_dbgfs_files[] under the accel minor's debugfs root. */
> +void amdxdna_debugfs_init(struct amdxdna_dev *xdna)
> +{
> +	struct dentry *root = xdna->ddev.accel->debugfs_root;
> +	int idx;
> +
> +	/*
> +	 * It should be okay that debugfs fails to init.
> +	 * We rely on DRM framework to finish debugfs.
> +	 */
> +	for (idx = 0; idx < ARRAY_SIZE(amdxdna_dbgfs_files); idx++)
> +		debugfs_create_file(amdxdna_dbgfs_files[idx].name,
> +				    amdxdna_dbgfs_files[idx].mode,
> +				    root, xdna,
> +				    amdxdna_dbgfs_files[idx].fops);
> +}
> diff --git a/drivers/accel/amdxdna/amdxdna_debugfs.h b/drivers/accel/amdxdna/amdxdna_debugfs.h
> new file mode 100644
> index 000000000000..2abb45de3f7e
> --- /dev/null
> +++ b/drivers/accel/amdxdna/amdxdna_debugfs.h
> @@ -0,0 +1,18 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright (C) 2026, Advanced Micro Devices, Inc.
> + */
> +#ifndef _AMDXDNA_DEBUGFS_H_
> +#define _AMDXDNA_DEBUGFS_H_
> +
> +#include "amdxdna_pci_drv.h"
> +
> +#if defined(CONFIG_DEBUG_FS)
> +void amdxdna_debugfs_init(struct amdxdna_dev *xdna);
> +#else
> +static inline void amdxdna_debugfs_init(struct amdxdna_dev *xdna)
> +{
> +}
> +#endif /* CONFIG_DEBUG_FS */
> +
> +#endif
> diff --git a/drivers/accel/amdxdna/amdxdna_gem.c b/drivers/accel/amdxdna/amdxdna_gem.c
> index 238ee244d4a6..ebfc472aa9e7 100644
> --- a/drivers/accel/amdxdna/amdxdna_gem.c
> +++ b/drivers/accel/amdxdna/amdxdna_gem.c
> @@ -16,6 +16,7 @@
> #include <linux/pagemap.h>
> #include <linux/vmalloc.h>
>
> +#include "amdxdna_cbuf.h"
> #include "amdxdna_ctx.h"
> #include "amdxdna_gem.h"
> #include "amdxdna_pci_drv.h"
> @@ -516,10 +517,6 @@ static void amdxdna_imported_obj_free(struct amdxdna_gem_obj *abo)
> static inline bool
> amdxdna_gem_skip_bo_usage(struct amdxdna_gem_obj *abo)
> {
> - /* Do not count imported BOs since the buffer is not allocated by us. */
> - if (is_import_bo(abo))
> - return true;
> -
> /* Already counted as part of HEAP BO */
> if (abo->type == AMDXDNA_BO_DEV)
> return true;
> @@ -571,9 +568,7 @@ static void amdxdna_gem_obj_free(struct drm_gem_object *gobj)
> if (abo->type == AMDXDNA_BO_DEV_HEAP)
> drm_mm_takedown(&abo->mm);
>
> - if (amdxdna_iova_on(xdna))
> - amdxdna_iommu_unmap_bo(xdna, abo);
> -
> + amdxdna_dma_unmap_bo(xdna, abo);
> amdxdna_gem_vunmap(abo);
> mutex_destroy(&abo->lock);
>
> @@ -591,18 +586,20 @@ static int amdxdna_gem_obj_open(struct drm_gem_object *gobj, struct drm_file *fi
>
> guard(mutex)(&abo->lock);
> abo->open_ref++;
> + if (abo->open_ref > 1)
> + return 0;
>
> - if (abo->open_ref == 1) {
> - /* Attached to the client when first opened by it. */
> - abo->client = filp->driver_priv;
> - amdxdna_gem_add_bo_usage(abo);
> - }
> - if (amdxdna_iova_on(xdna)) {
> - ret = amdxdna_iommu_map_bo(xdna, abo);
> + /* Attached to the client when first opened by it. */
> + abo->client = filp->driver_priv;
> +
> + /* No need to set up dma addr mapping in PASID mode. */
> + if (!amdxdna_pasid_on(abo->client)) {
> + ret = amdxdna_dma_map_bo(xdna, abo);
> if (ret)
> return ret;
> }
>
> + amdxdna_gem_add_bo_usage(abo);
> return 0;
> }
>
> @@ -620,6 +617,39 @@ static void amdxdna_gem_obj_close(struct drm_gem_object *gobj, struct drm_file *
> }
> }
>
> +/*
> + * GEM vmap callback: imported BOs are mapped through their dma-buf, all others
> + * through the shmem helper. A successful mapping without a kernel virtual
> + * address is reported as -ENOMEM.
> + */
> +static int amdxdna_gem_obj_vmap(struct drm_gem_object *obj, struct iosys_map *map)
> +{
> +	struct amdxdna_gem_obj *abo = to_xdna_obj(obj);
> +	int err;
> +
> +	iosys_map_clear(map);
> +
> +	dma_resv_assert_held(obj->resv);
> +
> +	err = is_import_bo(abo) ? dma_buf_vmap(abo->dma_buf, map)
> +				: drm_gem_shmem_object_vmap(obj, map);
> +	if (err)
> +		return err;
> +
> +	return map->vaddr ? 0 : -ENOMEM;
> +}
> +
> +/* GEM vunmap callback: counterpart of amdxdna_gem_obj_vmap(). */
> +static void amdxdna_gem_obj_vunmap(struct drm_gem_object *obj, struct iosys_map *map)
> +{
> +	struct amdxdna_gem_obj *abo = to_xdna_obj(obj);
> +
> +	dma_resv_assert_held(obj->resv);
> +
> +	if (!is_import_bo(abo)) {
> +		drm_gem_shmem_object_vunmap(obj, map);
> +		return;
> +	}
> +
> +	dma_buf_vunmap(abo->dma_buf, map);
> +}
> +
> static int amdxdna_gem_dev_obj_vmap(struct drm_gem_object *obj, struct iosys_map *map)
> {
> struct amdxdna_gem_obj *abo = to_xdna_obj(obj);
> @@ -645,8 +675,8 @@ static const struct drm_gem_object_funcs amdxdna_gem_shmem_funcs = {
> .pin = drm_gem_shmem_object_pin,
> .unpin = drm_gem_shmem_object_unpin,
> .get_sg_table = drm_gem_shmem_object_get_sg_table,
> - .vmap = drm_gem_shmem_object_vmap,
> - .vunmap = drm_gem_shmem_object_vunmap,
> + .vmap = amdxdna_gem_obj_vmap,
> + .vunmap = amdxdna_gem_obj_vunmap,
> .mmap = amdxdna_gem_obj_mmap,
> .vm_ops = &drm_gem_shmem_vm_ops,
> .export = amdxdna_gem_prime_export,
> @@ -714,6 +744,36 @@ amdxdna_gem_create_ubuf_object(struct drm_device *dev, struct amdxdna_drm_create
> return to_xdna_obj(gobj);
> }
>
> +/*
> + * Create a BO backed by carveout memory: allocate a carveout dma-buf of the
> + * page-aligned requested size and import it as a GEM object. DEV_HEAP BOs are
> + * aligned to the device memory size, other types request no alignment.
> + *
> + * Return: the new BO, or an ERR_PTR() on failure.
> + */
> +static struct amdxdna_gem_obj *
> +amdxdna_gem_create_cbuf_object(struct drm_device *dev, struct amdxdna_drm_create_bo *args)
> +{
> +	struct amdxdna_dev *xdna = to_xdna_dev(dev);
> +	size_t bo_size = PAGE_ALIGN(args->size);
> +	struct drm_gem_object *gobj;
> +	struct amdxdna_gem_obj *abo;
> +	struct dma_buf *dbuf;
> +	u64 align = 0;
> +
> +	if (!bo_size) {
> +		XDNA_ERR(xdna, "Invalid BO size 0x%llx", args->size);
> +		return ERR_PTR(-EINVAL);
> +	}
> +
> +	if (args->type == AMDXDNA_BO_DEV_HEAP)
> +		align = xdna->dev_info->dev_mem_size;
> +
> +	dbuf = amdxdna_get_cbuf(dev, bo_size, align);
> +	if (IS_ERR(dbuf))
> +		return ERR_CAST(dbuf);
> +
> +	gobj = amdxdna_gem_prime_import(dev, dbuf);
> +	if (IS_ERR(gobj))
> +		abo = ERR_CAST(gobj);
> +	else
> +		abo = to_xdna_obj(gobj);
> +
> +	/* Drop the reference obtained from amdxdna_get_cbuf() in either case. */
> +	dma_buf_put(dbuf);
> +	return abo;
> +}
> +
> struct drm_gem_object *
> amdxdna_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf)
> {
> @@ -769,6 +829,8 @@ amdxdna_drm_create_share_bo(struct drm_device *dev,
>
> if (args->vaddr)
> abo = amdxdna_gem_create_ubuf_object(dev, args);
> + else if (amdxdna_use_carveout(to_xdna_dev(dev)))
> + abo = amdxdna_gem_create_cbuf_object(dev, args);
> else
> abo = amdxdna_gem_create_shmem_object(dev, args);
> if (IS_ERR(abo))
> @@ -884,7 +946,6 @@ int amdxdna_drm_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_f
> args->type, args->vaddr, args->size, args->flags);
> switch (args->type) {
> case AMDXDNA_BO_CMD:
> - fallthrough;
> case AMDXDNA_BO_SHARE:
> abo = amdxdna_drm_create_share_bo(dev, args, filp);
> break;
> diff --git a/drivers/accel/amdxdna/amdxdna_iommu.c b/drivers/accel/amdxdna/amdxdna_iommu.c
> index 5a9f06183487..eff00131d0f8 100644
> --- a/drivers/accel/amdxdna/amdxdna_iommu.c
> +++ b/drivers/accel/amdxdna/amdxdna_iommu.c
> @@ -35,14 +35,15 @@ static struct iova *amdxdna_iommu_alloc_iova(struct amdxdna_dev *xdna,
> return iova;
> }
>
> -int amdxdna_iommu_map_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo)
> +int amdxdna_dma_map_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo)
> {
> + unsigned long contig_sz;
> struct sg_table *sgt;
> dma_addr_t dma_addr;
> struct iova *iova;
> ssize_t size;
>
> - if (abo->type != AMDXDNA_BO_DEV_HEAP && abo->type != AMDXDNA_BO_SHMEM)
> + if (abo->type != AMDXDNA_BO_DEV_HEAP && abo->type != AMDXDNA_BO_SHARE)
> return 0;
>
> sgt = drm_gem_shmem_get_pages_sgt(&abo->base);
> @@ -51,47 +52,63 @@ int amdxdna_iommu_map_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo)
> return PTR_ERR(sgt);
> }
>
> - if (!sgt->orig_nents || !sg_page(sgt->sgl)) {
> - XDNA_ERR(xdna, "sgl is zero length or not page backed");
> + if (!sgt->orig_nents) {
> + XDNA_ERR(xdna, "sgl is zero length");
> return -EOPNOTSUPP;
> }
>
> - iova = amdxdna_iommu_alloc_iova(xdna, abo->mem.size, &dma_addr,
> - (abo->type == AMDXDNA_BO_DEV_HEAP));
> - if (IS_ERR(iova)) {
> - XDNA_ERR(xdna, "Alloc iova failed, ret %ld", PTR_ERR(iova));
> - return PTR_ERR(iova);
> + if (amdxdna_iova_on(xdna)) {
> + if (!sg_page(sgt->sgl)) {
> + XDNA_ERR(xdna, "sgl is not page backed");
> + return -EOPNOTSUPP;
> + }
> +
> + iova = amdxdna_iommu_alloc_iova(xdna, abo->mem.size, &dma_addr,
> + (abo->type == AMDXDNA_BO_DEV_HEAP));
> + if (IS_ERR(iova)) {
> + XDNA_ERR(xdna, "Alloc iova failed, ret %ld", PTR_ERR(iova));
> + return PTR_ERR(iova);
> + }
> +
> + size = iommu_map_sgtable(xdna->domain, dma_addr, sgt,
> + IOMMU_READ | IOMMU_WRITE);
> + if (size < 0) {
> + XDNA_ERR(xdna, "iommu_map_sgtable failed: %zd", size);
> + __free_iova(&xdna->iovad, iova);
> + return size;
> + }
> + if (size < abo->mem.size) {
> + iommu_unmap(xdna->domain, dma_addr, size);
> + __free_iova(&xdna->iovad, iova);
> + return -ENXIO;
> + }
> + abo->mem.dma_addr = dma_addr;
> + } else {
> + /* Device doesn't support scatter/gather list, fail non-contiguous mapping. */
> + contig_sz = drm_prime_get_contiguous_size(sgt);
> + if (contig_sz < abo->mem.size) {
> + XDNA_ERR(xdna,
> + "noncontiguous dma addr, contig size:%ld, expected size:%ld",
> + contig_sz, abo->mem.size);
> + return -EINVAL;
> + }
> + abo->mem.dma_addr = sg_dma_address(sgt->sgl);
> }
> -
> - size = iommu_map_sgtable(xdna->domain, dma_addr, sgt,
> - IOMMU_READ | IOMMU_WRITE);
> - if (size < 0) {
> - XDNA_ERR(xdna, "iommu_map_sgtable failed: %zd", size);
> - __free_iova(&xdna->iovad, iova);
> - return size;
> - }
> -
> - if (size < abo->mem.size) {
> - iommu_unmap(xdna->domain, dma_addr, size);
> - __free_iova(&xdna->iovad, iova);
> - return -ENXIO;
> - }
> -
> - abo->mem.dma_addr = dma_addr;
> -
> return 0;
> }
>
> -void amdxdna_iommu_unmap_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo)
> +void amdxdna_dma_unmap_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo)
> {
> size_t size;
>
> if (abo->mem.dma_addr == AMDXDNA_INVALID_ADDR)
> return;
>
> - size = iova_align(&xdna->iovad, abo->mem.size);
> - iommu_unmap(xdna->domain, abo->mem.dma_addr, size);
> - free_iova(&xdna->iovad, iova_pfn(&xdna->iovad, abo->mem.dma_addr));
> + if (amdxdna_iova_on(xdna)) {
> + size = iova_align(&xdna->iovad, abo->mem.size);
> + iommu_unmap(xdna->domain, abo->mem.dma_addr, size);
> + free_iova(&xdna->iovad, iova_pfn(&xdna->iovad, abo->mem.dma_addr));
> + }
> abo->mem.dma_addr = AMDXDNA_INVALID_ADDR;
> }
>
> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
> index 21eddfc538d0..1b08a08343cf 100644
> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
> @@ -14,7 +14,9 @@
> #include <linux/iommu.h>
> #include <linux/pci.h>
>
> +#include "amdxdna_cbuf.h"
> #include "amdxdna_ctx.h"
> +#include "amdxdna_debugfs.h"
> #include "amdxdna_gem.h"
> #include "amdxdna_pci_drv.h"
> #include "amdxdna_pm.h"
> @@ -67,11 +69,40 @@ static const struct amdxdna_device_id amdxdna_ids[] = {
> {0}
> };
>
> +/*
> + * Bind the client's mm to the device for SVA and look up its PASID.
> + *
> + * On any failure client->sva is left NULL, so amdxdna_sva_fini() can be
> + * called afterwards without unbinding a handle that was already released.
> + *
> + * Return: 0 on success, the bind error, or -ENODEV when no PASID is available.
> + */
> +static int amdxdna_sva_init(struct amdxdna_client *client)
> +{
> +	struct amdxdna_dev *xdna = client->xdna;
> +	int ret;
> +
> +	client->sva = iommu_sva_bind_device(xdna->ddev.dev, client->mm);
> +	if (IS_ERR(client->sva)) {
> +		ret = PTR_ERR(client->sva);
> +		client->sva = NULL;
> +		XDNA_ERR(xdna, "SVA bind device failed, ret %d", ret);
> +		return ret;
> +	}
> +
> +	client->pasid = iommu_sva_get_pasid(client->sva);
> +	if (client->pasid == IOMMU_PASID_INVALID) {
> +		iommu_sva_unbind_device(client->sva);
> +		/* Forget the handle so it is not unbound a second time. */
> +		client->sva = NULL;
> +		XDNA_ERR(xdna, "SVA get pasid failed");
> +		return -ENODEV;
> +	}
> +
> +	return 0;
> +}
> +
> +/* Undo amdxdna_sva_init(); does nothing when the client has no valid SVA handle. */
> +static void amdxdna_sva_fini(struct amdxdna_client *client)
> +{
> +	struct iommu_sva *handle = client->sva;
> +
> +	if (IS_ERR_OR_NULL(handle))
> +		return;
> +
> +	iommu_sva_unbind_device(handle);
> +
> +	client->sva = NULL;
> +	client->pasid = IOMMU_PASID_INVALID;
> +}
> +
> static int amdxdna_drm_open(struct drm_device *ddev, struct drm_file *filp)
> {
> struct amdxdna_dev *xdna = to_xdna_dev(ddev);
> struct amdxdna_client *client;
> - int ret;
>
> client = kzalloc_obj(*client);
> if (!client)
> @@ -80,22 +111,13 @@ static int amdxdna_drm_open(struct drm_device *ddev, struct drm_file *filp)
> client->pid = pid_nr(rcu_access_pointer(filp->pid));
> client->xdna = xdna;
> client->pasid = IOMMU_PASID_INVALID;
> + client->mm = current->mm;
>
> if (!amdxdna_iova_on(xdna)) {
> - client->sva = iommu_sva_bind_device(xdna->ddev.dev, current->mm);
> - if (IS_ERR(client->sva)) {
> - ret = PTR_ERR(client->sva);
> - XDNA_ERR(xdna, "SVA bind device failed, ret %d", ret);
> - goto failed;
> - }
> - client->pasid = iommu_sva_get_pasid(client->sva);
> - if (client->pasid == IOMMU_PASID_INVALID) {
> - XDNA_ERR(xdna, "SVA get pasid failed");
> - ret = -ENODEV;
> - goto unbind_sva;
> - }
> + /* No need to fail open since user may use pa + carveout later. */
> + if (amdxdna_sva_init(client))
> + XDNA_WARN(xdna, "PASID not available for pid %d", client->pid);
> }
> - client->mm = current->mm;
> mmgrab(client->mm);
> init_srcu_struct(&client->hwctx_srcu);
> xa_init_flags(&client->hwctx_xa, XA_FLAGS_ALLOC);
> @@ -110,14 +132,6 @@ static int amdxdna_drm_open(struct drm_device *ddev, struct drm_file *filp)
>
> XDNA_DBG(xdna, "pid %d opened", client->pid);
> return 0;
> -
> -unbind_sva:
> - if (!IS_ERR_OR_NULL(client->sva))
> - iommu_sva_unbind_device(client->sva);
> -failed:
> - kfree(client);
> -
> - return ret;
> }
>
> static void amdxdna_client_cleanup(struct amdxdna_client *client)
> @@ -131,11 +145,8 @@ static void amdxdna_client_cleanup(struct amdxdna_client *client)
> drm_gem_object_put(to_gobj(client->dev_heap));
>
> mutex_destroy(&client->mm_lock);
> -
> - if (!IS_ERR_OR_NULL(client->sva))
> - iommu_sva_unbind_device(client->sva);
> mmdrop(client->mm);
> -
> + amdxdna_sva_fini(client);
> kfree(client);
> }
>
> @@ -242,15 +253,17 @@ static void amdxdna_show_fdinfo(struct drm_printer *p, struct drm_file *filp)
>
> /*
> * Note for driver specific BO memory usage stat.
> - * Total memory alloc = amdxdna-internal-alloc + amdxdna-external-alloc
> + * Total memory in use = amdxdna-internal-alloc + amdxdna-external-alloc, which
> + * includes both imported and created BOs. To avoid double counts, it includes
> + * HEAP BO, but not DEV BO. DEV BO is counted by amdxdna-heap-alloc.
> */
> drm_fdinfo_print_size(p, drv_name, "heap", "alloc", heap_usage);
> drm_fdinfo_print_size(p, drv_name, "internal", "alloc", internal_usage);
> drm_fdinfo_print_size(p, drv_name, "external", "alloc", external_usage);
> /*
> * Note for DRM standard BO memory stat.
> - * drm-total-memory counts both DEV BO and HEAP BO
> - * drm-shared-memory counts BO imported
> + * drm-total-memory counts both DEV BO and HEAP BO. The DEV BO size is double counted.
> + * drm-shared-memory counts BO shared with other processes/devices.
> */
> drm_show_memory_stats(p, filp);
> }
> @@ -299,25 +312,38 @@ amdxdna_get_dev_info(struct pci_dev *pdev)
> return NULL;
> }
>
> +/* DRM managed release action: @res is the amdxdna_dev whose carveout is torn down. */
> +static void amdxdna_xdna_drm_release(struct drm_device *drm, void *res)
> +{
> +	amdxdna_carveout_fini(res);
> +}
> +
> static int amdxdna_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> {
> struct device *dev = &pdev->dev;
> struct amdxdna_dev *xdna;
> + struct drm_device *ddev;
> int ret;
>
> xdna = devm_drm_dev_alloc(dev, &amdxdna_drm_drv, typeof(*xdna), ddev);
> if (IS_ERR(xdna))
> return PTR_ERR(xdna);
> + ddev = &xdna->ddev;
>
> xdna->dev_info = amdxdna_get_dev_info(pdev);
> if (!xdna->dev_info)
> return -ENODEV;
>
> - drmm_mutex_init(&xdna->ddev, &xdna->dev_lock);
> + drmm_mutex_init(ddev, &xdna->dev_lock);
> init_rwsem(&xdna->notifier_lock);
> INIT_LIST_HEAD(&xdna->client_list);
> pci_set_drvdata(pdev, xdna);
>
> + ret = drmm_add_action(ddev, amdxdna_xdna_drm_release, xdna);
> + if (ret)
> + return ret;
> +
> if (IS_ENABLED(CONFIG_LOCKDEP)) {
> fs_reclaim_acquire(GFP_KERNEL);
> might_lock(&xdna->notifier_lock);
> @@ -348,12 +374,13 @@ static int amdxdna_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> goto failed_dev_fini;
> }
>
> - ret = drm_dev_register(&xdna->ddev, 0);
> + ret = drm_dev_register(ddev, 0);
> if (ret) {
> XDNA_ERR(xdna, "DRM register failed, ret %d", ret);
> goto failed_sysfs_fini;
> }
>
> + amdxdna_debugfs_init(xdna);
> return 0;
>
> failed_sysfs_fini:
> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h
> index bdd0dc83f92e..b1548cf16f59 100644
> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
> @@ -104,6 +104,8 @@ struct amdxdna_fw_ver {
> u32 build;
> };
>
> +struct amdxdna_carveout;
> +
> struct amdxdna_dev {
> struct drm_device ddev;
> struct amdxdna_dev_hdl *dev_handle;
> @@ -121,6 +123,8 @@ struct amdxdna_dev {
> struct iova_domain iovad;
> /* Accurate board name queried from firmware, or default_vbnv as fallback */
> const char *vbnv;
> +
> + struct amdxdna_carveout *carveout;
> };
>
> /*
> @@ -172,11 +176,11 @@ void amdxdna_sysfs_fini(struct amdxdna_dev *xdna);
>
> int amdxdna_iommu_init(struct amdxdna_dev *xdna);
> void amdxdna_iommu_fini(struct amdxdna_dev *xdna);
> -int amdxdna_iommu_map_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo);
> -void amdxdna_iommu_unmap_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo);
> void *amdxdna_iommu_alloc(struct amdxdna_dev *xdna, size_t size, dma_addr_t *dma_addr);
> void amdxdna_iommu_free(struct amdxdna_dev *xdna, size_t size,
> void *cpu_addr, dma_addr_t dma_addr);
> +int amdxdna_dma_map_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo);
> +void amdxdna_dma_unmap_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo);
>
> static inline bool amdxdna_iova_on(struct amdxdna_dev *xdna)
> {
prev parent reply other threads:[~2026-05-04 19:08 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-04-27 17:09 [PATCH V2] accel/amdxdna: Add carveout memory support for non-IOMMU systems Lizhi Hou
2026-05-04 19:08 ` Mario Limonciello [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=88835cfd-dfcb-48d7-9f85-d2956df1082e@kernel.org \
--to=superm1@kernel.org \
--cc=dri-devel@lists.freedesktop.org \
--cc=linux-kernel@vger.kernel.org \
--cc=lizhi.hou@amd.com \
--cc=maciej.falkowski@linux.intel.com \
--cc=max.zhen@amd.com \
--cc=ogabbay@kernel.org \
--cc=quic_jhugo@quicinc.com \
--cc=sonal.santan@amd.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox