From: Leon Romanovsky <leon@kernel.org>
To: Alex Williamson <alex.williamson@redhat.com>
Cc: "Leon Romanovsky" <leonro@nvidia.com>,
"Christoph Hellwig" <hch@lst.de>,
"Jason Gunthorpe" <jgg@nvidia.com>,
"Andrew Morton" <akpm@linux-foundation.org>,
"Bjorn Helgaas" <bhelgaas@google.com>,
"Christian König" <christian.koenig@amd.com>,
dri-devel@lists.freedesktop.org, iommu@lists.linux.dev,
"Jens Axboe" <axboe@kernel.dk>,
"Jérôme Glisse" <jglisse@redhat.com>,
"Joerg Roedel" <joro@8bytes.org>,
kvm@vger.kernel.org, linaro-mm-sig@lists.linaro.org,
linux-block@vger.kernel.org, linux-kernel@vger.kernel.org,
linux-media@vger.kernel.org, linux-mm@kvack.org,
linux-pci@vger.kernel.org,
"Logan Gunthorpe" <logang@deltatee.com>,
"Marek Szyprowski" <m.szyprowski@samsung.com>,
"Robin Murphy" <robin.murphy@arm.com>,
"Sumit Semwal" <sumit.semwal@linaro.org>,
"Vivek Kasireddy" <vivek.kasireddy@intel.com>,
"Will Deacon" <will@kernel.org>
Subject: [PATCH 04/10] PCI/P2PDMA: Refactor to separate core P2P functionality from memory allocation
Date: Wed, 23 Jul 2025 16:00:05 +0300 [thread overview]
Message-ID: <fde513e6135516368fa873cdff9144c1b29a477f.1753274085.git.leonro@nvidia.com> (raw)
In-Reply-To: <cover.1753274085.git.leonro@nvidia.com>
From: Leon Romanovsky <leonro@nvidia.com>
Refactor the PCI P2PDMA subsystem to separate the core peer-to-peer DMA
functionality from the optional memory allocation layer. This creates a
two-tier architecture:
The core layer provides P2P mapping functionality for physical addresses
based on PCI device MMIO BARs and integrates with the DMA API for
mapping operations. This layer is required for all P2PDMA users.
The optional upper layer provides memory allocation capabilities
including gen_pool allocator, struct page support, and sysfs interface
for user space access.
This separation allows subsystems like VFIO to use only the core P2P
mapping functionality without the overhead of memory allocation features
they don't need. The core functionality is now available through the
new pci_p2pdma_enable() function, which returns a pointer to a
p2pdma_provider structure.
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
drivers/pci/p2pdma.c | 108 +++++++++++++++++++++++++------------
include/linux/pci-p2pdma.h | 5 ++
2 files changed, 80 insertions(+), 33 deletions(-)
diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
index 5a310026bd24f..8e2525618d922 100644
--- a/drivers/pci/p2pdma.c
+++ b/drivers/pci/p2pdma.c
@@ -25,11 +25,12 @@ struct pci_p2pdma {
struct gen_pool *pool;
bool p2pmem_published;
struct xarray map_types;
+ struct p2pdma_provider mem;
};
struct pci_p2pdma_pagemap {
struct dev_pagemap pgmap;
- struct p2pdma_provider mem;
+ struct p2pdma_provider *mem;
};
static struct pci_p2pdma_pagemap *to_p2p_pgmap(struct dev_pagemap *pgmap)
@@ -204,7 +205,7 @@ static void p2pdma_page_free(struct page *page)
struct pci_p2pdma_pagemap *pgmap = to_p2p_pgmap(page_pgmap(page));
/* safe to dereference while a reference is held to the percpu ref */
struct pci_p2pdma *p2pdma = rcu_dereference_protected(
- to_pci_dev(pgmap->mem.owner)->p2pdma, 1);
+ to_pci_dev(pgmap->mem->owner)->p2pdma, 1);
struct percpu_ref *ref;
gen_pool_free_owner(p2pdma->pool, (uintptr_t)page_to_virt(page),
@@ -227,44 +228,77 @@ static void pci_p2pdma_release(void *data)
/* Flush and disable pci_alloc_p2p_mem() */
pdev->p2pdma = NULL;
- synchronize_rcu();
+ if (p2pdma->pool)
+ synchronize_rcu();
+ xa_destroy(&p2pdma->map_types);
+
+ if (!p2pdma->pool)
+ return;
gen_pool_destroy(p2pdma->pool);
sysfs_remove_group(&pdev->dev.kobj, &p2pmem_group);
- xa_destroy(&p2pdma->map_types);
}
-static int pci_p2pdma_setup(struct pci_dev *pdev)
+/**
+ * pci_p2pdma_enable - Enable peer-to-peer DMA support for a PCI device
+ * @pdev: The PCI device to enable P2PDMA for
+ *
+ * This function initializes the peer-to-peer DMA infrastructure for a PCI
+ * device. It allocates and sets up the necessary data structures to support
+ * P2PDMA operations, including mapping type tracking.
+ */
+struct p2pdma_provider *pci_p2pdma_enable(struct pci_dev *pdev)
{
- int error = -ENOMEM;
struct pci_p2pdma *p2p;
+ int ret;
p2p = devm_kzalloc(&pdev->dev, sizeof(*p2p), GFP_KERNEL);
if (!p2p)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
xa_init(&p2p->map_types);
+ p2p->mem.owner = &pdev->dev;
+ /* On all p2p platforms bus_offset is the same for all BARs */
+ p2p->mem.bus_offset =
+ pci_bus_address(pdev, 0) - pci_resource_start(pdev, 0);
- p2p->pool = gen_pool_create(PAGE_SHIFT, dev_to_node(&pdev->dev));
- if (!p2p->pool)
- goto out;
+ ret = devm_add_action_or_reset(&pdev->dev, pci_p2pdma_release, pdev);
+ if (ret)
+ goto out_p2p;
- error = devm_add_action_or_reset(&pdev->dev, pci_p2pdma_release, pdev);
- if (error)
- goto out_pool_destroy;
+ rcu_assign_pointer(pdev->p2pdma, p2p);
+ return &p2p->mem;
- error = sysfs_create_group(&pdev->dev.kobj, &p2pmem_group);
- if (error)
+out_p2p:
+ devm_kfree(&pdev->dev, p2p);
+ return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(pci_p2pdma_enable);
+
+static int pci_p2pdma_setup_pool(struct pci_dev *pdev)
+{
+ struct pci_p2pdma *p2pdma;
+ int ret;
+
+ p2pdma = rcu_dereference_protected(pdev->p2pdma, 1);
+ if (p2pdma->pool)
+	/* We already set up the pool, do nothing. */
+ return 0;
+
+ p2pdma->pool = gen_pool_create(PAGE_SHIFT, dev_to_node(&pdev->dev));
+ if (!p2pdma->pool)
+ return -ENOMEM;
+
+ ret = sysfs_create_group(&pdev->dev.kobj, &p2pmem_group);
+ if (ret)
goto out_pool_destroy;
- rcu_assign_pointer(pdev->p2pdma, p2p);
return 0;
out_pool_destroy:
- gen_pool_destroy(p2p->pool);
-out:
- devm_kfree(&pdev->dev, p2p);
- return error;
+ gen_pool_destroy(p2pdma->pool);
+ p2pdma->pool = NULL;
+ return ret;
}
static void pci_p2pdma_unmap_mappings(void *data)
@@ -276,7 +310,7 @@ static void pci_p2pdma_unmap_mappings(void *data)
* unmap_mapping_range() on the inode, teardown any existing userspace
* mappings and prevent new ones from being created.
*/
- sysfs_remove_file_from_group(&p2p_pgmap->mem.owner->kobj,
+ sysfs_remove_file_from_group(&p2p_pgmap->mem->owner->kobj,
&p2pmem_alloc_attr.attr,
p2pmem_group.name);
}
@@ -295,6 +329,7 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
u64 offset)
{
struct pci_p2pdma_pagemap *p2p_pgmap;
+ struct p2pdma_provider *mem;
struct dev_pagemap *pgmap;
struct pci_p2pdma *p2pdma;
void *addr;
@@ -312,15 +347,22 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
if (size + offset > pci_resource_len(pdev, bar))
return -EINVAL;
- if (!pdev->p2pdma) {
- error = pci_p2pdma_setup(pdev);
+ p2pdma = rcu_dereference_protected(pdev->p2pdma, 1);
+ if (!p2pdma) {
+ mem = pci_p2pdma_enable(pdev);
+ if (IS_ERR(mem))
+ return PTR_ERR(mem);
+
+ error = pci_p2pdma_setup_pool(pdev);
if (error)
return error;
}
p2p_pgmap = devm_kzalloc(&pdev->dev, sizeof(*p2p_pgmap), GFP_KERNEL);
- if (!p2p_pgmap)
- return -ENOMEM;
+ if (!p2p_pgmap) {
+ error = -ENOMEM;
+ goto free_pool;
+ }
pgmap = &p2p_pgmap->pgmap;
pgmap->range.start = pci_resource_start(pdev, bar) + offset;
@@ -328,9 +370,7 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
pgmap->nr_range = 1;
pgmap->type = MEMORY_DEVICE_PCI_P2PDMA;
pgmap->ops = &p2pdma_pgmap_ops;
- p2p_pgmap->mem.owner = &pdev->dev;
- p2p_pgmap->mem.bus_offset =
- pci_bus_address(pdev, bar) - pci_resource_start(pdev, bar);
+ p2p_pgmap->mem = mem;
addr = devm_memremap_pages(&pdev->dev, pgmap);
if (IS_ERR(addr)) {
@@ -343,7 +383,6 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
if (error)
goto pages_free;
- p2pdma = rcu_dereference_protected(pdev->p2pdma, 1);
error = gen_pool_add_owner(p2pdma->pool, (unsigned long)addr,
pci_bus_address(pdev, bar) + offset,
range_len(&pgmap->range), dev_to_node(&pdev->dev),
@@ -359,7 +398,10 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
pages_free:
devm_memunmap_pages(&pdev->dev, pgmap);
pgmap_free:
- devm_kfree(&pdev->dev, pgmap);
+ devm_kfree(&pdev->dev, p2p_pgmap);
+free_pool:
+ sysfs_remove_group(&pdev->dev.kobj, &p2pmem_group);
+ gen_pool_destroy(p2pdma->pool);
return error;
}
EXPORT_SYMBOL_GPL(pci_p2pdma_add_resource);
@@ -1008,11 +1050,11 @@ void __pci_p2pdma_update_state(struct pci_p2pdma_map_state *state,
{
struct pci_p2pdma_pagemap *p2p_pgmap = to_p2p_pgmap(page_pgmap(page));
- if (state->mem == &p2p_pgmap->mem)
+ if (state->mem == p2p_pgmap->mem)
return;
- state->mem = &p2p_pgmap->mem;
- state->map = pci_p2pdma_map_type(&p2p_pgmap->mem, dev);
+ state->mem = p2p_pgmap->mem;
+ state->map = pci_p2pdma_map_type(p2p_pgmap->mem, dev);
}
/**
diff --git a/include/linux/pci-p2pdma.h b/include/linux/pci-p2pdma.h
index eef96636c67e6..83f11dc8659a7 100644
--- a/include/linux/pci-p2pdma.h
+++ b/include/linux/pci-p2pdma.h
@@ -27,6 +27,7 @@ struct p2pdma_provider {
};
#ifdef CONFIG_PCI_P2PDMA
+struct p2pdma_provider *pci_p2pdma_enable(struct pci_dev *pdev);
int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
u64 offset);
int pci_p2pdma_distance_many(struct pci_dev *provider, struct device **clients,
@@ -45,6 +46,10 @@ int pci_p2pdma_enable_store(const char *page, struct pci_dev **p2p_dev,
ssize_t pci_p2pdma_enable_show(char *page, struct pci_dev *p2p_dev,
bool use_p2pdma);
#else /* CONFIG_PCI_P2PDMA */
+static inline struct p2pdma_provider *pci_p2pdma_enable(struct pci_dev *pdev)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
static inline int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar,
size_t size, u64 offset)
{
--
2.50.1
next prev parent reply other threads:[~2025-07-23 13:02 UTC|newest]
Thread overview: 54+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-07-23 13:00 [PATCH 00/10] vfio/pci: Allow MMIO regions to be exported through dma-buf Leon Romanovsky
2025-07-23 13:00 ` [PATCH 01/10] PCI/P2PDMA: Remove redundant bus_offset from map state Leon Romanovsky
2025-07-24 7:50 ` Christoph Hellwig
2025-07-23 13:00 ` [PATCH 02/10] PCI/P2PDMA: Introduce p2pdma_provider structure for cleaner abstraction Leon Romanovsky
2025-07-24 7:51 ` Christoph Hellwig
2025-07-24 7:55 ` Leon Romanovsky
2025-07-24 7:59 ` Christoph Hellwig
2025-07-24 8:07 ` Leon Romanovsky
2025-07-27 18:51 ` Jason Gunthorpe
2025-07-29 7:52 ` Christoph Hellwig
2025-07-29 8:53 ` Leon Romanovsky
2025-07-29 10:41 ` Christoph Hellwig
2025-07-29 11:39 ` Leon Romanovsky
2025-07-29 13:15 ` Jason Gunthorpe
2025-07-29 16:12 ` Jason Gunthorpe
2025-07-23 13:00 ` [PATCH 03/10] PCI/P2PDMA: Simplify bus address mapping API Leon Romanovsky
2025-07-24 7:52 ` Christoph Hellwig
2025-07-23 13:00 ` Leon Romanovsky [this message]
2025-07-23 13:00 ` [PATCH 05/10] PCI/P2PDMA: Export pci_p2pdma_map_type() function Leon Romanovsky
2025-07-24 8:03 ` Christoph Hellwig
2025-07-24 8:13 ` Leon Romanovsky
2025-07-25 16:30 ` Logan Gunthorpe
2025-07-25 18:54 ` Leon Romanovsky
2025-07-25 19:12 ` Logan Gunthorpe
2025-07-27 6:01 ` Leon Romanovsky
2025-07-27 19:05 ` Jason Gunthorpe
2025-07-28 16:12 ` Logan Gunthorpe
2025-07-28 16:41 ` Leon Romanovsky
2025-07-28 17:07 ` Logan Gunthorpe
2025-07-28 23:11 ` Jason Gunthorpe
2025-07-29 20:54 ` Logan Gunthorpe
2025-07-29 22:14 ` Jason Gunthorpe
2025-07-30 8:03 ` Leon Romanovsky
2025-07-29 7:52 ` Christoph Hellwig
2025-07-29 8:45 ` Leon Romanovsky
2025-07-27 19:02 ` Jason Gunthorpe
2025-07-23 13:00 ` [PATCH 06/10] types: move phys_vec definition to common header Leon Romanovsky
2025-07-23 13:00 ` [PATCH 07/10] vfio: Export vfio device get and put registration helpers Leon Romanovsky
2025-07-23 13:00 ` [PATCH 08/10] vfio/pci: Enable peer-to-peer DMA transactions by default Leon Romanovsky
2025-07-23 13:00 ` [PATCH 09/10] vfio/pci: Share the core device pointer while invoking feature functions Leon Romanovsky
2025-07-28 20:55 ` Alex Williamson
2025-07-29 8:39 ` Leon Romanovsky
2025-07-23 13:00 ` [PATCH 10/10] vfio/pci: Add dma-buf export support for MMIO regions Leon Romanovsky
2025-07-24 5:13 ` Kasireddy, Vivek
2025-07-24 5:44 ` Leon Romanovsky
2025-07-25 5:34 ` Kasireddy, Vivek
2025-07-27 6:16 ` Leon Romanovsky
2025-07-29 19:44 ` Robin Murphy
2025-07-29 20:13 ` Jason Gunthorpe
2025-07-30 9:32 ` Leon Romanovsky
2025-07-30 14:49 ` Robin Murphy
2025-07-30 16:01 ` Jason Gunthorpe
2025-07-30 19:58 ` [PATCH 00/10] vfio/pci: Allow MMIO regions to be exported through dma-buf Alex Williamson
2025-07-31 0:21 ` Jason Gunthorpe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=fde513e6135516368fa873cdff9144c1b29a477f.1753274085.git.leonro@nvidia.com \
--to=leon@kernel.org \
--cc=akpm@linux-foundation.org \
--cc=alex.williamson@redhat.com \
--cc=axboe@kernel.dk \
--cc=bhelgaas@google.com \
--cc=christian.koenig@amd.com \
--cc=dri-devel@lists.freedesktop.org \
--cc=hch@lst.de \
--cc=iommu@lists.linux.dev \
--cc=jgg@nvidia.com \
--cc=jglisse@redhat.com \
--cc=joro@8bytes.org \
--cc=kvm@vger.kernel.org \
--cc=leonro@nvidia.com \
--cc=linaro-mm-sig@lists.linaro.org \
--cc=linux-block@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-media@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=linux-pci@vger.kernel.org \
--cc=logang@deltatee.com \
--cc=m.szyprowski@samsung.com \
--cc=robin.murphy@arm.com \
--cc=sumit.semwal@linaro.org \
--cc=vivek.kasireddy@intel.com \
--cc=will@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).