From: Keith Busch <kbusch@kernel.org>
To: Caleb Sander Mateos <csander@purestorage.com>
Cc: Jens Axboe <axboe@kernel.dk>, Christoph Hellwig <hch@lst.de>,
Sagi Grimberg <sagi@grimberg.me>,
Kanchan Joshi <joshi.k@samsung.com>,
linux-nvme@lists.infradead.org, linux-kernel@vger.kernel.org
Subject: Re: [PATCH v4 0/2] nvme/pci: PRP list DMA pool partitioning
Date: Tue, 22 Apr 2025 11:48:00 -0600 [thread overview]
Message-ID: <aAfWUGAMTpwsHf2b@kbusch-mbp.dhcp.thefacebook.com> (raw)
In-Reply-To: <20250422161959.1958205-1-csander@purestorage.com>
On Tue, Apr 22, 2025 at 10:19:57AM -0600, Caleb Sander Mateos wrote:
> This reduces the _raw_spin_lock_irqsave overhead by about half, to
> 1.2%.
Could you try this atop your series? I hope to see if we can squeeze a
little more out by keeping the spinlock and list links local to the node
using them.
---
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index cbd7734922f91..08a1488155084 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -412,9 +412,10 @@ nvme_setup_prp_pools(struct nvme_dev *dev, unsigned numa_node)
if (prp_pools->small)
return prp_pools; /* already initialized */
- prp_pools->large = dma_pool_create("prp list page", dev->dev,
+ prp_pools->large = dma_pool_create_node("prp list page", dev->dev,
NVME_CTRL_PAGE_SIZE,
- NVME_CTRL_PAGE_SIZE, 0);
+ NVME_CTRL_PAGE_SIZE, 0,
+ numa_node);
if (!prp_pools->large)
return ERR_PTR(-ENOMEM);
@@ -422,8 +423,9 @@ nvme_setup_prp_pools(struct nvme_dev *dev, unsigned numa_node)
small_align = 512;
/* Optimisation for I/Os between 4k and 128k */
- prp_pools->small = dma_pool_create("prp list 256", dev->dev,
- 256, small_align, 0);
+ prp_pools->small = dma_pool_create_node("prp list 256", dev->dev,
+ 256, small_align, 0,
+ numa_node);
if (!prp_pools->small) {
dma_pool_destroy(prp_pools->large);
prp_pools->large = NULL;
diff --git a/include/linux/dmapool.h b/include/linux/dmapool.h
index f632ecfb42384..36cb5f66111c6 100644
--- a/include/linux/dmapool.h
+++ b/include/linux/dmapool.h
@@ -18,8 +18,16 @@ struct device;
#ifdef CONFIG_HAS_DMA
-struct dma_pool *dma_pool_create(const char *name, struct device *dev,
- size_t size, size_t align, size_t allocation);
+struct dma_pool *dma_pool_create_node(const char *name, struct device *dev,
+ size_t size, size_t align, size_t allocation, int node);
+
+static inline struct dma_pool *dma_pool_create(const char *name,
+ struct device *dev, size_t size, size_t align,
+ size_t allocation)
+{
+ return dma_pool_create_node(name, dev, size, align, allocation,
+ NUMA_NO_NODE);
+}
void dma_pool_destroy(struct dma_pool *pool);
@@ -35,6 +43,10 @@ struct dma_pool *dmam_pool_create(const char *name, struct device *dev,
void dmam_pool_destroy(struct dma_pool *pool);
#else /* !CONFIG_HAS_DMA */
+static inline struct dma_pool *dma_pool_create_node(const char *name,
+ struct device *dev, size_t size, size_t align, size_t allocation,
+ int node)
+{ return NULL; }
static inline struct dma_pool *dma_pool_create(const char *name,
struct device *dev, size_t size, size_t align, size_t allocation)
{ return NULL; }
diff --git a/mm/dmapool.c b/mm/dmapool.c
index f0bfc6c490f4e..e07242b18c576 100644
--- a/mm/dmapool.c
+++ b/mm/dmapool.c
@@ -56,6 +56,7 @@ struct dma_pool { /* the pool */
unsigned int size;
unsigned int allocation;
unsigned int boundary;
+ int node;
char name[32];
struct list_head pools;
};
@@ -199,12 +200,13 @@ static void pool_block_push(struct dma_pool *pool, struct dma_block *block,
/**
- * dma_pool_create - Creates a pool of consistent memory blocks, for dma.
+ * dma_pool_create_node - Creates a pool of consistent memory blocks, for dma.
* @name: name of pool, for diagnostics
* @dev: device that will be doing the DMA
* @size: size of the blocks in this pool.
* @align: alignment requirement for blocks; must be a power of two
* @boundary: returned blocks won't cross this power of two boundary
+ * @node: NUMA node to use when allocating structs 'dma_pool' and 'dma_page'
* Context: not in_interrupt()
*
* Given one of these pools, dma_pool_alloc()
@@ -221,8 +223,8 @@ static void pool_block_push(struct dma_pool *pool, struct dma_block *block,
* Return: a dma allocation pool with the requested characteristics, or
* %NULL if one can't be created.
*/
-struct dma_pool *dma_pool_create(const char *name, struct device *dev,
- size_t size, size_t align, size_t boundary)
+struct dma_pool *dma_pool_create_node(const char *name, struct device *dev,
+ size_t size, size_t align, size_t boundary, int node)
{
struct dma_pool *retval;
size_t allocation;
@@ -251,13 +253,14 @@ struct dma_pool *dma_pool_create(const char *name, struct device *dev,
boundary = min(boundary, allocation);
- retval = kzalloc(sizeof(*retval), GFP_KERNEL);
+ retval = kzalloc_node(sizeof(*retval), GFP_KERNEL, node);
if (!retval)
return retval;
strscpy(retval->name, name, sizeof(retval->name));
retval->dev = dev;
+ retval->node = node;
INIT_LIST_HEAD(&retval->page_list);
spin_lock_init(&retval->lock);
@@ -335,7 +338,7 @@ static struct dma_page *pool_alloc_page(struct dma_pool *pool, gfp_t mem_flags)
{
struct dma_page *page;
- page = kmalloc(sizeof(*page), mem_flags);
+ page = kmalloc_node(sizeof(*page), mem_flags, pool->node);
if (!page)
return NULL;
--
next prev parent reply other threads:[~2025-04-22 18:40 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-04-22 16:19 [PATCH v4 0/2] nvme/pci: PRP list DMA pool partitioning Caleb Sander Mateos
2025-04-22 16:19 ` [PATCH v4 1/2] nvme/pci: factor out nvme_init_hctx() helper Caleb Sander Mateos
2025-04-22 16:28 ` Keith Busch
2025-04-22 16:19 ` [PATCH v4 2/2] nvme/pci: make PRP list DMA pools per-NUMA-node Caleb Sander Mateos
2025-04-22 16:34 ` Keith Busch
2025-04-22 17:48 ` Keith Busch [this message]
2025-04-22 22:04 ` [PATCH v4 0/2] nvme/pci: PRP list DMA pool partitioning Caleb Sander Mateos
2025-04-22 22:46 ` Keith Busch
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=aAfWUGAMTpwsHf2b@kbusch-mbp.dhcp.thefacebook.com \
--to=kbusch@kernel.org \
--cc=axboe@kernel.dk \
--cc=csander@purestorage.com \
--cc=hch@lst.de \
--cc=joshi.k@samsung.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-nvme@lists.infradead.org \
--cc=sagi@grimberg.me \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox