Linux-NVME Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: Keith Busch <kbusch@kernel.org>
To: Caleb Sander Mateos <csander@purestorage.com>
Cc: Jens Axboe <axboe@kernel.dk>, Christoph Hellwig <hch@lst.de>,
	Sagi Grimberg <sagi@grimberg.me>,
	Kanchan Joshi <joshi.k@samsung.com>,
	linux-nvme@lists.infradead.org, linux-kernel@vger.kernel.org
Subject: Re: [PATCH v4 0/2] nvme/pci: PRP list DMA pool partitioning
Date: Tue, 22 Apr 2025 11:48:00 -0600	[thread overview]
Message-ID: <aAfWUGAMTpwsHf2b@kbusch-mbp.dhcp.thefacebook.com> (raw)
In-Reply-To: <20250422161959.1958205-1-csander@purestorage.com>

On Tue, Apr 22, 2025 at 10:19:57AM -0600, Caleb Sander Mateos wrote:
> This reduces the _raw_spin_lock_irqsave overhead by about half, to
> 1.2%.

Could you try this atop your series? I hope to see if we can squeeze a
little more out by keeping the spinlock and list links local to the node
using them.

---
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index cbd7734922f91..08a1488155084 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -412,9 +412,10 @@ nvme_setup_prp_pools(struct nvme_dev *dev, unsigned numa_node)
 	if (prp_pools->small)
 		return prp_pools; /* already initialized */
 
-	prp_pools->large = dma_pool_create("prp list page", dev->dev,
+	prp_pools->large = dma_pool_create_node("prp list page", dev->dev,
 						NVME_CTRL_PAGE_SIZE,
-						NVME_CTRL_PAGE_SIZE, 0);
+						NVME_CTRL_PAGE_SIZE, 0,
+						numa_node);
 	if (!prp_pools->large)
 		return ERR_PTR(-ENOMEM);
 
@@ -422,8 +423,9 @@ nvme_setup_prp_pools(struct nvme_dev *dev, unsigned numa_node)
 		small_align = 512;
 
 	/* Optimisation for I/Os between 4k and 128k */
-	prp_pools->small = dma_pool_create("prp list 256", dev->dev,
-						256, small_align, 0);
+	prp_pools->small = dma_pool_create_node("prp list 256", dev->dev,
+						256, small_align, 0,
+						numa_node);
 	if (!prp_pools->small) {
 		dma_pool_destroy(prp_pools->large);
 		prp_pools->large = NULL;
diff --git a/include/linux/dmapool.h b/include/linux/dmapool.h
index f632ecfb42384..36cb5f66111c6 100644
--- a/include/linux/dmapool.h
+++ b/include/linux/dmapool.h
@@ -18,8 +18,16 @@ struct device;
 
 #ifdef CONFIG_HAS_DMA
 
-struct dma_pool *dma_pool_create(const char *name, struct device *dev, 
-			size_t size, size_t align, size_t allocation);
+struct dma_pool *dma_pool_create_node(const char *name, struct device *dev,
+			size_t size, size_t align, size_t allocation, int node);
+
+static inline struct dma_pool *dma_pool_create(const char *name,
+			struct device *dev, size_t size, size_t align,
+			size_t allocation)
+{
+	return dma_pool_create_node(name, dev, size, align, allocation,
+				    NUMA_NO_NODE);
+}
 
 void dma_pool_destroy(struct dma_pool *pool);
 
@@ -35,6 +43,10 @@ struct dma_pool *dmam_pool_create(const char *name, struct device *dev,
 void dmam_pool_destroy(struct dma_pool *pool);
 
 #else /* !CONFIG_HAS_DMA */
+static inline struct dma_pool *dma_pool_create_node(const char *name,
+	struct device *dev, size_t size, size_t align, size_t allocation,
+	int node)
+{ return NULL; }
 static inline struct dma_pool *dma_pool_create(const char *name,
 	struct device *dev, size_t size, size_t align, size_t allocation)
 { return NULL; }
diff --git a/mm/dmapool.c b/mm/dmapool.c
index f0bfc6c490f4e..e07242b18c576 100644
--- a/mm/dmapool.c
+++ b/mm/dmapool.c
@@ -56,6 +56,7 @@ struct dma_pool {		/* the pool */
 	unsigned int size;
 	unsigned int allocation;
 	unsigned int boundary;
+	int node;
 	char name[32];
 	struct list_head pools;
 };
@@ -199,12 +200,13 @@ static void pool_block_push(struct dma_pool *pool, struct dma_block *block,
 
 
 /**
- * dma_pool_create - Creates a pool of consistent memory blocks, for dma.
+ * dma_pool_create_node - Creates a pool of consistent memory blocks, for dma.
  * @name: name of pool, for diagnostics
  * @dev: device that will be doing the DMA
  * @size: size of the blocks in this pool.
  * @align: alignment requirement for blocks; must be a power of two
  * @boundary: returned blocks won't cross this power of two boundary
+ * @node: NUMA node to use when allocating structs 'dma_pool' and 'dma_page'
  * Context: not in_interrupt()
  *
  * Given one of these pools, dma_pool_alloc()
@@ -221,8 +223,8 @@ static void pool_block_push(struct dma_pool *pool, struct dma_block *block,
  * Return: a dma allocation pool with the requested characteristics, or
  * %NULL if one can't be created.
  */
-struct dma_pool *dma_pool_create(const char *name, struct device *dev,
-				 size_t size, size_t align, size_t boundary)
+struct dma_pool *dma_pool_create_node(const char *name, struct device *dev,
+			size_t size, size_t align, size_t boundary, int node)
 {
 	struct dma_pool *retval;
 	size_t allocation;
@@ -251,13 +253,14 @@ struct dma_pool *dma_pool_create(const char *name, struct device *dev,
 
 	boundary = min(boundary, allocation);
 
-	retval = kzalloc(sizeof(*retval), GFP_KERNEL);
+	retval = kzalloc_node(sizeof(*retval), GFP_KERNEL, node);
 	if (!retval)
 		return retval;
 
 	strscpy(retval->name, name, sizeof(retval->name));
 
 	retval->dev = dev;
+	retval->node = node;
 
 	INIT_LIST_HEAD(&retval->page_list);
 	spin_lock_init(&retval->lock);
@@ -335,7 +338,7 @@ static struct dma_page *pool_alloc_page(struct dma_pool *pool, gfp_t mem_flags)
 {
 	struct dma_page *page;
 
-	page = kmalloc(sizeof(*page), mem_flags);
+	page = kmalloc_node(sizeof(*page), mem_flags, pool->node);
 	if (!page)
 		return NULL;
 
--


  parent reply	other threads:[~2025-04-22 18:40 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-04-22 16:19 [PATCH v4 0/2] nvme/pci: PRP list DMA pool partitioning Caleb Sander Mateos
2025-04-22 16:19 ` [PATCH v4 1/2] nvme/pci: factor out nvme_init_hctx() helper Caleb Sander Mateos
2025-04-22 16:28   ` Keith Busch
2025-04-22 16:19 ` [PATCH v4 2/2] nvme/pci: make PRP list DMA pools per-NUMA-node Caleb Sander Mateos
2025-04-22 16:34   ` Keith Busch
2025-04-22 17:48 ` Keith Busch [this message]
2025-04-22 22:04   ` [PATCH v4 0/2] nvme/pci: PRP list DMA pool partitioning Caleb Sander Mateos
2025-04-22 22:46     ` Keith Busch

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=aAfWUGAMTpwsHf2b@kbusch-mbp.dhcp.thefacebook.com \
    --to=kbusch@kernel.org \
    --cc=axboe@kernel.dk \
    --cc=csander@purestorage.com \
    --cc=hch@lst.de \
    --cc=joshi.k@samsung.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-nvme@lists.infradead.org \
    --cc=sagi@grimberg.me \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox