[PATCH] nvme-pci: place chain addresses in iod
From: Keith Busch @ 2022-12-20 18:21 UTC
To: linux-nvme, hch; +Cc: sagi, Keith Busch
From: Keith Busch <kbusch@kernel.org>
The iod space is appended at the end of the preallocated 'struct
request', and padded to the cache line size. This leaves some free
memory (in most kernel configs) up for grabs.
Instead of appending the prp list chaining addresses after the
scatterlist, inline these in the struct nvme_iod. This leaves room for
one more scatterlist element in the mempool for a nice even number: 128.
And without increasing the size of the preallocated requests, we can
hold up to 5 chaining elements, allowing the driver to increase its max
transfer size to 8MB.
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
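Side note for reviewers: a back-of-the-envelope sketch (not driver
code; the constants and the formula just mirror the driver's
nvme_pci_npages_prp()) of why 5 chain pages cover an 8MB transfer with
4k controller pages:

	#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))
	#define CTRL_PAGE_SIZE		4096	/* NVME_CTRL_PAGE_SIZE */
	#define MAX_KB_SZ		8192	/* NVME_MAX_KB_SZ */

	static int npages_prp(void)
	{
		/* worst case: a misaligned transfer needs one extra PRP */
		unsigned int nprps = MAX_KB_SZ * 1024 / CTRL_PAGE_SIZE + 1;

		/*
		 * Each 4k chain page holds 511 8-byte PRP entries plus one
		 * chain pointer: DIV_ROUND_UP(8 * 2049, 4088) == 5.
		 */
		return DIV_ROUND_UP(8 * nprps, CTRL_PAGE_SIZE - 8);
	}

So 2049 worst-case PRP entries fit in 5 chained pages, which is where
NVME_MAX_CHAINS comes from.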
drivers/nvme/host/pci.c | 44 ++++++++++++-----------------------------
1 file changed, 13 insertions(+), 31 deletions(-)
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 578f384025440..01ec07e04e2c0 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -42,8 +42,9 @@
* These can be higher, but we need to ensure that any command doesn't
* require an sg allocation that needs more than a page of data.
*/
-#define NVME_MAX_KB_SZ 4096
-#define NVME_MAX_SEGS 127
+#define NVME_MAX_KB_SZ 8192
+#define NVME_MAX_SEGS 128
+#define NVME_MAX_CHAINS 5
static int use_threaded_interrupts;
module_param(use_threaded_interrupts, int, 0444);
@@ -232,6 +233,7 @@ struct nvme_iod {
dma_addr_t first_dma;
dma_addr_t meta_dma;
struct sg_table sgt;
+ void *list[NVME_MAX_CHAINS];
};
static inline unsigned int nvme_dbbuf_size(struct nvme_dev *dev)
@@ -385,16 +387,6 @@ static int nvme_pci_npages_prp(void)
return DIV_ROUND_UP(8 * nprps, NVME_CTRL_PAGE_SIZE - 8);
}
-/*
- * Calculates the number of pages needed for the SGL segments. For example a 4k
- * page can accommodate 256 SGL descriptors.
- */
-static int nvme_pci_npages_sgl(void)
-{
- return DIV_ROUND_UP(NVME_MAX_SEGS * sizeof(struct nvme_sgl_desc),
- NVME_CTRL_PAGE_SIZE);
-}
-
static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
unsigned int hctx_idx)
{
@@ -508,12 +500,6 @@ static void nvme_commit_rqs(struct blk_mq_hw_ctx *hctx)
spin_unlock(&nvmeq->sq_lock);
}
-static void **nvme_pci_iod_list(struct request *req)
-{
- struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
- return (void **)(iod->sgt.sgl + blk_rq_nr_phys_segments(req));
-}
-
static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req,
int nseg)
{
@@ -539,7 +525,7 @@ static void nvme_free_prps(struct nvme_dev *dev, struct request *req)
int i;
for (i = 0; i < iod->nr_allocations; i++) {
- __le64 *prp_list = nvme_pci_iod_list(req)[i];
+ __le64 *prp_list = iod->list[i];
dma_addr_t next_dma_addr = le64_to_cpu(prp_list[last_prp]);
dma_pool_free(dev->prp_page_pool, prp_list, dma_addr);
@@ -562,10 +548,10 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
dma_unmap_sgtable(dev->dev, &iod->sgt, rq_dma_dir(req), 0);
if (iod->nr_allocations == 0)
- dma_pool_free(dev->prp_small_pool, nvme_pci_iod_list(req)[0],
+ dma_pool_free(dev->prp_small_pool, iod->list[0],
iod->first_dma);
else if (iod->use_sgl)
- dma_pool_free(dev->prp_page_pool, nvme_pci_iod_list(req)[0],
+ dma_pool_free(dev->prp_page_pool, iod->list[0],
iod->first_dma);
else
nvme_free_prps(dev, req);
@@ -597,7 +583,6 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev,
u64 dma_addr = sg_dma_address(sg);
int offset = dma_addr & (NVME_CTRL_PAGE_SIZE - 1);
__le64 *prp_list;
- void **list = nvme_pci_iod_list(req);
dma_addr_t prp_dma;
int nprps, i;
@@ -635,7 +620,7 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev,
iod->nr_allocations = -1;
return BLK_STS_RESOURCE;
}
- list[0] = prp_list;
+ iod->list[0] = prp_list;
iod->first_dma = prp_dma;
i = 0;
for (;;) {
@@ -644,7 +629,7 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev,
prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma);
if (!prp_list)
goto free_prps;
- list[iod->nr_allocations++] = prp_list;
+ iod->list[iod->nr_allocations++] = prp_list;
prp_list[0] = old_prp_list[i - 1];
old_prp_list[i - 1] = cpu_to_le64(prp_dma);
i = 1;
@@ -726,7 +711,7 @@ static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev,
return BLK_STS_RESOURCE;
}
- nvme_pci_iod_list(req)[0] = sg_list;
+ iod->list[0] = sg_list;
iod->first_dma = sgl_dma;
nvme_pci_sgl_set_seg(&cmd->dptr.sgl, sgl_dma, entries);
@@ -2659,11 +2644,8 @@ static void nvme_release_prp_pools(struct nvme_dev *dev)
static int nvme_pci_alloc_iod_mempool(struct nvme_dev *dev)
{
- size_t npages = max(nvme_pci_npages_prp(), nvme_pci_npages_sgl());
- size_t alloc_size = sizeof(__le64 *) * npages +
- sizeof(struct scatterlist) * NVME_MAX_SEGS;
+ size_t alloc_size = sizeof(struct scatterlist) * NVME_MAX_SEGS;
- WARN_ON_ONCE(alloc_size > PAGE_SIZE);
dev->iod_mempool = mempool_create_node(1,
mempool_kmalloc, mempool_kfree,
(void *)alloc_size, GFP_KERNEL,
@@ -3483,9 +3465,9 @@ static int __init nvme_init(void)
BUILD_BUG_ON(sizeof(struct nvme_create_sq) != 64);
BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64);
BUILD_BUG_ON(IRQ_AFFINITY_MAX_SETS < 2);
- BUILD_BUG_ON(DIV_ROUND_UP(nvme_pci_npages_prp(), NVME_CTRL_PAGE_SIZE) >
- S8_MAX);
BUILD_BUG_ON(NVME_MAX_SEGS > SGES_PER_PAGE);
+ BUILD_BUG_ON(sizeof(struct scatterlist) * NVME_MAX_SEGS > PAGE_SIZE);
+ BUILD_BUG_ON(nvme_pci_npages_prp() > NVME_MAX_CHAINS);
return pci_register_driver(&nvme_driver);
}
--
2.30.2
Re: [PATCH] nvme-pci: place chain addresses in iod
From: Christoph Hellwig @ 2022-12-21 8:16 UTC
To: Keith Busch; +Cc: linux-nvme, hch, sagi, Keith Busch
On Tue, Dec 20, 2022 at 10:21:31AM -0800, Keith Busch wrote:
> -#define NVME_MAX_KB_SZ 4096
> -#define NVME_MAX_SEGS 127
> +#define NVME_MAX_KB_SZ 8192
> +#define NVME_MAX_SEGS 128
> +#define NVME_MAX_CHAINS 5
NVME_MAX_CHAINS sounds a bit odd. Maybe MAX_ALLOCATIONS to match
the nr_allocations field?
>
> static int use_threaded_interrupts;
> module_param(use_threaded_interrupts, int, 0444);
> @@ -232,6 +233,7 @@ struct nvme_iod {
> dma_addr_t first_dma;
> dma_addr_t meta_dma;
> struct sg_table sgt;
> + void *list[NVME_MAX_CHAINS];
I think this should be a union nvme_data_ptr * so that we have some
typing information.
Re: [PATCH] nvme-pci: place chain addresses in iod
From: Sagi Grimberg @ 2022-12-25 9:53 UTC
To: Christoph Hellwig, Keith Busch; +Cc: linux-nvme, Keith Busch
> On Tue, Dec 20, 2022 at 10:21:31AM -0800, Keith Busch wrote:
>> -#define NVME_MAX_KB_SZ 4096
>> -#define NVME_MAX_SEGS 127
>> +#define NVME_MAX_KB_SZ 8192
>> +#define NVME_MAX_SEGS 128
>> +#define NVME_MAX_CHAINS 5
>
> NVME_MAX_CHAINS sounds a bit odd. Maybe MAX_ALLOCATIONS to match
> the nr_allocations field?
Makes sense.
>
>>
>> static int use_threaded_interrupts;
>> module_param(use_threaded_interrupts, int, 0444);
>> @@ -232,6 +233,7 @@ struct nvme_iod {
>> dma_addr_t first_dma;
>> dma_addr_t meta_dma;
>> struct sg_table sgt;
>> + void *list[NVME_MAX_CHAINS];
>
> I think this should be a union nvme_data_ptr * so that we have some
> typing information.
nvme_data_ptr is a wire struct. Maybe it would be better if nvme-pci had
its own union for this.
Also, "list" is not a great name, primarily because it is not a list...
maybe data_ptrs?
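Something along these lines, maybe (untested sketch, and the names are
just placeholders):

	/* driver-local, so we don't overload the wire struct */
	union nvme_descriptor {
		struct nvme_sgl_desc	*sg_list;
		__le64			*prp_list;
	};

	struct nvme_iod {
		...
		union nvme_descriptor	data_ptrs[NVME_MAX_CHAINS];
	};

That keeps the typing information Christoph asked for without reusing
union nvme_data_ptr for host-side bookkeeping.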