* [PATCH v2] nvmet-rdma: use sbitmap to replace rsp free list
From: Guixin Liu @ 2024-10-08 9:37 UTC
To: hch, sagi, kch; +Cc: linux-nvme
Use an sbitmap to manage the nvmet_rdma_rsp pool instead of a free
list and spinlock, and use an additional tag to determine whether an
nvmet_rdma_rsp was allocated separately from the pool.
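At its core the change is tag-based allocation over an sbitmap. A
minimal sketch of the pattern (pool, obj and DISCRETE_TAG are
illustrative stand-ins, not names from this patch; the sbitmap calls
are the same ones the diff below uses):

#include <linux/sbitmap.h>
#include <linux/slab.h>

#define DISCRETE_TAG	-1	/* object lives outside the pool */

struct obj {
	int tag;		/* pool index, or DISCRETE_TAG */
};

struct pool {
	struct sbitmap tags;
	struct obj *objs;
};

static int pool_init(struct pool *p, unsigned int nr)
{
	/* shift = -1: let sbitmap pick its word granularity */
	if (sbitmap_init_node(&p->tags, nr, -1, GFP_KERNEL,
			      NUMA_NO_NODE, false, true))
		return -ENOMEM;
	p->objs = kcalloc(nr, sizeof(*p->objs), GFP_KERNEL);
	if (!p->objs) {
		sbitmap_free(&p->tags);
		return -ENOMEM;
	}
	return 0;
}

static struct obj *pool_get(struct pool *p)
{
	int tag = sbitmap_get(&p->tags);	/* atomic, no spinlock */

	if (tag < 0)
		return NULL;	/* caller falls back to a discrete kzalloc() */
	p->objs[tag].tag = tag;
	return &p->objs[tag];
}

static void pool_put(struct pool *p, struct obj *o)
{
	if (o->tag == DISCRETE_TAG) {
		kfree(o);	/* never owned by the pool */
		return;
	}
	sbitmap_clear_bit(&p->tags, o->tag);
}

Because sbitmap_get() is an atomic find-and-set (with a per-CPU
allocation hint, the final "true" init argument), the fast path drops
the spin_lock_irqsave() round trip of the old free list, which is
where the IOPS gain comes from.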
In addition, performance has improved:
1. The test environment is a local rxe RDMA device and a memory-based
backstore device.
2. fio command, averaged over 5 runs:
fio -filename=/dev/nvme0n1 --ioengine=libaio -direct=1
-size=1G -name=1 -thread -runtime=60 -time_based -rw=read -numjobs=16
-iodepth=128 -bs=4k -group_reporting
3. Before: 241k IOPS, After: 256k IOPS, an increase of about 6%.
Signed-off-by: Guixin Liu <kanie@linux.alibaba.com>
---
Changes from v1 to v2:
- Add performance improvement explanation.
- Add NVMET_RDMA_DISCRETE_RSP_TAG macro for -1 tag.
drivers/nvme/target/rdma.c | 56 ++++++++++++++++++--------------------
1 file changed, 27 insertions(+), 29 deletions(-)
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 689bb5d3cfdc..34132520ebce 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -39,6 +39,8 @@
#define NVMET_RDMA_BACKLOG 128
+#define NVMET_RDMA_DISCRETE_RSP_TAG -1
+
struct nvmet_rdma_srq;
struct nvmet_rdma_cmd {
@@ -75,7 +77,7 @@ struct nvmet_rdma_rsp {
u32 invalidate_rkey;
struct list_head wait_list;
- struct list_head free_list;
+ int tag;
};
enum nvmet_rdma_queue_state {
@@ -98,8 +100,7 @@ struct nvmet_rdma_queue {
struct nvmet_sq nvme_sq;
struct nvmet_rdma_rsp *rsps;
- struct list_head free_rsps;
- spinlock_t rsps_lock;
+ struct sbitmap rsp_tags;
struct nvmet_rdma_cmd *cmds;
struct work_struct release_work;
@@ -172,7 +173,8 @@ static void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue);
static void nvmet_rdma_free_rsp(struct nvmet_rdma_device *ndev,
struct nvmet_rdma_rsp *r);
static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev,
- struct nvmet_rdma_rsp *r);
+ struct nvmet_rdma_rsp *r,
+ int tag);
static const struct nvmet_fabrics_ops nvmet_rdma_ops;
@@ -210,15 +212,12 @@ static inline bool nvmet_rdma_need_data_out(struct nvmet_rdma_rsp *rsp)
static inline struct nvmet_rdma_rsp *
nvmet_rdma_get_rsp(struct nvmet_rdma_queue *queue)
{
- struct nvmet_rdma_rsp *rsp;
- unsigned long flags;
+ struct nvmet_rdma_rsp *rsp = NULL;
+ int tag;
- spin_lock_irqsave(&queue->rsps_lock, flags);
- rsp = list_first_entry_or_null(&queue->free_rsps,
- struct nvmet_rdma_rsp, free_list);
- if (likely(rsp))
- list_del(&rsp->free_list);
- spin_unlock_irqrestore(&queue->rsps_lock, flags);
+ tag = sbitmap_get(&queue->rsp_tags);
+ if (tag >= 0)
+ rsp = &queue->rsps[tag];
if (unlikely(!rsp)) {
int ret;
@@ -226,13 +225,12 @@ nvmet_rdma_get_rsp(struct nvmet_rdma_queue *queue)
rsp = kzalloc(sizeof(*rsp), GFP_KERNEL);
if (unlikely(!rsp))
return NULL;
- ret = nvmet_rdma_alloc_rsp(queue->dev, rsp);
+ ret = nvmet_rdma_alloc_rsp(queue->dev, rsp,
+ NVMET_RDMA_DISCRETE_RSP_TAG);
if (unlikely(ret)) {
kfree(rsp);
return NULL;
}
-
- rsp->allocated = true;
}
return rsp;
@@ -241,17 +239,13 @@ nvmet_rdma_get_rsp(struct nvmet_rdma_queue *queue)
static inline void
nvmet_rdma_put_rsp(struct nvmet_rdma_rsp *rsp)
{
- unsigned long flags;
-
- if (unlikely(rsp->allocated)) {
+ if (unlikely(rsp->tag == NVMET_RDMA_DISCRETE_RSP_TAG)) {
nvmet_rdma_free_rsp(rsp->queue->dev, rsp);
kfree(rsp);
return;
}
- spin_lock_irqsave(&rsp->queue->rsps_lock, flags);
- list_add_tail(&rsp->free_list, &rsp->queue->free_rsps);
- spin_unlock_irqrestore(&rsp->queue->rsps_lock, flags);
+ sbitmap_clear_bit(&rsp->queue->rsp_tags, rsp->tag);
}
static void nvmet_rdma_free_inline_pages(struct nvmet_rdma_device *ndev,
@@ -404,7 +398,7 @@ static void nvmet_rdma_free_cmds(struct nvmet_rdma_device *ndev,
}
static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev,
- struct nvmet_rdma_rsp *r)
+ struct nvmet_rdma_rsp *r, int tag)
{
/* NVMe CQE / RDMA SEND */
r->req.cqe = kmalloc(sizeof(*r->req.cqe), GFP_KERNEL);
@@ -432,6 +426,7 @@ static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev,
r->read_cqe.done = nvmet_rdma_read_data_done;
/* Data Out / RDMA WRITE */
r->write_cqe.done = nvmet_rdma_write_data_done;
+ r->tag = tag;
return 0;
@@ -454,21 +449,23 @@ nvmet_rdma_alloc_rsps(struct nvmet_rdma_queue *queue)
{
struct nvmet_rdma_device *ndev = queue->dev;
int nr_rsps = queue->recv_queue_size * 2;
- int ret = -EINVAL, i;
+ int ret = -ENOMEM, i;
+
+ if (sbitmap_init_node(&queue->rsp_tags, nr_rsps, -1, GFP_KERNEL,
+ NUMA_NO_NODE, false, true))
+ goto out;
queue->rsps = kcalloc(nr_rsps, sizeof(struct nvmet_rdma_rsp),
GFP_KERNEL);
if (!queue->rsps)
- goto out;
+ goto out_free_sbitmap;
for (i = 0; i < nr_rsps; i++) {
struct nvmet_rdma_rsp *rsp = &queue->rsps[i];
- ret = nvmet_rdma_alloc_rsp(ndev, rsp);
+ ret = nvmet_rdma_alloc_rsp(ndev, rsp, i);
if (ret)
goto out_free;
-
- list_add_tail(&rsp->free_list, &queue->free_rsps);
}
return 0;
@@ -477,6 +474,8 @@ nvmet_rdma_alloc_rsps(struct nvmet_rdma_queue *queue)
while (--i >= 0)
nvmet_rdma_free_rsp(ndev, &queue->rsps[i]);
kfree(queue->rsps);
+out_free_sbitmap:
+ sbitmap_free(&queue->rsp_tags);
out:
return ret;
}
@@ -489,6 +488,7 @@ static void nvmet_rdma_free_rsps(struct nvmet_rdma_queue *queue)
for (i = 0; i < nr_rsps; i++)
nvmet_rdma_free_rsp(ndev, &queue->rsps[i]);
kfree(queue->rsps);
+ sbitmap_free(&queue->rsp_tags);
}
static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev,
@@ -1447,8 +1447,6 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev,
INIT_LIST_HEAD(&queue->rsp_wait_list);
INIT_LIST_HEAD(&queue->rsp_wr_wait_list);
spin_lock_init(&queue->rsp_wr_wait_lock);
- INIT_LIST_HEAD(&queue->free_rsps);
- spin_lock_init(&queue->rsps_lock);
INIT_LIST_HEAD(&queue->queue_list);
queue->idx = ida_alloc(&nvmet_rdma_queue_ida, GFP_KERNEL);
--
2.43.0
* Re: [PATCH v2] nvmet-rdma: use sbitmap to replace rsp free list
From: Jens Axboe @ 2024-10-08 13:31 UTC
To: Guixin Liu, hch, sagi, kch; +Cc: linux-nvme
On 10/8/24 3:37 AM, Guixin Liu wrote:
> Use an sbitmap to manage the nvmet_rdma_rsp pool instead of a free
> list and spinlock, and use an additional tag to determine whether an
> nvmet_rdma_rsp was allocated separately from the pool.
>
> In addition, performance has improved:
> 1. The test environment is a local rxe RDMA device and a memory-based
> backstore device.
> 2. fio command, averaged over 5 runs:
> fio -filename=/dev/nvme0n1 --ioengine=libaio -direct=1
> -size=1G -name=1 -thread -runtime=60 -time_based -rw=read -numjobs=16
> -iodepth=128 -bs=4k -group_reporting
> 3. Before: 241k IOPS, After: 256k IOPS, an increase of about 6%.
Reviewed-by: Jens Axboe <axboe@kernel.dk>
--
Jens Axboe
* Re: [PATCH v2] nvmet-rdma: use sbitmap to replace rsp free list
From: Keith Busch @ 2024-10-08 21:17 UTC
To: Guixin Liu; +Cc: hch, sagi, kch, linux-nvme
On Tue, Oct 08, 2024 at 05:37:08PM +0800, Guixin Liu wrote:
> Use an sbitmap to manage the nvmet_rdma_rsp pool instead of a free
> list and spinlock, and use an additional tag to determine whether an
> nvmet_rdma_rsp was allocated separately from the pool.
>
> In addition, performance has improved:
> 1. The test environment is a local rxe RDMA device and a memory-based
> backstore device.
> 2. fio command, averaged over 5 runs:
> fio -filename=/dev/nvme0n1 --ioengine=libaio -direct=1
> -size=1G -name=1 -thread -runtime=60 -time_based -rw=read -numjobs=16
> -iodepth=128 -bs=4k -group_reporting
> 3. Before: 241k IOPS, After: 256k IOPS, an increase of about 6%.
Applied, thanks.