* [PATCH] nvmet-rdma: use sbitmap to replace rsp free list
@ 2024-09-25 10:51 Guixin Liu
2024-10-05 14:06 ` Guixin Liu
2024-10-07 7:05 ` Christoph Hellwig
0 siblings, 2 replies; 4+ messages in thread
From: Guixin Liu @ 2024-09-25 10:51 UTC (permalink / raw)
To: hch, sagi, kch; +Cc: linux-nvme
We can use sbitmap to manage all the nvmet_rdma_rsp instead of using
free lists and spinlock, and we can use an additional tag to
determine whether the nvmet_rdma_rsp is extra allocated.
Signed-off-by: Guixin Liu <kanie@linux.alibaba.com>
---
drivers/nvme/target/rdma.c | 53 +++++++++++++++++---------------------
1 file changed, 24 insertions(+), 29 deletions(-)
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 689bb5d3cfdc..4fa1aa0f46a7 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -75,7 +75,7 @@ struct nvmet_rdma_rsp {
u32 invalidate_rkey;
struct list_head wait_list;
- struct list_head free_list;
+ int tag;
};
enum nvmet_rdma_queue_state {
@@ -98,8 +98,7 @@ struct nvmet_rdma_queue {
struct nvmet_sq nvme_sq;
struct nvmet_rdma_rsp *rsps;
- struct list_head free_rsps;
- spinlock_t rsps_lock;
+ struct sbitmap rsp_tags;
struct nvmet_rdma_cmd *cmds;
struct work_struct release_work;
@@ -172,7 +171,8 @@ static void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue);
static void nvmet_rdma_free_rsp(struct nvmet_rdma_device *ndev,
struct nvmet_rdma_rsp *r);
static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev,
- struct nvmet_rdma_rsp *r);
+ struct nvmet_rdma_rsp *r,
+ int tag);
static const struct nvmet_fabrics_ops nvmet_rdma_ops;
@@ -210,15 +210,12 @@ static inline bool nvmet_rdma_need_data_out(struct nvmet_rdma_rsp *rsp)
static inline struct nvmet_rdma_rsp *
nvmet_rdma_get_rsp(struct nvmet_rdma_queue *queue)
{
- struct nvmet_rdma_rsp *rsp;
- unsigned long flags;
+ struct nvmet_rdma_rsp *rsp = NULL;
+ int tag;
- spin_lock_irqsave(&queue->rsps_lock, flags);
- rsp = list_first_entry_or_null(&queue->free_rsps,
- struct nvmet_rdma_rsp, free_list);
- if (likely(rsp))
- list_del(&rsp->free_list);
- spin_unlock_irqrestore(&queue->rsps_lock, flags);
+ tag = sbitmap_get(&queue->rsp_tags);
+ if (tag >= 0)
+ rsp = &queue->rsps[tag];
if (unlikely(!rsp)) {
int ret;
@@ -226,13 +223,11 @@ nvmet_rdma_get_rsp(struct nvmet_rdma_queue *queue)
rsp = kzalloc(sizeof(*rsp), GFP_KERNEL);
if (unlikely(!rsp))
return NULL;
- ret = nvmet_rdma_alloc_rsp(queue->dev, rsp);
+ ret = nvmet_rdma_alloc_rsp(queue->dev, rsp, -1);
if (unlikely(ret)) {
kfree(rsp);
return NULL;
}
-
- rsp->allocated = true;
}
return rsp;
@@ -241,17 +236,13 @@ nvmet_rdma_get_rsp(struct nvmet_rdma_queue *queue)
static inline void
nvmet_rdma_put_rsp(struct nvmet_rdma_rsp *rsp)
{
- unsigned long flags;
-
- if (unlikely(rsp->allocated)) {
+ if (unlikely(rsp->tag < 0)) {
nvmet_rdma_free_rsp(rsp->queue->dev, rsp);
kfree(rsp);
return;
}
- spin_lock_irqsave(&rsp->queue->rsps_lock, flags);
- list_add_tail(&rsp->free_list, &rsp->queue->free_rsps);
- spin_unlock_irqrestore(&rsp->queue->rsps_lock, flags);
+ sbitmap_clear_bit(&rsp->queue->rsp_tags, rsp->tag);
}
static void nvmet_rdma_free_inline_pages(struct nvmet_rdma_device *ndev,
@@ -404,7 +395,7 @@ static void nvmet_rdma_free_cmds(struct nvmet_rdma_device *ndev,
}
static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev,
- struct nvmet_rdma_rsp *r)
+ struct nvmet_rdma_rsp *r, int tag)
{
/* NVMe CQE / RDMA SEND */
r->req.cqe = kmalloc(sizeof(*r->req.cqe), GFP_KERNEL);
@@ -432,6 +423,7 @@ static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev,
r->read_cqe.done = nvmet_rdma_read_data_done;
/* Data Out / RDMA WRITE */
r->write_cqe.done = nvmet_rdma_write_data_done;
+ r->tag = tag;
return 0;
@@ -454,21 +446,23 @@ nvmet_rdma_alloc_rsps(struct nvmet_rdma_queue *queue)
{
struct nvmet_rdma_device *ndev = queue->dev;
int nr_rsps = queue->recv_queue_size * 2;
- int ret = -EINVAL, i;
+ int ret = -ENOMEM, i;
+
+ if (sbitmap_init_node(&queue->rsp_tags, nr_rsps, -1, GFP_KERNEL,
+ NUMA_NO_NODE, false, true))
+ goto out;
queue->rsps = kcalloc(nr_rsps, sizeof(struct nvmet_rdma_rsp),
GFP_KERNEL);
if (!queue->rsps)
- goto out;
+ goto out_free_sbitmap;
for (i = 0; i < nr_rsps; i++) {
struct nvmet_rdma_rsp *rsp = &queue->rsps[i];
- ret = nvmet_rdma_alloc_rsp(ndev, rsp);
+ ret = nvmet_rdma_alloc_rsp(ndev, rsp, i);
if (ret)
goto out_free;
-
- list_add_tail(&rsp->free_list, &queue->free_rsps);
}
return 0;
@@ -477,6 +471,8 @@ nvmet_rdma_alloc_rsps(struct nvmet_rdma_queue *queue)
while (--i >= 0)
nvmet_rdma_free_rsp(ndev, &queue->rsps[i]);
kfree(queue->rsps);
+out_free_sbitmap:
+ sbitmap_free(&queue->rsp_tags);
out:
return ret;
}
@@ -489,6 +485,7 @@ static void nvmet_rdma_free_rsps(struct nvmet_rdma_queue *queue)
for (i = 0; i < nr_rsps; i++)
nvmet_rdma_free_rsp(ndev, &queue->rsps[i]);
kfree(queue->rsps);
+ sbitmap_free(&queue->rsp_tags);
}
static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev,
@@ -1447,8 +1444,6 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev,
INIT_LIST_HEAD(&queue->rsp_wait_list);
INIT_LIST_HEAD(&queue->rsp_wr_wait_list);
spin_lock_init(&queue->rsp_wr_wait_lock);
- INIT_LIST_HEAD(&queue->free_rsps);
- spin_lock_init(&queue->rsps_lock);
INIT_LIST_HEAD(&queue->queue_list);
queue->idx = ida_alloc(&nvmet_rdma_queue_ida, GFP_KERNEL);
--
2.43.0
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH] nvmet-rdma: use sbitmap to replace rsp free list
2024-09-25 10:51 [PATCH] nvmet-rdma: use sbitmap to replace rsp free list Guixin Liu
@ 2024-10-05 14:06 ` Guixin Liu
2024-10-07 7:05 ` Christoph Hellwig
1 sibling, 0 replies; 4+ messages in thread
From: Guixin Liu @ 2024-10-05 14:06 UTC (permalink / raw)
To: hch, sagi, kch; +Cc: linux-nvme
gentle ping...
在 2024/9/25 18:51, Guixin Liu 写道:
> We can use sbitmap to manage all the nvmet_rdma_rsp instead of using
> free lists and spinlock, and we can use an additional tag to
> determine whether the nvmet_rdma_rsp is extra allocated.
>
> Signed-off-by: Guixin Liu <kanie@linux.alibaba.com>
> ---
> drivers/nvme/target/rdma.c | 53 +++++++++++++++++---------------------
> 1 file changed, 24 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
> index 689bb5d3cfdc..4fa1aa0f46a7 100644
> --- a/drivers/nvme/target/rdma.c
> +++ b/drivers/nvme/target/rdma.c
> @@ -75,7 +75,7 @@ struct nvmet_rdma_rsp {
> u32 invalidate_rkey;
>
> struct list_head wait_list;
> - struct list_head free_list;
> + int tag;
> };
>
> enum nvmet_rdma_queue_state {
> @@ -98,8 +98,7 @@ struct nvmet_rdma_queue {
> struct nvmet_sq nvme_sq;
>
> struct nvmet_rdma_rsp *rsps;
> - struct list_head free_rsps;
> - spinlock_t rsps_lock;
> + struct sbitmap rsp_tags;
> struct nvmet_rdma_cmd *cmds;
>
> struct work_struct release_work;
> @@ -172,7 +171,8 @@ static void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue);
> static void nvmet_rdma_free_rsp(struct nvmet_rdma_device *ndev,
> struct nvmet_rdma_rsp *r);
> static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev,
> - struct nvmet_rdma_rsp *r);
> + struct nvmet_rdma_rsp *r,
> + int tag);
>
> static const struct nvmet_fabrics_ops nvmet_rdma_ops;
>
> @@ -210,15 +210,12 @@ static inline bool nvmet_rdma_need_data_out(struct nvmet_rdma_rsp *rsp)
> static inline struct nvmet_rdma_rsp *
> nvmet_rdma_get_rsp(struct nvmet_rdma_queue *queue)
> {
> - struct nvmet_rdma_rsp *rsp;
> - unsigned long flags;
> + struct nvmet_rdma_rsp *rsp = NULL;
> + int tag;
>
> - spin_lock_irqsave(&queue->rsps_lock, flags);
> - rsp = list_first_entry_or_null(&queue->free_rsps,
> - struct nvmet_rdma_rsp, free_list);
> - if (likely(rsp))
> - list_del(&rsp->free_list);
> - spin_unlock_irqrestore(&queue->rsps_lock, flags);
> + tag = sbitmap_get(&queue->rsp_tags);
> + if (tag >= 0)
> + rsp = &queue->rsps[tag];
>
> if (unlikely(!rsp)) {
> int ret;
> @@ -226,13 +223,11 @@ nvmet_rdma_get_rsp(struct nvmet_rdma_queue *queue)
> rsp = kzalloc(sizeof(*rsp), GFP_KERNEL);
> if (unlikely(!rsp))
> return NULL;
> - ret = nvmet_rdma_alloc_rsp(queue->dev, rsp);
> + ret = nvmet_rdma_alloc_rsp(queue->dev, rsp, -1);
> if (unlikely(ret)) {
> kfree(rsp);
> return NULL;
> }
> -
> - rsp->allocated = true;
> }
>
> return rsp;
> @@ -241,17 +236,13 @@ nvmet_rdma_get_rsp(struct nvmet_rdma_queue *queue)
> static inline void
> nvmet_rdma_put_rsp(struct nvmet_rdma_rsp *rsp)
> {
> - unsigned long flags;
> -
> - if (unlikely(rsp->allocated)) {
> + if (unlikely(rsp->tag < 0)) {
> nvmet_rdma_free_rsp(rsp->queue->dev, rsp);
> kfree(rsp);
> return;
> }
>
> - spin_lock_irqsave(&rsp->queue->rsps_lock, flags);
> - list_add_tail(&rsp->free_list, &rsp->queue->free_rsps);
> - spin_unlock_irqrestore(&rsp->queue->rsps_lock, flags);
> + sbitmap_clear_bit(&rsp->queue->rsp_tags, rsp->tag);
> }
>
> static void nvmet_rdma_free_inline_pages(struct nvmet_rdma_device *ndev,
> @@ -404,7 +395,7 @@ static void nvmet_rdma_free_cmds(struct nvmet_rdma_device *ndev,
> }
>
> static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev,
> - struct nvmet_rdma_rsp *r)
> + struct nvmet_rdma_rsp *r, int tag)
> {
> /* NVMe CQE / RDMA SEND */
> r->req.cqe = kmalloc(sizeof(*r->req.cqe), GFP_KERNEL);
> @@ -432,6 +423,7 @@ static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev,
> r->read_cqe.done = nvmet_rdma_read_data_done;
> /* Data Out / RDMA WRITE */
> r->write_cqe.done = nvmet_rdma_write_data_done;
> + r->tag = tag;
>
> return 0;
>
> @@ -454,21 +446,23 @@ nvmet_rdma_alloc_rsps(struct nvmet_rdma_queue *queue)
> {
> struct nvmet_rdma_device *ndev = queue->dev;
> int nr_rsps = queue->recv_queue_size * 2;
> - int ret = -EINVAL, i;
> + int ret = -ENOMEM, i;
> +
> + if (sbitmap_init_node(&queue->rsp_tags, nr_rsps, -1, GFP_KERNEL,
> + NUMA_NO_NODE, false, true))
> + goto out;
>
> queue->rsps = kcalloc(nr_rsps, sizeof(struct nvmet_rdma_rsp),
> GFP_KERNEL);
> if (!queue->rsps)
> - goto out;
> + goto out_free_sbitmap;
>
> for (i = 0; i < nr_rsps; i++) {
> struct nvmet_rdma_rsp *rsp = &queue->rsps[i];
>
> - ret = nvmet_rdma_alloc_rsp(ndev, rsp);
> + ret = nvmet_rdma_alloc_rsp(ndev, rsp, i);
> if (ret)
> goto out_free;
> -
> - list_add_tail(&rsp->free_list, &queue->free_rsps);
> }
>
> return 0;
> @@ -477,6 +471,8 @@ nvmet_rdma_alloc_rsps(struct nvmet_rdma_queue *queue)
> while (--i >= 0)
> nvmet_rdma_free_rsp(ndev, &queue->rsps[i]);
> kfree(queue->rsps);
> +out_free_sbitmap:
> + sbitmap_free(&queue->rsp_tags);
> out:
> return ret;
> }
> @@ -489,6 +485,7 @@ static void nvmet_rdma_free_rsps(struct nvmet_rdma_queue *queue)
> for (i = 0; i < nr_rsps; i++)
> nvmet_rdma_free_rsp(ndev, &queue->rsps[i]);
> kfree(queue->rsps);
> + sbitmap_free(&queue->rsp_tags);
> }
>
> static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev,
> @@ -1447,8 +1444,6 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev,
> INIT_LIST_HEAD(&queue->rsp_wait_list);
> INIT_LIST_HEAD(&queue->rsp_wr_wait_list);
> spin_lock_init(&queue->rsp_wr_wait_lock);
> - INIT_LIST_HEAD(&queue->free_rsps);
> - spin_lock_init(&queue->rsps_lock);
> INIT_LIST_HEAD(&queue->queue_list);
>
> queue->idx = ida_alloc(&nvmet_rdma_queue_ida, GFP_KERNEL);
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] nvmet-rdma: use sbitmap to replace rsp free list
2024-09-25 10:51 [PATCH] nvmet-rdma: use sbitmap to replace rsp free list Guixin Liu
2024-10-05 14:06 ` Guixin Liu
@ 2024-10-07 7:05 ` Christoph Hellwig
2024-10-08 9:06 ` Guixin Liu
1 sibling, 1 reply; 4+ messages in thread
From: Christoph Hellwig @ 2024-10-07 7:05 UTC (permalink / raw)
To: Guixin Liu; +Cc: hch, sagi, kch, linux-nvme
On Wed, Sep 25, 2024 at 06:51:00PM +0800, Guixin Liu wrote:
> We can use sbitmap to manage all the nvmet_rdma_rsp instead of using
> free lists and spinlock, and we can use an additional tag to
> determine whether the nvmet_rdma_rsp is extra allocated.
Can you explain why? I guess it's better performance for some definition
of "performance", but it would be great to state that here, preferably
including numbers.
> static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev,
> - struct nvmet_rdma_rsp *r);
> + struct nvmet_rdma_rsp *r,
> + int tag);
Nit: tag can go on the line above.
> - ret = nvmet_rdma_alloc_rsp(queue->dev, rsp);
> + ret = nvmet_rdma_alloc_rsp(queue->dev, rsp, -1);
Please add a symbolic name (NVME_RDMA_RSP_DYNAMIC?) for the magic -1
tag.
> @@ -241,17 +236,13 @@ nvmet_rdma_get_rsp(struct nvmet_rdma_queue *queue)
> static inline void
> nvmet_rdma_put_rsp(struct nvmet_rdma_rsp *rsp)
> {
> - unsigned long flags;
> -
> - if (unlikely(rsp->allocated)) {
> + if (unlikely(rsp->tag < 0)) {
.. and use it here.
Otherwise this looks reasonable to me.
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] nvmet-rdma: use sbitmap to replace rsp free list
2024-10-07 7:05 ` Christoph Hellwig
@ 2024-10-08 9:06 ` Guixin Liu
0 siblings, 0 replies; 4+ messages in thread
From: Guixin Liu @ 2024-10-08 9:06 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: sagi, kch, linux-nvme
在 2024/10/7 15:05, Christoph Hellwig 写道:
> On Wed, Sep 25, 2024 at 06:51:00PM +0800, Guixin Liu wrote:
>> We can use sbitmap to manage all the nvmet_rdma_rsp instead of using
>> free lists and spinlock, and we can use an additional tag to
>> determine whether the nvmet_rdma_rsp is extra allocated.
> Can you explain why? I guess it's better performance for some definition
> of "performance", but it would be great to state that here, preferably
> including numbers.
Sure, my test result:
Use local rxe rdma device and mem-based back device.
Fio command, test the average 5 times:
fio -filename=/dev/nvme0n1 --ioengine=libaio -direct=1 -size=1G -name=1
-thread -runtime=60 -time_based -rw=read -numjobs=16 -iodepth=128 -bs=4k
-group_reporting
Before: 241k IOPS
After: 256k IOPS
I will add the result into v2.
>> static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev,
>> - struct nvmet_rdma_rsp *r);
>> + struct nvmet_rdma_rsp *r,
>> + int tag);
> Nit: take can go on the line above.
OK
>> - ret = nvmet_rdma_alloc_rsp(queue->dev, rsp);
>> + ret = nvmet_rdma_alloc_rsp(queue->dev, rsp, -1);
> Please add a symbolic name (NVME_RDMA_RSP_DYNAMIC?) for the magic -1
> tag.
OK
>> @@ -241,17 +236,13 @@ nvmet_rdma_get_rsp(struct nvmet_rdma_queue *queue)
>> static inline void
>> nvmet_rdma_put_rsp(struct nvmet_rdma_rsp *rsp)
>> {
>> - unsigned long flags;
>> -
>> - if (unlikely(rsp->allocated)) {
>> + if (unlikely(rsp->tag < 0)) {
> .. and use it here.
>
> Otherwise this looks reasonable to me.
Thanks, Best Regards,
Guixin Liu
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads: [~2024-10-08 9:22 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-09-25 10:51 [PATCH] nvmet-rdma: use sbitmap to replace rsp free list Guixin Liu
2024-10-05 14:06 ` Guixin Liu
2024-10-07 7:05 ` Christoph Hellwig
2024-10-08 9:06 ` Guixin Liu
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox