Linux-NVME Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] nvmet-rdma: use sbitmap to replace rsp free list
@ 2024-09-25 10:51 Guixin Liu
  2024-10-05 14:06 ` Guixin Liu
  2024-10-07  7:05 ` Christoph Hellwig
  0 siblings, 2 replies; 4+ messages in thread
From: Guixin Liu @ 2024-09-25 10:51 UTC (permalink / raw)
  To: hch, sagi, kch; +Cc: linux-nvme

We can use an sbitmap to manage all the nvmet_rdma_rsp structures instead
of a free list protected by a spinlock, and we can use an additional tag to
determine whether an nvmet_rdma_rsp was dynamically allocated.

Signed-off-by: Guixin Liu <kanie@linux.alibaba.com>
---
 drivers/nvme/target/rdma.c | 53 +++++++++++++++++---------------------
 1 file changed, 24 insertions(+), 29 deletions(-)

diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 689bb5d3cfdc..4fa1aa0f46a7 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -75,7 +75,7 @@ struct nvmet_rdma_rsp {
 	u32			invalidate_rkey;
 
 	struct list_head	wait_list;
-	struct list_head	free_list;
+	int			tag;
 };
 
 enum nvmet_rdma_queue_state {
@@ -98,8 +98,7 @@ struct nvmet_rdma_queue {
 	struct nvmet_sq		nvme_sq;
 
 	struct nvmet_rdma_rsp	*rsps;
-	struct list_head	free_rsps;
-	spinlock_t		rsps_lock;
+	struct sbitmap		rsp_tags;
 	struct nvmet_rdma_cmd	*cmds;
 
 	struct work_struct	release_work;
@@ -172,7 +171,8 @@ static void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue);
 static void nvmet_rdma_free_rsp(struct nvmet_rdma_device *ndev,
 				struct nvmet_rdma_rsp *r);
 static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev,
-				struct nvmet_rdma_rsp *r);
+				struct nvmet_rdma_rsp *r,
+				int tag);
 
 static const struct nvmet_fabrics_ops nvmet_rdma_ops;
 
@@ -210,15 +210,12 @@ static inline bool nvmet_rdma_need_data_out(struct nvmet_rdma_rsp *rsp)
 static inline struct nvmet_rdma_rsp *
 nvmet_rdma_get_rsp(struct nvmet_rdma_queue *queue)
 {
-	struct nvmet_rdma_rsp *rsp;
-	unsigned long flags;
+	struct nvmet_rdma_rsp *rsp = NULL;
+	int tag;
 
-	spin_lock_irqsave(&queue->rsps_lock, flags);
-	rsp = list_first_entry_or_null(&queue->free_rsps,
-				struct nvmet_rdma_rsp, free_list);
-	if (likely(rsp))
-		list_del(&rsp->free_list);
-	spin_unlock_irqrestore(&queue->rsps_lock, flags);
+	tag = sbitmap_get(&queue->rsp_tags);
+	if (tag >= 0)
+		rsp = &queue->rsps[tag];
 
 	if (unlikely(!rsp)) {
 		int ret;
@@ -226,13 +223,11 @@ nvmet_rdma_get_rsp(struct nvmet_rdma_queue *queue)
 		rsp = kzalloc(sizeof(*rsp), GFP_KERNEL);
 		if (unlikely(!rsp))
 			return NULL;
-		ret = nvmet_rdma_alloc_rsp(queue->dev, rsp);
+		ret = nvmet_rdma_alloc_rsp(queue->dev, rsp, -1);
 		if (unlikely(ret)) {
 			kfree(rsp);
 			return NULL;
 		}
-
-		rsp->allocated = true;
 	}
 
 	return rsp;
@@ -241,17 +236,13 @@ nvmet_rdma_get_rsp(struct nvmet_rdma_queue *queue)
 static inline void
 nvmet_rdma_put_rsp(struct nvmet_rdma_rsp *rsp)
 {
-	unsigned long flags;
-
-	if (unlikely(rsp->allocated)) {
+	if (unlikely(rsp->tag < 0)) {
 		nvmet_rdma_free_rsp(rsp->queue->dev, rsp);
 		kfree(rsp);
 		return;
 	}
 
-	spin_lock_irqsave(&rsp->queue->rsps_lock, flags);
-	list_add_tail(&rsp->free_list, &rsp->queue->free_rsps);
-	spin_unlock_irqrestore(&rsp->queue->rsps_lock, flags);
+	sbitmap_clear_bit(&rsp->queue->rsp_tags, rsp->tag);
 }
 
 static void nvmet_rdma_free_inline_pages(struct nvmet_rdma_device *ndev,
@@ -404,7 +395,7 @@ static void nvmet_rdma_free_cmds(struct nvmet_rdma_device *ndev,
 }
 
 static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev,
-		struct nvmet_rdma_rsp *r)
+		struct nvmet_rdma_rsp *r, int tag)
 {
 	/* NVMe CQE / RDMA SEND */
 	r->req.cqe = kmalloc(sizeof(*r->req.cqe), GFP_KERNEL);
@@ -432,6 +423,7 @@ static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev,
 	r->read_cqe.done = nvmet_rdma_read_data_done;
 	/* Data Out / RDMA WRITE */
 	r->write_cqe.done = nvmet_rdma_write_data_done;
+	r->tag = tag;
 
 	return 0;
 
@@ -454,21 +446,23 @@ nvmet_rdma_alloc_rsps(struct nvmet_rdma_queue *queue)
 {
 	struct nvmet_rdma_device *ndev = queue->dev;
 	int nr_rsps = queue->recv_queue_size * 2;
-	int ret = -EINVAL, i;
+	int ret = -ENOMEM, i;
+
+	if (sbitmap_init_node(&queue->rsp_tags, nr_rsps, -1, GFP_KERNEL,
+			NUMA_NO_NODE, false, true))
+		goto out;
 
 	queue->rsps = kcalloc(nr_rsps, sizeof(struct nvmet_rdma_rsp),
 			GFP_KERNEL);
 	if (!queue->rsps)
-		goto out;
+		goto out_free_sbitmap;
 
 	for (i = 0; i < nr_rsps; i++) {
 		struct nvmet_rdma_rsp *rsp = &queue->rsps[i];
 
-		ret = nvmet_rdma_alloc_rsp(ndev, rsp);
+		ret = nvmet_rdma_alloc_rsp(ndev, rsp, i);
 		if (ret)
 			goto out_free;
-
-		list_add_tail(&rsp->free_list, &queue->free_rsps);
 	}
 
 	return 0;
@@ -477,6 +471,8 @@ nvmet_rdma_alloc_rsps(struct nvmet_rdma_queue *queue)
 	while (--i >= 0)
 		nvmet_rdma_free_rsp(ndev, &queue->rsps[i]);
 	kfree(queue->rsps);
+out_free_sbitmap:
+	sbitmap_free(&queue->rsp_tags);
 out:
 	return ret;
 }
@@ -489,6 +485,7 @@ static void nvmet_rdma_free_rsps(struct nvmet_rdma_queue *queue)
 	for (i = 0; i < nr_rsps; i++)
 		nvmet_rdma_free_rsp(ndev, &queue->rsps[i]);
 	kfree(queue->rsps);
+	sbitmap_free(&queue->rsp_tags);
 }
 
 static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev,
@@ -1447,8 +1444,6 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev,
 	INIT_LIST_HEAD(&queue->rsp_wait_list);
 	INIT_LIST_HEAD(&queue->rsp_wr_wait_list);
 	spin_lock_init(&queue->rsp_wr_wait_lock);
-	INIT_LIST_HEAD(&queue->free_rsps);
-	spin_lock_init(&queue->rsps_lock);
 	INIT_LIST_HEAD(&queue->queue_list);
 
 	queue->idx = ida_alloc(&nvmet_rdma_queue_ida, GFP_KERNEL);
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] nvmet-rdma: use sbitmap to replace rsp free list
  2024-09-25 10:51 [PATCH] nvmet-rdma: use sbitmap to replace rsp free list Guixin Liu
@ 2024-10-05 14:06 ` Guixin Liu
  2024-10-07  7:05 ` Christoph Hellwig
  1 sibling, 0 replies; 4+ messages in thread
From: Guixin Liu @ 2024-10-05 14:06 UTC (permalink / raw)
  To: hch, sagi, kch; +Cc: linux-nvme

gentle ping...

在 2024/9/25 18:51, Guixin Liu 写道:
> We can use sbitmap to manage all the nvmet_rdma_rsp instead of using
> free lists and spinlock, and we can use an additional tag to
> determine whether the nvmet_rdma_rsp is extra allocated.
>
> Signed-off-by: Guixin Liu <kanie@linux.alibaba.com>
> ---
>   drivers/nvme/target/rdma.c | 53 +++++++++++++++++---------------------
>   1 file changed, 24 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
> index 689bb5d3cfdc..4fa1aa0f46a7 100644
> --- a/drivers/nvme/target/rdma.c
> +++ b/drivers/nvme/target/rdma.c
> @@ -75,7 +75,7 @@ struct nvmet_rdma_rsp {
>   	u32			invalidate_rkey;
>   
>   	struct list_head	wait_list;
> -	struct list_head	free_list;
> +	int			tag;
>   };
>   
>   enum nvmet_rdma_queue_state {
> @@ -98,8 +98,7 @@ struct nvmet_rdma_queue {
>   	struct nvmet_sq		nvme_sq;
>   
>   	struct nvmet_rdma_rsp	*rsps;
> -	struct list_head	free_rsps;
> -	spinlock_t		rsps_lock;
> +	struct sbitmap		rsp_tags;
>   	struct nvmet_rdma_cmd	*cmds;
>   
>   	struct work_struct	release_work;
> @@ -172,7 +171,8 @@ static void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue);
>   static void nvmet_rdma_free_rsp(struct nvmet_rdma_device *ndev,
>   				struct nvmet_rdma_rsp *r);
>   static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev,
> -				struct nvmet_rdma_rsp *r);
> +				struct nvmet_rdma_rsp *r,
> +				int tag);
>   
>   static const struct nvmet_fabrics_ops nvmet_rdma_ops;
>   
> @@ -210,15 +210,12 @@ static inline bool nvmet_rdma_need_data_out(struct nvmet_rdma_rsp *rsp)
>   static inline struct nvmet_rdma_rsp *
>   nvmet_rdma_get_rsp(struct nvmet_rdma_queue *queue)
>   {
> -	struct nvmet_rdma_rsp *rsp;
> -	unsigned long flags;
> +	struct nvmet_rdma_rsp *rsp = NULL;
> +	int tag;
>   
> -	spin_lock_irqsave(&queue->rsps_lock, flags);
> -	rsp = list_first_entry_or_null(&queue->free_rsps,
> -				struct nvmet_rdma_rsp, free_list);
> -	if (likely(rsp))
> -		list_del(&rsp->free_list);
> -	spin_unlock_irqrestore(&queue->rsps_lock, flags);
> +	tag = sbitmap_get(&queue->rsp_tags);
> +	if (tag >= 0)
> +		rsp = &queue->rsps[tag];
>   
>   	if (unlikely(!rsp)) {
>   		int ret;
> @@ -226,13 +223,11 @@ nvmet_rdma_get_rsp(struct nvmet_rdma_queue *queue)
>   		rsp = kzalloc(sizeof(*rsp), GFP_KERNEL);
>   		if (unlikely(!rsp))
>   			return NULL;
> -		ret = nvmet_rdma_alloc_rsp(queue->dev, rsp);
> +		ret = nvmet_rdma_alloc_rsp(queue->dev, rsp, -1);
>   		if (unlikely(ret)) {
>   			kfree(rsp);
>   			return NULL;
>   		}
> -
> -		rsp->allocated = true;
>   	}
>   
>   	return rsp;
> @@ -241,17 +236,13 @@ nvmet_rdma_get_rsp(struct nvmet_rdma_queue *queue)
>   static inline void
>   nvmet_rdma_put_rsp(struct nvmet_rdma_rsp *rsp)
>   {
> -	unsigned long flags;
> -
> -	if (unlikely(rsp->allocated)) {
> +	if (unlikely(rsp->tag < 0)) {
>   		nvmet_rdma_free_rsp(rsp->queue->dev, rsp);
>   		kfree(rsp);
>   		return;
>   	}
>   
> -	spin_lock_irqsave(&rsp->queue->rsps_lock, flags);
> -	list_add_tail(&rsp->free_list, &rsp->queue->free_rsps);
> -	spin_unlock_irqrestore(&rsp->queue->rsps_lock, flags);
> +	sbitmap_clear_bit(&rsp->queue->rsp_tags, rsp->tag);
>   }
>   
>   static void nvmet_rdma_free_inline_pages(struct nvmet_rdma_device *ndev,
> @@ -404,7 +395,7 @@ static void nvmet_rdma_free_cmds(struct nvmet_rdma_device *ndev,
>   }
>   
>   static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev,
> -		struct nvmet_rdma_rsp *r)
> +		struct nvmet_rdma_rsp *r, int tag)
>   {
>   	/* NVMe CQE / RDMA SEND */
>   	r->req.cqe = kmalloc(sizeof(*r->req.cqe), GFP_KERNEL);
> @@ -432,6 +423,7 @@ static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev,
>   	r->read_cqe.done = nvmet_rdma_read_data_done;
>   	/* Data Out / RDMA WRITE */
>   	r->write_cqe.done = nvmet_rdma_write_data_done;
> +	r->tag = tag;
>   
>   	return 0;
>   
> @@ -454,21 +446,23 @@ nvmet_rdma_alloc_rsps(struct nvmet_rdma_queue *queue)
>   {
>   	struct nvmet_rdma_device *ndev = queue->dev;
>   	int nr_rsps = queue->recv_queue_size * 2;
> -	int ret = -EINVAL, i;
> +	int ret = -ENOMEM, i;
> +
> +	if (sbitmap_init_node(&queue->rsp_tags, nr_rsps, -1, GFP_KERNEL,
> +			NUMA_NO_NODE, false, true))
> +		goto out;
>   
>   	queue->rsps = kcalloc(nr_rsps, sizeof(struct nvmet_rdma_rsp),
>   			GFP_KERNEL);
>   	if (!queue->rsps)
> -		goto out;
> +		goto out_free_sbitmap;
>   
>   	for (i = 0; i < nr_rsps; i++) {
>   		struct nvmet_rdma_rsp *rsp = &queue->rsps[i];
>   
> -		ret = nvmet_rdma_alloc_rsp(ndev, rsp);
> +		ret = nvmet_rdma_alloc_rsp(ndev, rsp, i);
>   		if (ret)
>   			goto out_free;
> -
> -		list_add_tail(&rsp->free_list, &queue->free_rsps);
>   	}
>   
>   	return 0;
> @@ -477,6 +471,8 @@ nvmet_rdma_alloc_rsps(struct nvmet_rdma_queue *queue)
>   	while (--i >= 0)
>   		nvmet_rdma_free_rsp(ndev, &queue->rsps[i]);
>   	kfree(queue->rsps);
> +out_free_sbitmap:
> +	sbitmap_free(&queue->rsp_tags);
>   out:
>   	return ret;
>   }
> @@ -489,6 +485,7 @@ static void nvmet_rdma_free_rsps(struct nvmet_rdma_queue *queue)
>   	for (i = 0; i < nr_rsps; i++)
>   		nvmet_rdma_free_rsp(ndev, &queue->rsps[i]);
>   	kfree(queue->rsps);
> +	sbitmap_free(&queue->rsp_tags);
>   }
>   
>   static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev,
> @@ -1447,8 +1444,6 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev,
>   	INIT_LIST_HEAD(&queue->rsp_wait_list);
>   	INIT_LIST_HEAD(&queue->rsp_wr_wait_list);
>   	spin_lock_init(&queue->rsp_wr_wait_lock);
> -	INIT_LIST_HEAD(&queue->free_rsps);
> -	spin_lock_init(&queue->rsps_lock);
>   	INIT_LIST_HEAD(&queue->queue_list);
>   
>   	queue->idx = ida_alloc(&nvmet_rdma_queue_ida, GFP_KERNEL);


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] nvmet-rdma: use sbitmap to replace rsp free list
  2024-09-25 10:51 [PATCH] nvmet-rdma: use sbitmap to replace rsp free list Guixin Liu
  2024-10-05 14:06 ` Guixin Liu
@ 2024-10-07  7:05 ` Christoph Hellwig
  2024-10-08  9:06   ` Guixin Liu
  1 sibling, 1 reply; 4+ messages in thread
From: Christoph Hellwig @ 2024-10-07  7:05 UTC (permalink / raw)
  To: Guixin Liu; +Cc: hch, sagi, kch, linux-nvme

On Wed, Sep 25, 2024 at 06:51:00PM +0800, Guixin Liu wrote:
> We can use sbitmap to manage all the nvmet_rdma_rsp instead of using
> free lists and spinlock, and we can use an additional tag to
> determine whether the nvmet_rdma_rsp is extra allocated.

Can you explain why?  I guess it's better performance for some definition
of "performance", but it would be great to state that here, preferably
including numbers.

>  static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev,
> -				struct nvmet_rdma_rsp *r);
> +				struct nvmet_rdma_rsp *r,
> +				int tag);

Nit: tag can go on the line above.

> -		ret = nvmet_rdma_alloc_rsp(queue->dev, rsp);
> +		ret = nvmet_rdma_alloc_rsp(queue->dev, rsp, -1);

Please add a symbolic name (NVME_RDMA_RSP_DYNAMIC?) for the magic -1
tag.

> @@ -241,17 +236,13 @@ nvmet_rdma_get_rsp(struct nvmet_rdma_queue *queue)
>  static inline void
>  nvmet_rdma_put_rsp(struct nvmet_rdma_rsp *rsp)
>  {
> -	unsigned long flags;
> -
> -	if (unlikely(rsp->allocated)) {
> +	if (unlikely(rsp->tag < 0)) {

.. and use it here.

Otherwise this looks reasonable to me.


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] nvmet-rdma: use sbitmap to replace rsp free list
  2024-10-07  7:05 ` Christoph Hellwig
@ 2024-10-08  9:06   ` Guixin Liu
  0 siblings, 0 replies; 4+ messages in thread
From: Guixin Liu @ 2024-10-08  9:06 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: sagi, kch, linux-nvme


在 2024/10/7 15:05, Christoph Hellwig 写道:
> On Wed, Sep 25, 2024 at 06:51:00PM +0800, Guixin Liu wrote:
>> We can use sbitmap to manage all the nvmet_rdma_rsp instead of using
>> free lists and spinlock, and we can use an additional tag to
>> determine whether the nvmet_rdma_rsp is extra allocated.
> Can you explain why?  I guess it's better performance for some definition
> of "performance", but it would be great to state that here, preferably
> including numbers.

Sure, my test result:

Use local rxe rdma device and mem-based back device.

Fio command, test the average 5 times:

fio -filename=/dev/nvme0n1 --ioengine=libaio -direct=1 -size=1G -name=1 
-thread -runtime=60 -time_based -rw=read -numjobs=16 -iodepth=128 -bs=4k 
-group_reporting

Before: 241k IOPS

After:  256k IOPS

I will add the result into v2.

>>   static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev,
>> -				struct nvmet_rdma_rsp *r);
>> +				struct nvmet_rdma_rsp *r,
>> +				int tag);
> Nit: take can go on the line above.
OK
>> -		ret = nvmet_rdma_alloc_rsp(queue->dev, rsp);
>> +		ret = nvmet_rdma_alloc_rsp(queue->dev, rsp, -1);
> Please add a symbolic name (NVME_RDMA_RSP_DYNAMIC?) for the magic -1
> tag.
OK
>> @@ -241,17 +236,13 @@ nvmet_rdma_get_rsp(struct nvmet_rdma_queue *queue)
>>   static inline void
>>   nvmet_rdma_put_rsp(struct nvmet_rdma_rsp *rsp)
>>   {
>> -	unsigned long flags;
>> -
>> -	if (unlikely(rsp->allocated)) {
>> +	if (unlikely(rsp->tag < 0)) {
> .. and use it here.
>
> Otherwise this looks reasonable to me.

Thanks, Best Regards,

Guixin Liu



^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2024-10-08  9:22 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-09-25 10:51 [PATCH] nvmet-rdma: use sbitmap to replace rsp free list Guixin Liu
2024-10-05 14:06 ` Guixin Liu
2024-10-07  7:05 ` Christoph Hellwig
2024-10-08  9:06   ` Guixin Liu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox