Linux-NVME Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 1/1] nvmet-rdma: Add error flow for post_recv failures
@ 2018-06-27 11:58 Max Gurtovoy
  2018-06-27 12:53 ` Max Gurtovoy
  2018-06-28 14:38 ` Sagi Grimberg
  0 siblings, 2 replies; 5+ messages in thread
From: Max Gurtovoy @ 2018-06-27 11:58 UTC (permalink / raw)


Posting receive buffer operation can fail, thus we should
make sure there is no memory leakage in that flow. Also add
an error log in case of a failure.

Signed-off-by: Max Gurtovoy <maxg at mellanox.com>
---

Changes from v1:
 - centrelize the error log to nvmet_rdma_post_recv func
 - don't release response during post_recv failure in nvmet_rdma_queue_response.
   Instead, continue and try sending the response to the initiator.

---
 drivers/nvme/target/rdma.c | 27 ++++++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 52e0c5d..88d9f82 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -383,14 +383,22 @@ static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev,
 		struct nvmet_rdma_cmd *cmd)
 {
 	struct ib_recv_wr *bad_wr;
+	int ret;
 
 	ib_dma_sync_single_for_device(ndev->device,
 		cmd->sge[0].addr, cmd->sge[0].length,
 		DMA_FROM_DEVICE);
 
 	if (ndev->srq)
-		return ib_post_srq_recv(ndev->srq, &cmd->wr, &bad_wr);
-	return ib_post_recv(cmd->queue->cm_id->qp, &cmd->wr, &bad_wr);
+		ret = ib_post_srq_recv(ndev->srq, &cmd->wr, &bad_wr);
+	else
+		ret = ib_post_recv(cmd->queue->cm_id->qp, &cmd->wr, &bad_wr);
+
+	if (unlikely(ret))
+		pr_err("post_recv cmd failed for %s 0x%p\n",
+		       ndev->srq ? "SRQ" : "QP",
+		       ndev->srq ? ndev->srq : cmd->queue->cm_id->qp);
+	return ret;
 }
 
 static void nvmet_rdma_process_wr_wait_list(struct nvmet_rdma_queue *queue)
@@ -765,11 +773,16 @@ static int nvmet_rdma_init_srq(struct nvmet_rdma_device *ndev)
 	ndev->srq = srq;
 	ndev->srq_size = srq_size;
 
-	for (i = 0; i < srq_size; i++)
-		nvmet_rdma_post_recv(ndev, &ndev->srq_cmds[i]);
+	for (i = 0; i < srq_size; i++) {
+		ret = nvmet_rdma_post_recv(ndev, &ndev->srq_cmds[i]);
+		if (ret)
+			goto out_free_cmds;
+	}
 
 	return 0;
 
+out_free_cmds:
+	nvmet_rdma_free_cmds(ndev, ndev->srq_cmds, ndev->srq_size, false);
 out_destroy_srq:
 	ib_destroy_srq(srq);
 	return ret;
@@ -899,13 +912,17 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
 	if (!ndev->srq) {
 		for (i = 0; i < queue->recv_queue_size; i++) {
 			queue->cmds[i].queue = queue;
-			nvmet_rdma_post_recv(ndev, &queue->cmds[i]);
+			ret = nvmet_rdma_post_recv(ndev, &queue->cmds[i]);
+			if (ret)
+				goto err_destroy_qp;
 		}
 	}
 
 out:
 	return ret;
 
+err_destroy_qp:
+	rdma_destroy_qp(queue->cm_id);
 err_destroy_cq:
 	ib_free_cq(queue->cq);
 	goto out;
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH v2 1/1] nvmet-rdma: Add error flow for post_recv failures
  2018-06-27 11:58 [PATCH v2 1/1] nvmet-rdma: Add error flow for post_recv failures Max Gurtovoy
@ 2018-06-27 12:53 ` Max Gurtovoy
  2018-06-28 14:38 ` Sagi Grimberg
  1 sibling, 0 replies; 5+ messages in thread
From: Max Gurtovoy @ 2018-06-27 12:53 UTC (permalink / raw)




On 6/27/2018 2:58 PM, Max Gurtovoy wrote:
> Posting receive buffer operation can fail, thus we should
> make sure there is no memory leakage in that flow. Also add
> an error log in case of a failure.
> 
> Signed-off-by: Max Gurtovoy <maxg at mellanox.com>
> ---
> 
> Changes from v1:
>   - centrelize the error log to nvmet_rdma_post_recv func
>   - don't release response during post_recv failure in nvmet_rdma_queue_response.
>     Instead, continue and try sending the response to the initiator.
> 
> ---
>   drivers/nvme/target/rdma.c | 27 ++++++++++++++++++++++-----
>   1 file changed, 22 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
> index 52e0c5d..88d9f82 100644
> --- a/drivers/nvme/target/rdma.c
> +++ b/drivers/nvme/target/rdma.c
> @@ -383,14 +383,22 @@ static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev,
>   		struct nvmet_rdma_cmd *cmd)
>   {
>   	struct ib_recv_wr *bad_wr;
> +	int ret;
>   
>   	ib_dma_sync_single_for_device(ndev->device,
>   		cmd->sge[0].addr, cmd->sge[0].length,
>   		DMA_FROM_DEVICE);
>   
>   	if (ndev->srq)
> -		return ib_post_srq_recv(ndev->srq, &cmd->wr, &bad_wr);
> -	return ib_post_recv(cmd->queue->cm_id->qp, &cmd->wr, &bad_wr);
> +		ret = ib_post_srq_recv(ndev->srq, &cmd->wr, &bad_wr);
> +	else
> +		ret = ib_post_recv(cmd->queue->cm_id->qp, &cmd->wr, &bad_wr);
> +
> +	if (unlikely(ret))
> +		pr_err("post_recv cmd failed for %s 0x%p\n",
> +		       ndev->srq ? "SRQ" : "QP",
> +		       ndev->srq ? ndev->srq : cmd->queue->cm_id->qp);

In second thought maybe I'll leave it:

pr_err("post_recv cmd failed\n");

agreed ?

> +	return ret;
>   }
>   
>   static void nvmet_rdma_process_wr_wait_list(struct nvmet_rdma_queue *queue)
> @@ -765,11 +773,16 @@ static int nvmet_rdma_init_srq(struct nvmet_rdma_device *ndev)
>   	ndev->srq = srq;
>   	ndev->srq_size = srq_size;
>   
> -	for (i = 0; i < srq_size; i++)
> -		nvmet_rdma_post_recv(ndev, &ndev->srq_cmds[i]);
> +	for (i = 0; i < srq_size; i++) {
> +		ret = nvmet_rdma_post_recv(ndev, &ndev->srq_cmds[i]);
> +		if (ret)
> +			goto out_free_cmds;
> +	}
>   
>   	return 0;
>   
> +out_free_cmds:
> +	nvmet_rdma_free_cmds(ndev, ndev->srq_cmds, ndev->srq_size, false);
>   out_destroy_srq:
>   	ib_destroy_srq(srq);
>   	return ret;
> @@ -899,13 +912,17 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
>   	if (!ndev->srq) {
>   		for (i = 0; i < queue->recv_queue_size; i++) {
>   			queue->cmds[i].queue = queue;
> -			nvmet_rdma_post_recv(ndev, &queue->cmds[i]);
> +			ret = nvmet_rdma_post_recv(ndev, &queue->cmds[i]);
> +			if (ret)
> +				goto err_destroy_qp;
>   		}
>   	}
>   
>   out:
>   	return ret;
>   
> +err_destroy_qp:
> +	rdma_destroy_qp(queue->cm_id);
>   err_destroy_cq:
>   	ib_free_cq(queue->cq);
>   	goto out;
> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH v2 1/1] nvmet-rdma: Add error flow for post_recv failures
  2018-06-27 11:58 [PATCH v2 1/1] nvmet-rdma: Add error flow for post_recv failures Max Gurtovoy
  2018-06-27 12:53 ` Max Gurtovoy
@ 2018-06-28 14:38 ` Sagi Grimberg
  2018-06-28 15:02   ` Max Gurtovoy
  1 sibling, 1 reply; 5+ messages in thread
From: Sagi Grimberg @ 2018-06-28 14:38 UTC (permalink / raw)



> Posting receive buffer operation can fail, thus we should
> make sure there is no memory leakage in that flow. Also add
> an error log in case of a failure.

What memory leak? resulting from post_recv errors?

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH v2 1/1] nvmet-rdma: Add error flow for post_recv failures
  2018-06-28 14:38 ` Sagi Grimberg
@ 2018-06-28 15:02   ` Max Gurtovoy
  2018-06-28 16:15     ` Sagi Grimberg
  0 siblings, 1 reply; 5+ messages in thread
From: Max Gurtovoy @ 2018-06-28 15:02 UTC (permalink / raw)




On 6/28/2018 5:38 PM, Sagi Grimberg wrote:
> 
>> Posting receive buffer operation can fail, thus we should
>> make sure there is no memory leakage in that flow. Also add
>> an error log in case of a failure.
> 
> What memory leak? resulting from post_recv errors?

We are missing error flows in nvmet_rdma_create_queue_ib and 
nvmet_rdma_init_srq. I can remove the leakage from change log as I see 
that we should be ok (unless of some unexpected bahaviour) in the 
current code when we ignore failure of post_recv.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH v2 1/1] nvmet-rdma: Add error flow for post_recv failures
  2018-06-28 15:02   ` Max Gurtovoy
@ 2018-06-28 16:15     ` Sagi Grimberg
  0 siblings, 0 replies; 5+ messages in thread
From: Sagi Grimberg @ 2018-06-28 16:15 UTC (permalink / raw)



>>> Posting receive buffer operation can fail, thus we should
>>> make sure there is no memory leakage in that flow. Also add
>>> an error log in case of a failure.
>>
>> What memory leak? resulting from post_recv errors?
> 
> We are missing error flows in nvmet_rdma_create_queue_ib and 
> nvmet_rdma_init_srq. I can remove the leakage from change log as I see 
> that we should be ok (unless of some unexpected bahaviour) in the 
> current code when we ignore failure of post_recv.

Lets remove the leakage from the change log if its not something that
was actually seen. It is confusing.

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2018-06-28 16:15 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2018-06-27 11:58 [PATCH v2 1/1] nvmet-rdma: Add error flow for post_recv failures Max Gurtovoy
2018-06-27 12:53 ` Max Gurtovoy
2018-06-28 14:38 ` Sagi Grimberg
2018-06-28 15:02   ` Max Gurtovoy
2018-06-28 16:15     ` Sagi Grimberg

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox