* [PATCH v2 1/1] nvmet-rdma: Add error flow for post_recv failures
@ 2018-06-27 11:58 Max Gurtovoy
2018-06-27 12:53 ` Max Gurtovoy
2018-06-28 14:38 ` Sagi Grimberg
0 siblings, 2 replies; 5+ messages in thread
From: Max Gurtovoy @ 2018-06-27 11:58 UTC (permalink / raw)
Posting receive buffer operation can fail, thus we should
make sure there is no memory leakage in that flow. Also add
an error log in case of a failure.
Signed-off-by: Max Gurtovoy <maxg at mellanox.com>
---
Changes from v1:
- centrelize the error log to nvmet_rdma_post_recv func
- don't release response during post_recv failure in nvmet_rdma_queue_response.
Instead, continue and try sending the response to the initiator.
---
drivers/nvme/target/rdma.c | 27 ++++++++++++++++++++++-----
1 file changed, 22 insertions(+), 5 deletions(-)
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 52e0c5d..88d9f82 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -383,14 +383,22 @@ static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev,
struct nvmet_rdma_cmd *cmd)
{
struct ib_recv_wr *bad_wr;
+ int ret;
ib_dma_sync_single_for_device(ndev->device,
cmd->sge[0].addr, cmd->sge[0].length,
DMA_FROM_DEVICE);
if (ndev->srq)
- return ib_post_srq_recv(ndev->srq, &cmd->wr, &bad_wr);
- return ib_post_recv(cmd->queue->cm_id->qp, &cmd->wr, &bad_wr);
+ ret = ib_post_srq_recv(ndev->srq, &cmd->wr, &bad_wr);
+ else
+ ret = ib_post_recv(cmd->queue->cm_id->qp, &cmd->wr, &bad_wr);
+
+ if (unlikely(ret))
+ pr_err("post_recv cmd failed for %s 0x%p\n",
+ ndev->srq ? "SRQ" : "QP",
+ ndev->srq ? ndev->srq : cmd->queue->cm_id->qp);
+ return ret;
}
static void nvmet_rdma_process_wr_wait_list(struct nvmet_rdma_queue *queue)
@@ -765,11 +773,16 @@ static int nvmet_rdma_init_srq(struct nvmet_rdma_device *ndev)
ndev->srq = srq;
ndev->srq_size = srq_size;
- for (i = 0; i < srq_size; i++)
- nvmet_rdma_post_recv(ndev, &ndev->srq_cmds[i]);
+ for (i = 0; i < srq_size; i++) {
+ ret = nvmet_rdma_post_recv(ndev, &ndev->srq_cmds[i]);
+ if (ret)
+ goto out_free_cmds;
+ }
return 0;
+out_free_cmds:
+ nvmet_rdma_free_cmds(ndev, ndev->srq_cmds, ndev->srq_size, false);
out_destroy_srq:
ib_destroy_srq(srq);
return ret;
@@ -899,13 +912,17 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
if (!ndev->srq) {
for (i = 0; i < queue->recv_queue_size; i++) {
queue->cmds[i].queue = queue;
- nvmet_rdma_post_recv(ndev, &queue->cmds[i]);
+ ret = nvmet_rdma_post_recv(ndev, &queue->cmds[i]);
+ if (ret)
+ goto err_destroy_qp;
}
}
out:
return ret;
+err_destroy_qp:
+ rdma_destroy_qp(queue->cm_id);
err_destroy_cq:
ib_free_cq(queue->cq);
goto out;
--
1.8.3.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH v2 1/1] nvmet-rdma: Add error flow for post_recv failures
2018-06-27 11:58 [PATCH v2 1/1] nvmet-rdma: Add error flow for post_recv failures Max Gurtovoy
@ 2018-06-27 12:53 ` Max Gurtovoy
2018-06-28 14:38 ` Sagi Grimberg
1 sibling, 0 replies; 5+ messages in thread
From: Max Gurtovoy @ 2018-06-27 12:53 UTC (permalink / raw)
On 6/27/2018 2:58 PM, Max Gurtovoy wrote:
> Posting receive buffer operation can fail, thus we should
> make sure there is no memory leakage in that flow. Also add
> an error log in case of a failure.
>
> Signed-off-by: Max Gurtovoy <maxg at mellanox.com>
> ---
>
> Changes from v1:
> - centrelize the error log to nvmet_rdma_post_recv func
> - don't release response during post_recv failure in nvmet_rdma_queue_response.
> Instead, continue and try sending the response to the initiator.
>
> ---
> drivers/nvme/target/rdma.c | 27 ++++++++++++++++++++++-----
> 1 file changed, 22 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
> index 52e0c5d..88d9f82 100644
> --- a/drivers/nvme/target/rdma.c
> +++ b/drivers/nvme/target/rdma.c
> @@ -383,14 +383,22 @@ static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev,
> struct nvmet_rdma_cmd *cmd)
> {
> struct ib_recv_wr *bad_wr;
> + int ret;
>
> ib_dma_sync_single_for_device(ndev->device,
> cmd->sge[0].addr, cmd->sge[0].length,
> DMA_FROM_DEVICE);
>
> if (ndev->srq)
> - return ib_post_srq_recv(ndev->srq, &cmd->wr, &bad_wr);
> - return ib_post_recv(cmd->queue->cm_id->qp, &cmd->wr, &bad_wr);
> + ret = ib_post_srq_recv(ndev->srq, &cmd->wr, &bad_wr);
> + else
> + ret = ib_post_recv(cmd->queue->cm_id->qp, &cmd->wr, &bad_wr);
> +
> + if (unlikely(ret))
> + pr_err("post_recv cmd failed for %s 0x%p\n",
> + ndev->srq ? "SRQ" : "QP",
> + ndev->srq ? ndev->srq : cmd->queue->cm_id->qp);
In second thought maybe I'll leave it:
pr_err("post_recv cmd failed\n");
agreed ?
> + return ret;
> }
>
> static void nvmet_rdma_process_wr_wait_list(struct nvmet_rdma_queue *queue)
> @@ -765,11 +773,16 @@ static int nvmet_rdma_init_srq(struct nvmet_rdma_device *ndev)
> ndev->srq = srq;
> ndev->srq_size = srq_size;
>
> - for (i = 0; i < srq_size; i++)
> - nvmet_rdma_post_recv(ndev, &ndev->srq_cmds[i]);
> + for (i = 0; i < srq_size; i++) {
> + ret = nvmet_rdma_post_recv(ndev, &ndev->srq_cmds[i]);
> + if (ret)
> + goto out_free_cmds;
> + }
>
> return 0;
>
> +out_free_cmds:
> + nvmet_rdma_free_cmds(ndev, ndev->srq_cmds, ndev->srq_size, false);
> out_destroy_srq:
> ib_destroy_srq(srq);
> return ret;
> @@ -899,13 +912,17 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
> if (!ndev->srq) {
> for (i = 0; i < queue->recv_queue_size; i++) {
> queue->cmds[i].queue = queue;
> - nvmet_rdma_post_recv(ndev, &queue->cmds[i]);
> + ret = nvmet_rdma_post_recv(ndev, &queue->cmds[i]);
> + if (ret)
> + goto err_destroy_qp;
> }
> }
>
> out:
> return ret;
>
> +err_destroy_qp:
> + rdma_destroy_qp(queue->cm_id);
> err_destroy_cq:
> ib_free_cq(queue->cq);
> goto out;
>
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH v2 1/1] nvmet-rdma: Add error flow for post_recv failures
2018-06-27 11:58 [PATCH v2 1/1] nvmet-rdma: Add error flow for post_recv failures Max Gurtovoy
2018-06-27 12:53 ` Max Gurtovoy
@ 2018-06-28 14:38 ` Sagi Grimberg
2018-06-28 15:02 ` Max Gurtovoy
1 sibling, 1 reply; 5+ messages in thread
From: Sagi Grimberg @ 2018-06-28 14:38 UTC (permalink / raw)
> Posting receive buffer operation can fail, thus we should
> make sure there is no memory leakage in that flow. Also add
> an error log in case of a failure.
What memory leak? resulting from post_recv errors?
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH v2 1/1] nvmet-rdma: Add error flow for post_recv failures
2018-06-28 14:38 ` Sagi Grimberg
@ 2018-06-28 15:02 ` Max Gurtovoy
2018-06-28 16:15 ` Sagi Grimberg
0 siblings, 1 reply; 5+ messages in thread
From: Max Gurtovoy @ 2018-06-28 15:02 UTC (permalink / raw)
On 6/28/2018 5:38 PM, Sagi Grimberg wrote:
>
>> Posting receive buffer operation can fail, thus we should
>> make sure there is no memory leakage in that flow. Also add
>> an error log in case of a failure.
>
> What memory leak? resulting from post_recv errors?
We are missing error flows in nvmet_rdma_create_queue_ib and
nvmet_rdma_init_srq. I can remove the leakage from change log as I see
that we should be ok (unless of some unexpected bahaviour) in the
current code when we ignore failure of post_recv.
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH v2 1/1] nvmet-rdma: Add error flow for post_recv failures
2018-06-28 15:02 ` Max Gurtovoy
@ 2018-06-28 16:15 ` Sagi Grimberg
0 siblings, 0 replies; 5+ messages in thread
From: Sagi Grimberg @ 2018-06-28 16:15 UTC (permalink / raw)
>>> Posting receive buffer operation can fail, thus we should
>>> make sure there is no memory leakage in that flow. Also add
>>> an error log in case of a failure.
>>
>> What memory leak? resulting from post_recv errors?
>
> We are missing error flows in nvmet_rdma_create_queue_ib and
> nvmet_rdma_init_srq. I can remove the leakage from change log as I see
> that we should be ok (unless of some unexpected bahaviour) in the
> current code when we ignore failure of post_recv.
Lets remove the leakage from the change log if its not something that
was actually seen. It is confusing.
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2018-06-28 16:15 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2018-06-27 11:58 [PATCH v2 1/1] nvmet-rdma: Add error flow for post_recv failures Max Gurtovoy
2018-06-27 12:53 ` Max Gurtovoy
2018-06-28 14:38 ` Sagi Grimberg
2018-06-28 15:02 ` Max Gurtovoy
2018-06-28 16:15 ` Sagi Grimberg
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox