* [PATCH 0/2] nvme: short-circuit connection retries
@ 2022-07-14 12:41 Hannes Reinecke
2022-07-14 12:41 ` [PATCH 1/2] nvme-tcp: short-circuit connect retries Hannes Reinecke
2022-07-14 12:41 ` [PATCH 2/2] nvme-rdma: " Hannes Reinecke
0 siblings, 2 replies; 6+ messages in thread
From: Hannes Reinecke @ 2022-07-14 12:41 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: Keith Busch, Sagi Grimberg, linux-nvme, Hannes Reinecke
Hi all,
here are two patches bringing the nvme-tcp and nvme-rdma transports
in line with the modifications we already have in nvme-fc with commit
f25f8ef70ce2 ("nvme-fc: short-circuit reconnect retries").
The gist is to short-circuit reconnection retries if we get a
status back with the DNR bit set, in which case we shouldn't
retry the connection attempt.
As usual, comments and reviews are welcome.
Hannes Reinecke (2):
nvme-tcp: short-circuit connect retries
nvme-rdma: short-circuit connect retries
drivers/nvme/host/rdma.c | 25 ++++++++++++++++++-------
drivers/nvme/host/tcp.c | 25 ++++++++++++++++++-------
2 files changed, 36 insertions(+), 14 deletions(-)
--
2.29.2
^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCH 1/2] nvme-tcp: short-circuit connect retries
2022-07-14 12:41 [PATCH 0/2] nvme: short-circuit connection retries Hannes Reinecke
@ 2022-07-14 12:41 ` Hannes Reinecke
2022-07-14 14:35 ` Sagi Grimberg
2022-07-14 12:41 ` [PATCH 2/2] nvme-rdma: " Hannes Reinecke
1 sibling, 1 reply; 6+ messages in thread
From: Hannes Reinecke @ 2022-07-14 12:41 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: Keith Busch, Sagi Grimberg, linux-nvme, Hannes Reinecke
When a reconnect attempt fails with a non-retryable status
(e.g. when the subsystem has been unprovisioned) there is hardly
any reason to retry the reconnect attempt.
So pass the actual error status to nvme_tcp_reconnect_or_remove()
and short-circuit retries if the DNR bit is set.
Signed-off-by: Hannes Reinecke <hare@suse.de>
---
drivers/nvme/host/tcp.c | 25 ++++++++++++++++++-------
1 file changed, 18 insertions(+), 7 deletions(-)
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 70096a2e8762..4220c1ad6b29 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -2065,8 +2065,10 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
nvme_tcp_destroy_io_queues(ctrl, remove);
}
-static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
+static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl, int status)
{
+ bool recon = true;
+
/* If we are resetting/deleting then do nothing */
if (ctrl->state != NVME_CTRL_CONNECTING) {
WARN_ON_ONCE(ctrl->state == NVME_CTRL_NEW ||
@@ -2074,7 +2076,12 @@ static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
return;
}
- if (nvmf_should_reconnect(ctrl)) {
+ if (status > 0 && (status & NVME_SC_DNR)) {
+ dev_info(ctrl->device, "reconnect failure %d\n", status);
+ recon = false;
+ }
+
+ if (recon && nvmf_should_reconnect(ctrl)) {
dev_info(ctrl->device, "Reconnecting in %d seconds...\n",
ctrl->opts->reconnect_delay);
queue_delayed_work(nvme_wq, &to_tcp_ctrl(ctrl)->connect_work,
@@ -2162,10 +2169,12 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work)
struct nvme_tcp_ctrl *tcp_ctrl = container_of(to_delayed_work(work),
struct nvme_tcp_ctrl, connect_work);
struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl;
+ int ret;
++ctrl->nr_reconnects;
- if (nvme_tcp_setup_ctrl(ctrl, false))
+ ret = nvme_tcp_setup_ctrl(ctrl, false);
+ if (ret)
goto requeue;
dev_info(ctrl->device, "Successfully reconnected (%d attempt)\n",
@@ -2178,7 +2187,7 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work)
requeue:
dev_info(ctrl->device, "Failed reconnect attempt %d\n",
ctrl->nr_reconnects);
- nvme_tcp_reconnect_or_remove(ctrl);
+ nvme_tcp_reconnect_or_remove(ctrl, ret);
}
static void nvme_tcp_error_recovery_work(struct work_struct *work)
@@ -2203,7 +2212,7 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work)
return;
}
- nvme_tcp_reconnect_or_remove(ctrl);
+ nvme_tcp_reconnect_or_remove(ctrl, -ENOTCONN);
}
static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown)
@@ -2229,6 +2238,7 @@ static void nvme_reset_ctrl_work(struct work_struct *work)
{
struct nvme_ctrl *ctrl =
container_of(work, struct nvme_ctrl, reset_work);
+ int ret;
nvme_stop_ctrl(ctrl);
nvme_tcp_teardown_ctrl(ctrl, false);
@@ -2240,14 +2250,15 @@ static void nvme_reset_ctrl_work(struct work_struct *work)
return;
}
- if (nvme_tcp_setup_ctrl(ctrl, false))
+ ret = nvme_tcp_setup_ctrl(ctrl, false);
+ if (ret)
goto out_fail;
return;
out_fail:
++ctrl->nr_reconnects;
- nvme_tcp_reconnect_or_remove(ctrl);
+ nvme_tcp_reconnect_or_remove(ctrl, ret);
}
static void nvme_tcp_free_ctrl(struct nvme_ctrl *nctrl)
--
2.29.2
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH 2/2] nvme-rdma: short-circuit connect retries
2022-07-14 12:41 [PATCH 0/2] nvme: short-circuit connection retries Hannes Reinecke
2022-07-14 12:41 ` [PATCH 1/2] nvme-tcp: short-circuit connect retries Hannes Reinecke
@ 2022-07-14 12:41 ` Hannes Reinecke
1 sibling, 0 replies; 6+ messages in thread
From: Hannes Reinecke @ 2022-07-14 12:41 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: Keith Busch, Sagi Grimberg, linux-nvme, Hannes Reinecke
When a reconnect attempt fails with a non-retryable status
(e.g. when the subsystem has been unprovisioned) there is hardly
any reason to retry the reconnect attempt.
So pass the actual error status to nvme_rdma_reconnect_or_remove()
and short-circuit retries if the DNR bit is set.
Signed-off-by: Hannes Reinecke <hare@suse.de>
---
drivers/nvme/host/rdma.c | 25 ++++++++++++++++++-------
1 file changed, 18 insertions(+), 7 deletions(-)
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 84ce3347d158..bcc84f181dcd 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1065,8 +1065,10 @@ static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
kfree(ctrl);
}
-static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
+static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl, int status)
{
+ bool recon = true;
+
/* If we are resetting/deleting then do nothing */
if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) {
WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW ||
@@ -1074,7 +1076,12 @@ static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
return;
}
- if (nvmf_should_reconnect(&ctrl->ctrl)) {
+ if (status > 0 && (status & NVME_SC_DNR)) {
+ dev_info(ctrl->ctrl.device, "reconnect failure %d\n", status);
+ recon = false;
+ }
+
+ if (recon && nvmf_should_reconnect(&ctrl->ctrl)) {
dev_info(ctrl->ctrl.device, "Reconnecting in %d seconds...\n",
ctrl->ctrl.opts->reconnect_delay);
queue_delayed_work(nvme_wq, &ctrl->reconnect_work,
@@ -1173,10 +1180,12 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
{
struct nvme_rdma_ctrl *ctrl = container_of(to_delayed_work(work),
struct nvme_rdma_ctrl, reconnect_work);
+ int ret;
++ctrl->ctrl.nr_reconnects;
- if (nvme_rdma_setup_ctrl(ctrl, false))
+ ret = nvme_rdma_setup_ctrl(ctrl, false);
+ if (ret)
goto requeue;
dev_info(ctrl->ctrl.device, "Successfully reconnected (%d attempts)\n",
@@ -1189,7 +1198,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
requeue:
dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
ctrl->ctrl.nr_reconnects);
- nvme_rdma_reconnect_or_remove(ctrl);
+ nvme_rdma_reconnect_or_remove(ctrl, ret);
}
static void nvme_rdma_error_recovery_work(struct work_struct *work)
@@ -1212,7 +1221,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
return;
}
- nvme_rdma_reconnect_or_remove(ctrl);
+ nvme_rdma_reconnect_or_remove(ctrl, -ENOTCONN);
}
static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl)
@@ -2274,6 +2283,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
{
struct nvme_rdma_ctrl *ctrl =
container_of(work, struct nvme_rdma_ctrl, ctrl.reset_work);
+ int ret;
nvme_stop_ctrl(&ctrl->ctrl);
nvme_rdma_shutdown_ctrl(ctrl, false);
@@ -2284,14 +2294,15 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
return;
}
- if (nvme_rdma_setup_ctrl(ctrl, false))
+ ret = nvme_rdma_setup_ctrl(ctrl, false);
+ if (ret)
goto out_fail;
return;
out_fail:
++ctrl->ctrl.nr_reconnects;
- nvme_rdma_reconnect_or_remove(ctrl);
+ nvme_rdma_reconnect_or_remove(ctrl, ret);
}
static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
--
2.29.2
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH 1/2] nvme-tcp: short-circuit connect retries
2022-07-14 12:41 ` [PATCH 1/2] nvme-tcp: short-circuit connect retries Hannes Reinecke
@ 2022-07-14 14:35 ` Sagi Grimberg
2022-07-14 15:17 ` Hannes Reinecke
0 siblings, 1 reply; 6+ messages in thread
From: Sagi Grimberg @ 2022-07-14 14:35 UTC (permalink / raw)
To: Hannes Reinecke, Christoph Hellwig; +Cc: Keith Busch, linux-nvme
> When a reconnect attempt fails with a non-retryable status
> (eg when the subsystem has been unprovisioned) there hardly
> is any reason to retry the reconnect attempt.
> So pass the actual error status to nvme_tcp_reconnect_or_remove()
> and short-circuit retries if the DNR bit is set.
>
> Signed-off-by: Hannes Reinecke <hare@suse.de>
> ---
> drivers/nvme/host/tcp.c | 25 ++++++++++++++++++-------
> 1 file changed, 18 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
> index 70096a2e8762..4220c1ad6b29 100644
> --- a/drivers/nvme/host/tcp.c
> +++ b/drivers/nvme/host/tcp.c
> @@ -2065,8 +2065,10 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
> nvme_tcp_destroy_io_queues(ctrl, remove);
> }
>
> -static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
> +static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl, int status)
> {
> + bool recon = true;
> +
> /* If we are resetting/deleting then do nothing */
> if (ctrl->state != NVME_CTRL_CONNECTING) {
> WARN_ON_ONCE(ctrl->state == NVME_CTRL_NEW ||
> @@ -2074,7 +2076,12 @@ static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
> return;
> }
>
> - if (nvmf_should_reconnect(ctrl)) {
> + if (status > 0 && (status & NVME_SC_DNR)) {
> + dev_info(ctrl->device, "reconnect failure %d\n", status);
> + recon = false;
> + }
> +
Why should we call this if we need to remove for sure?
I suggest we just change the call-site.
> + if (recon && nvmf_should_reconnect(ctrl)) {
> dev_info(ctrl->device, "Reconnecting in %d seconds...\n",
> ctrl->opts->reconnect_delay);
> queue_delayed_work(nvme_wq, &to_tcp_ctrl(ctrl)->connect_work,
> @@ -2162,10 +2169,12 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work)
> struct nvme_tcp_ctrl *tcp_ctrl = container_of(to_delayed_work(work),
> struct nvme_tcp_ctrl, connect_work);
> struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl;
> + int ret;
>
> ++ctrl->nr_reconnects;
>
> - if (nvme_tcp_setup_ctrl(ctrl, false))
> + ret = nvme_tcp_setup_ctrl(ctrl, false);
> + if (ret)
> goto requeue;
status = nvme_tcp_setup_ctrl(ctrl, false);
if (!status) {
dev_info(ctrl->device,
"Successfully reconnected (%d attempt)\n",
ctrl->nr_reconnects);
ctrl->nr_reconnects = 0;
} else {
dev_info(ctrl->device,
"Failed reconnect attempt %d (%d)\n",
ctrl->nr_reconnects, status);
if (status > 0 && (status & NVME_SC_DNR)) {
dev_info(ctrl->device,
"Removing controller...\n");
nvme_delete_ctrl(ctrl);
} else {
nvme_tcp_reconnect_or_remove(ctrl);
}
}
}
>
> dev_info(ctrl->device, "Successfully reconnected (%d attempt)\n",
> @@ -2178,7 +2187,7 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work)
> requeue:
> dev_info(ctrl->device, "Failed reconnect attempt %d\n",
> ctrl->nr_reconnects);
> - nvme_tcp_reconnect_or_remove(ctrl);
> + nvme_tcp_reconnect_or_remove(ctrl, ret);
> }
>
> static void nvme_tcp_error_recovery_work(struct work_struct *work)
> @@ -2203,7 +2212,7 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work)
> return;
> }
>
> - nvme_tcp_reconnect_or_remove(ctrl);
> + nvme_tcp_reconnect_or_remove(ctrl, -ENOTCONN);
This emphasizes that this is a redundant parameter.
> }
>
> static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown)
> @@ -2229,6 +2238,7 @@ static void nvme_reset_ctrl_work(struct work_struct *work)
> {
> struct nvme_ctrl *ctrl =
> container_of(work, struct nvme_ctrl, reset_work);
> + int ret;
>
> nvme_stop_ctrl(ctrl);
> nvme_tcp_teardown_ctrl(ctrl, false);
> @@ -2240,14 +2250,15 @@ static void nvme_reset_ctrl_work(struct work_struct *work)
> return;
> }
>
> - if (nvme_tcp_setup_ctrl(ctrl, false))
> + ret = nvme_tcp_setup_ctrl(ctrl, false);
> + if (ret)
> goto out_fail;
Similar error handling here.
status = nvme_tcp_setup_ctrl(ctrl, false);
if (status) {
++ctrl->nr_reconnects;
if (status > 0 && (status & NVME_SC_DNR)) {
dev_info(ctrl->device,
"Removing controller...\n");
nvme_delete_ctrl(ctrl);
} else {
nvme_tcp_reconnect_or_remove(ctrl);
}
}
>
> return;
>
> out_fail:
> ++ctrl->nr_reconnects;
> - nvme_tcp_reconnect_or_remove(ctrl);
> + nvme_tcp_reconnect_or_remove(ctrl, ret);
> }
>
> static void nvme_tcp_free_ctrl(struct nvme_ctrl *nctrl)
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH 1/2] nvme-tcp: short-circuit connect retries
2022-07-14 14:35 ` Sagi Grimberg
@ 2022-07-14 15:17 ` Hannes Reinecke
0 siblings, 0 replies; 6+ messages in thread
From: Hannes Reinecke @ 2022-07-14 15:17 UTC (permalink / raw)
To: Sagi Grimberg, Christoph Hellwig; +Cc: Keith Busch, linux-nvme
On 7/14/22 16:35, Sagi Grimberg wrote:
>
>> When a reconnect attempt fails with a non-retryable status
>> (eg when the subsystem has been unprovisioned) there hardly
>> is any reason to retry the reconnect attempt.
>> So pass the actual error status to nvme_tcp_reconnect_or_remove()
>> and short-circuit retries if the DNR bit is set.
>>
>> Signed-off-by: Hannes Reinecke <hare@suse.de>
>> ---
>> drivers/nvme/host/tcp.c | 25 ++++++++++++++++++-------
>> 1 file changed, 18 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
>> index 70096a2e8762..4220c1ad6b29 100644
>> --- a/drivers/nvme/host/tcp.c
>> +++ b/drivers/nvme/host/tcp.c
>> @@ -2065,8 +2065,10 @@ static void nvme_tcp_teardown_io_queues(struct
>> nvme_ctrl *ctrl,
>> nvme_tcp_destroy_io_queues(ctrl, remove);
>> }
>> -static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
>> +static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl, int
>> status)
>> {
>> + bool recon = true;
>> +
>> /* If we are resetting/deleting then do nothing */
>> if (ctrl->state != NVME_CTRL_CONNECTING) {
>> WARN_ON_ONCE(ctrl->state == NVME_CTRL_NEW ||
>> @@ -2074,7 +2076,12 @@ static void nvme_tcp_reconnect_or_remove(struct
>> nvme_ctrl *ctrl)
>> return;
>> }
>> - if (nvmf_should_reconnect(ctrl)) {
>> + if (status > 0 && (status & NVME_SC_DNR)) {
>> + dev_info(ctrl->device, "reconnect failure %d\n", status);
>> + recon = false;
>> + }
>> +
>
> Why should we call this if we need to remove for sure?
> I suggest we just change the call-site.
>
>> + if (recon && nvmf_should_reconnect(ctrl)) {
>> dev_info(ctrl->device, "Reconnecting in %d seconds...\n",
>> ctrl->opts->reconnect_delay);
>> queue_delayed_work(nvme_wq, &to_tcp_ctrl(ctrl)->connect_work,
>> @@ -2162,10 +2169,12 @@ static void
>> nvme_tcp_reconnect_ctrl_work(struct work_struct *work)
>> struct nvme_tcp_ctrl *tcp_ctrl =
>> container_of(to_delayed_work(work),
>> struct nvme_tcp_ctrl, connect_work);
>> struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl;
>> + int ret;
>> ++ctrl->nr_reconnects;
>> - if (nvme_tcp_setup_ctrl(ctrl, false))
>> + ret = nvme_tcp_setup_ctrl(ctrl, false);
>> + if (ret)
>> goto requeue;
>
> status = nvme_tcp_setup_ctrl(ctrl, false);
> if (!status) {
> dev_info(ctrl->device,
> "Successfully reconnected (%d attempt)\n",
> ctrl->nr_reconnects);
> ctrl->nr_reconnects = 0;
> } else {
> dev_info(ctrl->device,
> "Failed reconnect attempt %d (%d)\n",
> ctrl->nr_reconnects, status);
> if (status > 0 && (status & NVME_SC_DNR)) {
> dev_info(ctrl->device,
> "Removing controller...\n");
> nvme_delete_ctrl(ctrl);
> } else {
> nvme_tcp_reconnect_or_remove(ctrl);
> }
> }
> }
>
Good point.
>> dev_info(ctrl->device, "Successfully reconnected (%d attempt)\n",
>> @@ -2178,7 +2187,7 @@ static void nvme_tcp_reconnect_ctrl_work(struct
>> work_struct *work)
>> requeue:
>> dev_info(ctrl->device, "Failed reconnect attempt %d\n",
>> ctrl->nr_reconnects);
>> - nvme_tcp_reconnect_or_remove(ctrl);
>> + nvme_tcp_reconnect_or_remove(ctrl, ret);
>> }
>> static void nvme_tcp_error_recovery_work(struct work_struct *work)
>> @@ -2203,7 +2212,7 @@ static void nvme_tcp_error_recovery_work(struct
>> work_struct *work)
>> return;
>> }
>> - nvme_tcp_reconnect_or_remove(ctrl);
>> + nvme_tcp_reconnect_or_remove(ctrl, -ENOTCONN);
>
> This emphasizes that this is a redundant parameter.
>
Ok, will be reworking it.
Cheers,
Hannes
--
Dr. Hannes Reinecke Kernel Storage Architect
hare@suse.de +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Ivo Totev, Andrew
Myers, Andrew McDonald, Martje Boudien Moerman
^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCH 1/2] nvme-tcp: short-circuit connect retries
2022-07-15 6:33 [PATCHv2 0/2] nvme: short-circuit connection retries Hannes Reinecke
@ 2022-07-15 6:33 ` Hannes Reinecke
0 siblings, 0 replies; 6+ messages in thread
From: Hannes Reinecke @ 2022-07-15 6:33 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: Sagi Grimberg, Keith Busch, linux-nvme, Hannes Reinecke
When a reconnect attempt fails with a non-retryable status
(e.g. when the subsystem has been unprovisioned) there is hardly
any reason to retry the reconnect attempt.
So check the actual error status from nvme_tcp_setup_ctrl(), and
call nvme_delete_ctrl() instead of calling nvme_tcp_reconnect_or_remove()
if the DNR bit is set.
Signed-off-by: Hannes Reinecke <hare@suse.de>
---
drivers/nvme/host/tcp.c | 21 ++++++++++++++++++---
1 file changed, 18 insertions(+), 3 deletions(-)
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index c854e69defb0..a15e3ed02b64 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -2157,10 +2157,12 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work)
struct nvme_tcp_ctrl *tcp_ctrl = container_of(to_delayed_work(work),
struct nvme_tcp_ctrl, connect_work);
struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl;
+ int ret;
++ctrl->nr_reconnects;
- if (nvme_tcp_setup_ctrl(ctrl, false))
+ ret = nvme_tcp_setup_ctrl(ctrl, false);
+ if (ret)
goto requeue;
dev_info(ctrl->device, "Successfully reconnected (%d attempt)\n",
@@ -2173,6 +2175,12 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work)
requeue:
dev_info(ctrl->device, "Failed reconnect attempt %d\n",
ctrl->nr_reconnects);
+ if (ret > 0 && (ret & NVME_SC_DNR)) {
+ dev_info(ctrl->device,
+ "Removing ctrl...\n");
+ nvme_delete_ctrl(ctrl);
+ return;
+ }
nvme_tcp_reconnect_or_remove(ctrl);
}
@@ -2224,6 +2232,7 @@ static void nvme_reset_ctrl_work(struct work_struct *work)
{
struct nvme_ctrl *ctrl =
container_of(work, struct nvme_ctrl, reset_work);
+ int ret;
nvme_stop_ctrl(ctrl);
nvme_tcp_teardown_ctrl(ctrl, false);
@@ -2235,9 +2244,15 @@ static void nvme_reset_ctrl_work(struct work_struct *work)
return;
}
- if (nvme_tcp_setup_ctrl(ctrl, false))
+ ret = nvme_tcp_setup_ctrl(ctrl, false);
+ if (ret) {
+ if (ret > 0 && (ret & NVME_SC_DNR)) {
+ dev_info(ctrl->device, "Removing ctrl...\n");
+ nvme_delete_ctrl(ctrl);
+ return;
+ }
goto out_fail;
-
+ }
return;
out_fail:
--
2.29.2
^ permalink raw reply related [flat|nested] 6+ messages in thread
end of thread, other threads:[~2022-07-15 6:34 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-07-14 12:41 [PATCH 0/2] nvme: short-circuit connection retries Hannes Reinecke
2022-07-14 12:41 ` [PATCH 1/2] nvme-tcp: short-circuit connect retries Hannes Reinecke
2022-07-14 14:35 ` Sagi Grimberg
2022-07-14 15:17 ` Hannes Reinecke
2022-07-14 12:41 ` [PATCH 2/2] nvme-rdma: " Hannes Reinecke
-- strict thread matches above, loose matches on Subject: below --
2022-07-15 6:33 [PATCHv2 0/2] nvme: short-circuit connection retries Hannes Reinecke
2022-07-15 6:33 ` [PATCH 1/2] nvme-tcp: short-circuit connect retries Hannes Reinecke
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox