* [PATCH 0/2] nvme: short-circuit connection retries
@ 2022-07-14 12:41 Hannes Reinecke
2022-07-14 12:41 ` [PATCH 1/2] nvme-tcp: short-circuit connect retries Hannes Reinecke
2022-07-14 12:41 ` [PATCH 2/2] nvme-rdma: " Hannes Reinecke
0 siblings, 2 replies; 6+ messages in thread
From: Hannes Reinecke @ 2022-07-14 12:41 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: Keith Busch, Sagi Grimberg, linux-nvme, Hannes Reinecke
Hi all,
here are two patches that update the nvme-tcp and nvme-rdma transports
to match the modifications we already have in nvme-fc with commit
f25f8ef70ce2 ("nvme-fc: short-circuit reconnect retries").
The gist is to short-circuit reconnection retries if we get a
status back with the DNR bit set, in which case we shouldn't
retry the connection attempt.
As usual, comments and reviews are welcome.
Hannes Reinecke (2):
nvme-tcp: short-circuit connect retries
nvme-rdma: short-circuit connect retries
drivers/nvme/host/rdma.c | 25 ++++++++++++++++++-------
drivers/nvme/host/tcp.c | 25 ++++++++++++++++++-------
2 files changed, 36 insertions(+), 14 deletions(-)
--
2.29.2
^ permalink raw reply [flat|nested] 6+ messages in thread* [PATCH 1/2] nvme-tcp: short-circuit connect retries 2022-07-14 12:41 [PATCH 0/2] nvme: short-circuit connection retries Hannes Reinecke @ 2022-07-14 12:41 ` Hannes Reinecke 2022-07-14 14:35 ` Sagi Grimberg 2022-07-14 12:41 ` [PATCH 2/2] nvme-rdma: " Hannes Reinecke 1 sibling, 1 reply; 6+ messages in thread From: Hannes Reinecke @ 2022-07-14 12:41 UTC (permalink / raw) To: Christoph Hellwig; +Cc: Keith Busch, Sagi Grimberg, linux-nvme, Hannes Reinecke When a reconnect attempt fails with a non-retryable status (eg when the subsystem has been unprovisioned) there hardly is any reason to retry the reconnect attempt. So pass the actual error status to nvme_tcp_reconnect_or_remove() and short-circuit retries if the DNR bit is set. Signed-off-by: Hannes Reinecke <hare@suse.de> --- drivers/nvme/host/tcp.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 70096a2e8762..4220c1ad6b29 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2065,8 +2065,10 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl, nvme_tcp_destroy_io_queues(ctrl, remove); } -static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl) +static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl, int status) { + bool recon = true; + /* If we are resetting/deleting then do nothing */ if (ctrl->state != NVME_CTRL_CONNECTING) { WARN_ON_ONCE(ctrl->state == NVME_CTRL_NEW || @@ -2074,7 +2076,12 @@ static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl) return; } - if (nvmf_should_reconnect(ctrl)) { + if (status > 0 && (status & NVME_SC_DNR)) { + dev_info(ctrl->device, "reconnect failure %d\n", status); + recon = false; + } + + if (recon && nvmf_should_reconnect(ctrl)) { dev_info(ctrl->device, "Reconnecting in %d seconds...\n", ctrl->opts->reconnect_delay); queue_delayed_work(nvme_wq, 
&to_tcp_ctrl(ctrl)->connect_work, @@ -2162,10 +2169,12 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work) struct nvme_tcp_ctrl *tcp_ctrl = container_of(to_delayed_work(work), struct nvme_tcp_ctrl, connect_work); struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl; + int ret; ++ctrl->nr_reconnects; - if (nvme_tcp_setup_ctrl(ctrl, false)) + ret = nvme_tcp_setup_ctrl(ctrl, false); + if (ret) goto requeue; dev_info(ctrl->device, "Successfully reconnected (%d attempt)\n", @@ -2178,7 +2187,7 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work) requeue: dev_info(ctrl->device, "Failed reconnect attempt %d\n", ctrl->nr_reconnects); - nvme_tcp_reconnect_or_remove(ctrl); + nvme_tcp_reconnect_or_remove(ctrl, ret); } static void nvme_tcp_error_recovery_work(struct work_struct *work) @@ -2203,7 +2212,7 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work) return; } - nvme_tcp_reconnect_or_remove(ctrl); + nvme_tcp_reconnect_or_remove(ctrl, -ENOTCONN); } static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown) @@ -2229,6 +2238,7 @@ static void nvme_reset_ctrl_work(struct work_struct *work) { struct nvme_ctrl *ctrl = container_of(work, struct nvme_ctrl, reset_work); + int ret; nvme_stop_ctrl(ctrl); nvme_tcp_teardown_ctrl(ctrl, false); @@ -2240,14 +2250,15 @@ static void nvme_reset_ctrl_work(struct work_struct *work) return; } - if (nvme_tcp_setup_ctrl(ctrl, false)) + ret = nvme_tcp_setup_ctrl(ctrl, false); + if (ret) goto out_fail; return; out_fail: ++ctrl->nr_reconnects; - nvme_tcp_reconnect_or_remove(ctrl); + nvme_tcp_reconnect_or_remove(ctrl, ret); } static void nvme_tcp_free_ctrl(struct nvme_ctrl *nctrl) -- 2.29.2 ^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH 1/2] nvme-tcp: short-circuit connect retries 2022-07-14 12:41 ` [PATCH 1/2] nvme-tcp: short-circuit connect retries Hannes Reinecke @ 2022-07-14 14:35 ` Sagi Grimberg 2022-07-14 15:17 ` Hannes Reinecke 0 siblings, 1 reply; 6+ messages in thread From: Sagi Grimberg @ 2022-07-14 14:35 UTC (permalink / raw) To: Hannes Reinecke, Christoph Hellwig; +Cc: Keith Busch, linux-nvme > When a reconnect attempt fails with a non-retryable status > (eg when the subsystem has been unprovisioned) there hardly > is any reason to retry the reconnect attempt. > So pass the actual error status to nvme_tcp_reconnect_or_remove() > and short-circuit retries if the DNR bit is set. > > Signed-off-by: Hannes Reinecke <hare@suse.de> > --- > drivers/nvme/host/tcp.c | 25 ++++++++++++++++++------- > 1 file changed, 18 insertions(+), 7 deletions(-) > > diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c > index 70096a2e8762..4220c1ad6b29 100644 > --- a/drivers/nvme/host/tcp.c > +++ b/drivers/nvme/host/tcp.c > @@ -2065,8 +2065,10 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl, > nvme_tcp_destroy_io_queues(ctrl, remove); > } > > -static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl) > +static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl, int status) > { > + bool recon = true; > + > /* If we are resetting/deleting then do nothing */ > if (ctrl->state != NVME_CTRL_CONNECTING) { > WARN_ON_ONCE(ctrl->state == NVME_CTRL_NEW || > @@ -2074,7 +2076,12 @@ static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl) > return; > } > > - if (nvmf_should_reconnect(ctrl)) { > + if (status > 0 && (status & NVME_SC_DNR)) { > + dev_info(ctrl->device, "reconnect failure %d\n", status); > + recon = false; > + } > + Why should we call this if we need to remove for sure? I suggest we just change the call-site. 
> + if (recon && nvmf_should_reconnect(ctrl)) { > dev_info(ctrl->device, "Reconnecting in %d seconds...\n", > ctrl->opts->reconnect_delay); > queue_delayed_work(nvme_wq, &to_tcp_ctrl(ctrl)->connect_work, > @@ -2162,10 +2169,12 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work) > struct nvme_tcp_ctrl *tcp_ctrl = container_of(to_delayed_work(work), > struct nvme_tcp_ctrl, connect_work); > struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl; > + int ret; > > ++ctrl->nr_reconnects; > > - if (nvme_tcp_setup_ctrl(ctrl, false)) > + ret = nvme_tcp_setup_ctrl(ctrl, false); > + if (ret) > goto requeue; status = nvme_tcp_setup_ctrl(ctrl, false); if (!status) { dev_info(ctrl->device, "Successfully reconnected (%d attempt)\n", ctrl->nr_reconnects); ctrl->nr_reconnects = 0; } else { dev_info(ctrl->device, "Failed reconnect attempt %d (%d)\n", ctrl->nr_reconnects, status); if (status > 0 && (status & NVME_SC_DNR)) { dev_info(ctrl->device, "Removing controller...\n"); nvme_delete_ctrl(ctrl); } else { nvme_tcp_reconnect_or_remove(ctrl); } } } > > dev_info(ctrl->device, "Successfully reconnected (%d attempt)\n", > @@ -2178,7 +2187,7 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work) > requeue: > dev_info(ctrl->device, "Failed reconnect attempt %d\n", > ctrl->nr_reconnects); > - nvme_tcp_reconnect_or_remove(ctrl); > + nvme_tcp_reconnect_or_remove(ctrl, ret); > } > > static void nvme_tcp_error_recovery_work(struct work_struct *work) > @@ -2203,7 +2212,7 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work) > return; > } > > - nvme_tcp_reconnect_or_remove(ctrl); > + nvme_tcp_reconnect_or_remove(ctrl, -ENOTCONN); This emphasizes that this is a redundant parameter. 
> } > > static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown) > @@ -2229,6 +2238,7 @@ static void nvme_reset_ctrl_work(struct work_struct *work) > { > struct nvme_ctrl *ctrl = > container_of(work, struct nvme_ctrl, reset_work); > + int ret; > > nvme_stop_ctrl(ctrl); > nvme_tcp_teardown_ctrl(ctrl, false); > @@ -2240,14 +2250,15 @@ static void nvme_reset_ctrl_work(struct work_struct *work) > return; > } > > - if (nvme_tcp_setup_ctrl(ctrl, false)) > + ret = nvme_tcp_setup_ctrl(ctrl, false); > + if (ret) > goto out_fail; Similar error handling here. status = nvme_tcp_setup_ctrl(ctrl, false); if (status) { ++ctrl->nr_reconnects; if (status > 0 && (status & NVME_SC_DNR)) { dev_info(ctrl->device, "Removing controller...\n"); nvme_delete_ctrl(ctrl); } else { nvme_tcp_reconnect_or_remove(ctrl); } } > > return; > > out_fail: > ++ctrl->nr_reconnects; > - nvme_tcp_reconnect_or_remove(ctrl); > + nvme_tcp_reconnect_or_remove(ctrl, ret); > } > > static void nvme_tcp_free_ctrl(struct nvme_ctrl *nctrl) ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH 1/2] nvme-tcp: short-circuit connect retries 2022-07-14 14:35 ` Sagi Grimberg @ 2022-07-14 15:17 ` Hannes Reinecke 0 siblings, 0 replies; 6+ messages in thread From: Hannes Reinecke @ 2022-07-14 15:17 UTC (permalink / raw) To: Sagi Grimberg, Christoph Hellwig; +Cc: Keith Busch, linux-nvme On 7/14/22 16:35, Sagi Grimberg wrote: > >> When a reconnect attempt fails with a non-retryable status >> (eg when the subsystem has been unprovisioned) there hardly >> is any reason to retry the reconnect attempt. >> So pass the actual error status to nvme_tcp_reconnect_or_remove() >> and short-circuit retries if the DNR bit is set. >> >> Signed-off-by: Hannes Reinecke <hare@suse.de> >> --- >> drivers/nvme/host/tcp.c | 25 ++++++++++++++++++------- >> 1 file changed, 18 insertions(+), 7 deletions(-) >> >> diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c >> index 70096a2e8762..4220c1ad6b29 100644 >> --- a/drivers/nvme/host/tcp.c >> +++ b/drivers/nvme/host/tcp.c >> @@ -2065,8 +2065,10 @@ static void nvme_tcp_teardown_io_queues(struct >> nvme_ctrl *ctrl, >> nvme_tcp_destroy_io_queues(ctrl, remove); >> } >> -static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl) >> +static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl, int >> status) >> { >> + bool recon = true; >> + >> /* If we are resetting/deleting then do nothing */ >> if (ctrl->state != NVME_CTRL_CONNECTING) { >> WARN_ON_ONCE(ctrl->state == NVME_CTRL_NEW || >> @@ -2074,7 +2076,12 @@ static void nvme_tcp_reconnect_or_remove(struct >> nvme_ctrl *ctrl) >> return; >> } >> - if (nvmf_should_reconnect(ctrl)) { >> + if (status > 0 && (status & NVME_SC_DNR)) { >> + dev_info(ctrl->device, "reconnect failure %d\n", status); >> + recon = false; >> + } >> + > > Why should we call this if we need to remove for sure? > I suggest we just change the call-site. 
> >> + if (recon && nvmf_should_reconnect(ctrl)) { >> dev_info(ctrl->device, "Reconnecting in %d seconds...\n", >> ctrl->opts->reconnect_delay); >> queue_delayed_work(nvme_wq, &to_tcp_ctrl(ctrl)->connect_work, >> @@ -2162,10 +2169,12 @@ static void >> nvme_tcp_reconnect_ctrl_work(struct work_struct *work) >> struct nvme_tcp_ctrl *tcp_ctrl = >> container_of(to_delayed_work(work), >> struct nvme_tcp_ctrl, connect_work); >> struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl; >> + int ret; >> ++ctrl->nr_reconnects; >> - if (nvme_tcp_setup_ctrl(ctrl, false)) >> + ret = nvme_tcp_setup_ctrl(ctrl, false); >> + if (ret) >> goto requeue; > > status = nvme_tcp_setup_ctrl(ctrl, false); > if (!status) { > dev_info(ctrl->device, > "Successfully reconnected (%d attempt)\n", > ctrl->nr_reconnects); > ctrl->nr_reconnects = 0; > } else { > dev_info(ctrl->device, > "Failed reconnect attempt %d (%d)\n", > ctrl->nr_reconnects, status); > if (status > 0 && (status & NVME_SC_DNR)) { > dev_info(ctrl->device, > "Removing controller...\n"); > nvme_delete_ctrl(ctrl); > } else { > nvme_tcp_reconnect_or_remove(ctrl); > } > } > } > Good point. >> dev_info(ctrl->device, "Successfully reconnected (%d attempt)\n", >> @@ -2178,7 +2187,7 @@ static void nvme_tcp_reconnect_ctrl_work(struct >> work_struct *work) >> requeue: >> dev_info(ctrl->device, "Failed reconnect attempt %d\n", >> ctrl->nr_reconnects); >> - nvme_tcp_reconnect_or_remove(ctrl); >> + nvme_tcp_reconnect_or_remove(ctrl, ret); >> } >> static void nvme_tcp_error_recovery_work(struct work_struct *work) >> @@ -2203,7 +2212,7 @@ static void nvme_tcp_error_recovery_work(struct >> work_struct *work) >> return; >> } >> - nvme_tcp_reconnect_or_remove(ctrl); >> + nvme_tcp_reconnect_or_remove(ctrl, -ENOTCONN); > > This emphasizes that this is a redundant parameter. > Ok, will be reworking it. Cheers, Hannes -- Dr. Hannes Reinecke Kernel Storage Architect hare@suse.de +49 911 74053 688 SUSE Software Solutions GmbH, Maxfeldstr. 
5, 90409 Nürnberg HRB 36809 (AG Nürnberg), Geschäftsführer: Ivo Totev, Andrew Myers, Andrew McDonald, Martje Boudien Moerman ^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCH 2/2] nvme-rdma: short-circuit connect retries 2022-07-14 12:41 [PATCH 0/2] nvme: short-circuit connection retries Hannes Reinecke 2022-07-14 12:41 ` [PATCH 1/2] nvme-tcp: short-circuit connect retries Hannes Reinecke @ 2022-07-14 12:41 ` Hannes Reinecke 1 sibling, 0 replies; 6+ messages in thread From: Hannes Reinecke @ 2022-07-14 12:41 UTC (permalink / raw) To: Christoph Hellwig; +Cc: Keith Busch, Sagi Grimberg, linux-nvme, Hannes Reinecke When a reconnect attempt fails with a non-retryable status (eg when the subsystem has been unprovisioned) there hardly is any reason to retry the reconnect attempt. So pass the actual error status to nvme_rdma_reconnect_or_remove() and short-circuit retries if the DNR bit is set. Signed-off-by: Hannes Reinecke <hare@suse.de> --- drivers/nvme/host/rdma.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 84ce3347d158..bcc84f181dcd 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1065,8 +1065,10 @@ static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl) kfree(ctrl); } -static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl) +static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl, int status) { + bool recon = true; + /* If we are resetting/deleting then do nothing */ if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) { WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW || @@ -1074,7 +1076,12 @@ static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl) return; } - if (nvmf_should_reconnect(&ctrl->ctrl)) { + if (status > 0 && (status & NVME_SC_DNR)) { + dev_info(ctrl->ctrl.device, "reconnect failure %d\n", status); + recon = false; + } + + if (recon && nvmf_should_reconnect(&ctrl->ctrl)) { dev_info(ctrl->ctrl.device, "Reconnecting in %d seconds...\n", ctrl->ctrl.opts->reconnect_delay); queue_delayed_work(nvme_wq, &ctrl->reconnect_work, @@ -1173,10 +1180,12 @@ static
void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) { struct nvme_rdma_ctrl *ctrl = container_of(to_delayed_work(work), struct nvme_rdma_ctrl, reconnect_work); + int ret; ++ctrl->ctrl.nr_reconnects; - if (nvme_rdma_setup_ctrl(ctrl, false)) + ret = nvme_rdma_setup_ctrl(ctrl, false); + if (ret) goto requeue; dev_info(ctrl->ctrl.device, "Successfully reconnected (%d attempts)\n", @@ -1189,7 +1198,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) requeue: dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n", ctrl->ctrl.nr_reconnects); - nvme_rdma_reconnect_or_remove(ctrl); + nvme_rdma_reconnect_or_remove(ctrl, ret); } static void nvme_rdma_error_recovery_work(struct work_struct *work) @@ -1212,7 +1221,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work) return; } - nvme_rdma_reconnect_or_remove(ctrl); + nvme_rdma_reconnect_or_remove(ctrl, -ENOTCONN); } static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl) @@ -2274,6 +2283,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work) { struct nvme_rdma_ctrl *ctrl = container_of(work, struct nvme_rdma_ctrl, ctrl.reset_work); + int ret; nvme_stop_ctrl(&ctrl->ctrl); nvme_rdma_shutdown_ctrl(ctrl, false); @@ -2284,14 +2294,15 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work) return; } - if (nvme_rdma_setup_ctrl(ctrl, false)) + ret = nvme_rdma_setup_ctrl(ctrl, false); + if (ret) goto out_fail; return; out_fail: ++ctrl->ctrl.nr_reconnects; - nvme_rdma_reconnect_or_remove(ctrl); + nvme_rdma_reconnect_or_remove(ctrl, ret); } static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = { -- 2.29.2 ^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCHv2 0/2] nvme: short-circuit connection retries
@ 2022-07-15 6:33 Hannes Reinecke
2022-07-15 6:33 ` [PATCH 2/2] nvme-rdma: short-circuit connect retries Hannes Reinecke
0 siblings, 1 reply; 6+ messages in thread
From: Hannes Reinecke @ 2022-07-15 6:33 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: Sagi Grimberg, Keith Busch, linux-nvme, Hannes Reinecke
Hi all,
here are two patches that update the nvme-tcp and nvme-rdma transports
to match the modifications we already have in nvme-fc with commit
f25f8ef70ce2 ("nvme-fc: short-circuit reconnect retries").
The gist is to short-circuit reconnection retries if we get a
status back with the DNR bit set, in which case we shouldn't
retry the connection attempt.
As usual, comments and reviews are welcome.
Changes to the original version:
- Rework with suggestions from Sagi
Hannes Reinecke (2):
nvme-tcp: short-circuit connect retries
nvme-rdma: short-circuit connect retries
drivers/nvme/host/rdma.c | 20 +++++++++++++++++---
drivers/nvme/host/tcp.c | 21 ++++++++++++++++++---
2 files changed, 35 insertions(+), 6 deletions(-)
--
2.29.2
^ permalink raw reply [flat|nested] 6+ messages in thread* [PATCH 2/2] nvme-rdma: short-circuit connect retries 2022-07-15 6:33 [PATCHv2 0/2] nvme: short-circuit connection retries Hannes Reinecke @ 2022-07-15 6:33 ` Hannes Reinecke 0 siblings, 0 replies; 6+ messages in thread From: Hannes Reinecke @ 2022-07-15 6:33 UTC (permalink / raw) To: Christoph Hellwig; +Cc: Sagi Grimberg, Keith Busch, linux-nvme, Hannes Reinecke When a reconnect attempt fails with a non-retryable status (eg when the subsystem has been unprovisioned) there hardly is any reason to retry the reconnect attempt. So check the actual error status from nvme_rdma_setup_ctrl(), and call nvme_delete_ctrl() instead of calling nvme_rdma_reconnect_or_remove() if the DNR bit is set. Signed-off-by: Hannes Reinecke <hare@suse.de> --- drivers/nvme/host/rdma.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 7d01fb770284..a43e47c93140 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1170,10 +1170,12 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) { struct nvme_rdma_ctrl *ctrl = container_of(to_delayed_work(work), struct nvme_rdma_ctrl, reconnect_work); + int ret; ++ctrl->ctrl.nr_reconnects; - if (nvme_rdma_setup_ctrl(ctrl, false)) + ret = nvme_rdma_setup_ctrl(ctrl, false); + if (ret) goto requeue; dev_info(ctrl->ctrl.device, "Successfully reconnected (%d attempts)\n", @@ -1186,6 +1188,11 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) requeue: dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n", ctrl->ctrl.nr_reconnects); + if (ret > 0 && (ret & NVME_SC_DNR)) { + dev_info(ctrl->ctrl.device, "Removing ctrl...\n"); + nvme_delete_ctrl(&ctrl->ctrl); + return; + } nvme_rdma_reconnect_or_remove(ctrl); } @@ -2271,6 +2278,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work) { struct nvme_rdma_ctrl *ctrl = container_of(work, 
struct nvme_rdma_ctrl, ctrl.reset_work); + int ret; nvme_stop_ctrl(&ctrl->ctrl); nvme_rdma_shutdown_ctrl(ctrl, false); @@ -2281,9 +2289,15 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work) return; } - if (nvme_rdma_setup_ctrl(ctrl, false)) + ret = nvme_rdma_setup_ctrl(ctrl, false); + if (ret) { + if (ret > 0 && (ret & NVME_SC_DNR)) { + dev_info(ctrl->ctrl.device, "Removing ctrl...\n"); + nvme_delete_ctrl(&ctrl->ctrl); + return; + } goto out_fail; - + } return; out_fail: -- 2.29.2 ^ permalink raw reply related [flat|nested] 6+ messages in thread
end of thread, other threads:[~2022-07-15 6:34 UTC | newest] Thread overview: 6+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2022-07-14 12:41 [PATCH 0/2] nvme: short-circuit connection retries Hannes Reinecke 2022-07-14 12:41 ` [PATCH 1/2] nvme-tcp: short-circuit connect retries Hannes Reinecke 2022-07-14 14:35 ` Sagi Grimberg 2022-07-14 15:17 ` Hannes Reinecke 2022-07-14 12:41 ` [PATCH 2/2] nvme-rdma: " Hannes Reinecke -- strict thread matches above, loose matches on Subject: below -- 2022-07-15 6:33 [PATCHv2 0/2] nvme: short-circuit connection retries Hannes Reinecke 2022-07-15 6:33 ` [PATCH 2/2] nvme-rdma: short-circuit connect retries Hannes Reinecke
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox