* [PATCHv2 0/2] nvme: short-circuit connection retries
@ 2022-07-15 6:33 Hannes Reinecke
2022-07-15 6:33 ` [PATCH 1/2] nvme-tcp: short-circuit connect retries Hannes Reinecke
2022-07-15 6:33 ` [PATCH 2/2] nvme-rdma: " Hannes Reinecke
0 siblings, 2 replies; 4+ messages in thread
From: Hannes Reinecke @ 2022-07-15 6:33 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: Sagi Grimberg, Keith Busch, linux-nvme, Hannes Reinecke
Hi all,
here are two patches updating nvme-tcp and nvme-rdma transport
to modifications we've already have in nvme-fc with commit
f25f8ef70ce2 ("nvme-fc: short-circuit reconnect retries").
Gist is to short-circuit reconnection retries if we get a
status back with the DNR bit set, in which case we shouldn't
retry the connection attempt.
As usual, comments and reviews are welcome.
Changes to the original version:
- Rework with suggestions from Sagi
Hannes Reinecke (2):
nvme-tcp: short-circuit connect retries
nvme-rdma: short-circuit connect retries
drivers/nvme/host/rdma.c | 20 +++++++++++++++++---
drivers/nvme/host/tcp.c | 21 ++++++++++++++++++---
2 files changed, 35 insertions(+), 6 deletions(-)
--
2.29.2
^ permalink raw reply [flat|nested] 4+ messages in thread* [PATCH 1/2] nvme-tcp: short-circuit connect retries 2022-07-15 6:33 [PATCHv2 0/2] nvme: short-circuit connection retries Hannes Reinecke @ 2022-07-15 6:33 ` Hannes Reinecke 2022-07-15 6:33 ` [PATCH 2/2] nvme-rdma: " Hannes Reinecke 1 sibling, 0 replies; 4+ messages in thread From: Hannes Reinecke @ 2022-07-15 6:33 UTC (permalink / raw) To: Christoph Hellwig; +Cc: Sagi Grimberg, Keith Busch, linux-nvme, Hannes Reinecke When a reconnect attempt fails with a non-retryable status (eg when the subsystem has been unprovisioned) there hardly is any reason to retry the reconnect attempt. So check the actual error status from nvme_tcp_setup_ctrl(), and call nvme_delete_ctrl() instead of calling nvme_tcp_reconnect_or_remove() if the DNR bit is set. Signed-off-by: Hannes Reinecke <hare@suse.de> --- drivers/nvme/host/tcp.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index c854e69defb0..a15e3ed02b64 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2157,10 +2157,12 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work) struct nvme_tcp_ctrl *tcp_ctrl = container_of(to_delayed_work(work), struct nvme_tcp_ctrl, connect_work); struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl; + int ret; ++ctrl->nr_reconnects; - if (nvme_tcp_setup_ctrl(ctrl, false)) + ret = nvme_tcp_setup_ctrl(ctrl, false); + if (ret) goto requeue; dev_info(ctrl->device, "Successfully reconnected (%d attempt)\n", @@ -2173,6 +2175,12 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work) requeue: dev_info(ctrl->device, "Failed reconnect attempt %d\n", ctrl->nr_reconnects); + if (ret > 0 && (ret & NVME_SC_DNR)) { + dev_info(ctrl->device, + "Removing ctrl...\n"); + nvme_delete_ctrl(ctrl); + return; + } nvme_tcp_reconnect_or_remove(ctrl); } @@ -2224,6 +2232,7 @@ static void nvme_reset_ctrl_work(struct work_struct *work) { struct nvme_ctrl *ctrl = container_of(work, struct nvme_ctrl, reset_work); + int ret; nvme_stop_ctrl(ctrl); nvme_tcp_teardown_ctrl(ctrl, false); @@ -2235,9 +2244,15 @@ static void nvme_reset_ctrl_work(struct work_struct *work) return; } - if (nvme_tcp_setup_ctrl(ctrl, false)) + ret = nvme_tcp_setup_ctrl(ctrl, false); + if (ret) { + if (ret > 0 && (ret & NVME_SC_DNR)) { + dev_info(ctrl->device, "Removing ctrl...\n"); + nvme_delete_ctrl(ctrl); + return; + } goto out_fail; - + } return; out_fail: -- 2.29.2 ^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 2/2] nvme-rdma: short-circuit connect retries 2022-07-15 6:33 [PATCHv2 0/2] nvme: short-circuit connection retries Hannes Reinecke 2022-07-15 6:33 ` [PATCH 1/2] nvme-tcp: short-circuit connect retries Hannes Reinecke @ 2022-07-15 6:33 ` Hannes Reinecke 1 sibling, 0 replies; 4+ messages in thread From: Hannes Reinecke @ 2022-07-15 6:33 UTC (permalink / raw) To: Christoph Hellwig; +Cc: Sagi Grimberg, Keith Busch, linux-nvme, Hannes Reinecke When a reconnect attempt fails with a non-retryable status (eg when the subsystem has been unprovisioned) there hardly is any reason to retry the reconnect attempt. So check the actual error status from nvme_rdma_setup_ctrl(), and call nvme_delete_ctrl() instead of calling nvme_rdma_reconnect_or_remove() if the DNR bit is set. Signed-off-by: Hannes Reinecke <hare@suse.de> --- drivers/nvme/host/rdma.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 7d01fb770284..a43e47c93140 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1170,10 +1170,12 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) { struct nvme_rdma_ctrl *ctrl = container_of(to_delayed_work(work), struct nvme_rdma_ctrl, reconnect_work); + int ret; ++ctrl->ctrl.nr_reconnects; - if (nvme_rdma_setup_ctrl(ctrl, false)) + ret = nvme_rdma_setup_ctrl(ctrl, false); + if (ret) goto requeue; dev_info(ctrl->ctrl.device, "Successfully reconnected (%d attempts)\n", @@ -1186,6 +1188,11 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) requeue: dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n", ctrl->ctrl.nr_reconnects); + if (ret > 0 && (ret & NVME_SC_DNR)) { + dev_info(ctrl->ctrl.device, "Removing ctrl...\n"); + nvme_delete_ctrl(&ctrl->ctrl); + return; + } nvme_rdma_reconnect_or_remove(ctrl); } @@ -2271,6 +2278,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work) { struct nvme_rdma_ctrl *ctrl = container_of(work, struct nvme_rdma_ctrl, ctrl.reset_work); + int ret; nvme_stop_ctrl(&ctrl->ctrl); nvme_rdma_shutdown_ctrl(ctrl, false); @@ -2281,9 +2289,15 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work) return; } - if (nvme_rdma_setup_ctrl(ctrl, false)) + ret = nvme_rdma_setup_ctrl(ctrl, false); + if (ret) { + if (ret > 0 && (ret & NVME_SC_DNR)) { + dev_info(ctrl->ctrl.device, "Removing ctrl...\n"); + nvme_delete_ctrl(&ctrl->ctrl); + return; + } goto out_fail; - + } return; out_fail: -- 2.29.2 ^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 0/2] nvme: short-circuit connection retries
@ 2022-07-14 12:41 Hannes Reinecke
2022-07-14 12:41 ` [PATCH 2/2] nvme-rdma: short-circuit connect retries Hannes Reinecke
0 siblings, 1 reply; 4+ messages in thread
From: Hannes Reinecke @ 2022-07-14 12:41 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: Keith Busch, Sagi Grimberg, linux-nvme, Hannes Reinecke
Hi all,
here are two patches updating nvme-tcp and nvme-rdma transport
to modifications we've already have in nvme-fc with commit
f25f8ef70ce2 ("nvme-fc: short-circuit reconnect retries").
Gist is to short-circuit reconnection retries if we get a
status back with the DNR bit set, in which case we shouldn't
retry the connection attempt.
As usual, comments and reviews are welcome.
Hannes Reinecke (2):
nvme-tcp: short-circuit connect retries
nvme-rdma: short-circuit connect retries
drivers/nvme/host/rdma.c | 25 ++++++++++++++++++-------
drivers/nvme/host/tcp.c | 25 ++++++++++++++++++-------
2 files changed, 36 insertions(+), 14 deletions(-)
--
2.29.2
^ permalink raw reply [flat|nested] 4+ messages in thread* [PATCH 2/2] nvme-rdma: short-circuit connect retries 2022-07-14 12:41 [PATCH 0/2] nvme: short-circuit connection retries Hannes Reinecke @ 2022-07-14 12:41 ` Hannes Reinecke 0 siblings, 0 replies; 4+ messages in thread From: Hannes Reinecke @ 2022-07-14 12:41 UTC (permalink / raw) To: Christoph Hellwig; +Cc: Keith Busch, Sagi Grimberg, linux-nvme, Hannes Reinecke When a reconnect attempt fails with a non-retryable status (eg when the subsystem has been unprovisioned) there hardly is any reason to retry the reconnect attempt. So pass the actual error status to nvme_tcp_reconnect_or_remove() and short-circuit retries if the DNR bit is set. Signed-off-by: Hannes Reinecke <hare@suse.de> --- drivers/nvme/host/rdma.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 84ce3347d158..bcc84f181dcd 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1065,8 +1065,10 @@ static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl) kfree(ctrl); } -static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl) +static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl, int status) { + bool recon = true; + /* If we are resetting/deleting then do nothing */ if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) { WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW || @@ -1074,7 +1076,12 @@ static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl) return; } - if (nvmf_should_reconnect(&ctrl->ctrl)) { + if (status > 0 && (status & NVME_SC_DNR)) { + dev_info(ctrl->ctrl.device, "reconnect failure %d\n", status); + recon = false; + } + + if (recon && nvmf_should_reconnect(&ctrl->ctrl)) { dev_info(ctrl->ctrl.device, "Reconnecting in %d seconds...\n", ctrl->ctrl.opts->reconnect_delay); queue_delayed_work(nvme_wq, &ctrl->reconnect_work, @@ -1173,10 +1180,12 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) { struct nvme_rdma_ctrl *ctrl = container_of(to_delayed_work(work), struct nvme_rdma_ctrl, reconnect_work); + int ret; ++ctrl->ctrl.nr_reconnects; - if (nvme_rdma_setup_ctrl(ctrl, false)) + ret = nvme_rdma_setup_ctrl(ctrl, false); + if (ret) goto requeue; dev_info(ctrl->ctrl.device, "Successfully reconnected (%d attempts)\n", @@ -1189,7 +1198,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) requeue: dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n", ctrl->ctrl.nr_reconnects); - nvme_rdma_reconnect_or_remove(ctrl); + nvme_rdma_reconnect_or_remove(ctrl, ret); } static void nvme_rdma_error_recovery_work(struct work_struct *work) @@ -1212,7 +1221,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work) return; } - nvme_rdma_reconnect_or_remove(ctrl); + nvme_rdma_reconnect_or_remove(ctrl, -ENOTCONN); } static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl) @@ -2274,6 +2283,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work) { struct nvme_rdma_ctrl *ctrl = container_of(work, struct nvme_rdma_ctrl, ctrl.reset_work); + int ret; nvme_stop_ctrl(&ctrl->ctrl); nvme_rdma_shutdown_ctrl(ctrl, false); @@ -2284,14 +2294,15 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work) return; } - if (nvme_rdma_setup_ctrl(ctrl, false)) + ret = nvme_rdma_setup_ctrl(ctrl, false); + if (ret) goto out_fail; return; out_fail: ++ctrl->ctrl.nr_reconnects; - nvme_rdma_reconnect_or_remove(ctrl); + nvme_rdma_reconnect_or_remove(ctrl, ret); } static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = { -- 2.29.2 ^ permalink raw reply related [flat|nested] 4+ messages in thread
end of thread, other threads:[~2022-07-15 6:34 UTC | newest] Thread overview: 4+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2022-07-15 6:33 [PATCHv2 0/2] nvme: short-circuit connection retries Hannes Reinecke 2022-07-15 6:33 ` [PATCH 1/2] nvme-tcp: short-circuit connect retries Hannes Reinecke 2022-07-15 6:33 ` [PATCH 2/2] nvme-rdma: " Hannes Reinecke -- strict thread matches above, loose matches on Subject: below -- 2022-07-14 12:41 [PATCH 0/2] nvme: short-circuit connection retries Hannes Reinecke 2022-07-14 12:41 ` [PATCH 2/2] nvme-rdma: short-circuit connect retries Hannes Reinecke
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox