[PATCH 0/2] nvme: short-circuit connection retries

public inbox for linux-nvme@lists.infradead.org
 help / color / mirror / Atom feed

* [PATCH 0/2] nvme: short-circuit connection retries
@ 2022-07-14 12:41 Hannes Reinecke
  2022-07-14 12:41 ` [PATCH 1/2] nvme-tcp: short-circuit connect retries Hannes Reinecke
  2022-07-14 12:41 ` [PATCH 2/2] nvme-rdma: " Hannes Reinecke
  0 siblings, 2 replies; 6+ messages in thread
From: Hannes Reinecke @ 2022-07-14 12:41 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Keith Busch, Sagi Grimberg, linux-nvme, Hannes Reinecke

Hi all,

here are two patches updating nvme-tcp and nvme-rdma transport
to modifications we've already have in nvme-fc with commit
f25f8ef70ce2 ("nvme-fc: short-circuit reconnect retries").
Gist is to short-circuit reconnection retries if we get a
status back with the DNR bit set, in which case we shouldn't
retry the connection attempt.

As usual, comments and reviews are welcome.

Hannes Reinecke (2):
  nvme-tcp: short-circuit connect retries
  nvme-rdma: short-circuit connect retries

 drivers/nvme/host/rdma.c | 25 ++++++++++++++++++-------
 drivers/nvme/host/tcp.c  | 25 ++++++++++++++++++-------
 2 files changed, 36 insertions(+), 14 deletions(-)

-- 
2.29.2

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 1/2] nvme-tcp: short-circuit connect retries
  2022-07-14 12:41 [PATCH 0/2] nvme: short-circuit connection retries Hannes Reinecke
@ 2022-07-14 12:41 ` Hannes Reinecke
  2022-07-14 14:35   ` Sagi Grimberg
  2022-07-14 12:41 ` [PATCH 2/2] nvme-rdma: " Hannes Reinecke
  1 sibling, 1 reply; 6+ messages in thread
From: Hannes Reinecke @ 2022-07-14 12:41 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Keith Busch, Sagi Grimberg, linux-nvme, Hannes Reinecke

When a reconnect attempt fails with a non-retryable status
(eg when the subsystem has been unprovisioned) there hardly
is any reason to retry the reconnect attempt.
So pass the actual error status to nvme_tcp_reconnect_or_remove()
and short-circuit retries if the DNR bit is set.

Signed-off-by: Hannes Reinecke <hare@suse.de>
---
 drivers/nvme/host/tcp.c | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 70096a2e8762..4220c1ad6b29 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -2065,8 +2065,10 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
 	nvme_tcp_destroy_io_queues(ctrl, remove);
 }
 
-static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
+static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl, int status)
 {
+	bool recon = true;
+
 	/* If we are resetting/deleting then do nothing */
 	if (ctrl->state != NVME_CTRL_CONNECTING) {
 		WARN_ON_ONCE(ctrl->state == NVME_CTRL_NEW ||
@@ -2074,7 +2076,12 @@ static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
 		return;
 	}
 
-	if (nvmf_should_reconnect(ctrl)) {
+	if (status > 0 && (status & NVME_SC_DNR)) {
+		dev_info(ctrl->device, "reconnect failure %d\n", status);
+		recon = false;
+	}
+
+	if (recon && nvmf_should_reconnect(ctrl)) {
 		dev_info(ctrl->device, "Reconnecting in %d seconds...\n",
 			ctrl->opts->reconnect_delay);
 		queue_delayed_work(nvme_wq, &to_tcp_ctrl(ctrl)->connect_work,
@@ -2162,10 +2169,12 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work)
 	struct nvme_tcp_ctrl *tcp_ctrl = container_of(to_delayed_work(work),
 			struct nvme_tcp_ctrl, connect_work);
 	struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl;
+	int ret;
 
 	++ctrl->nr_reconnects;
 
-	if (nvme_tcp_setup_ctrl(ctrl, false))
+	ret = nvme_tcp_setup_ctrl(ctrl, false);
+	if (ret)
 		goto requeue;
 
 	dev_info(ctrl->device, "Successfully reconnected (%d attempt)\n",
@@ -2178,7 +2187,7 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work)
 requeue:
 	dev_info(ctrl->device, "Failed reconnect attempt %d\n",
 			ctrl->nr_reconnects);
-	nvme_tcp_reconnect_or_remove(ctrl);
+	nvme_tcp_reconnect_or_remove(ctrl, ret);
 }
 
 static void nvme_tcp_error_recovery_work(struct work_struct *work)
@@ -2203,7 +2212,7 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work)
 		return;
 	}
 
-	nvme_tcp_reconnect_or_remove(ctrl);
+	nvme_tcp_reconnect_or_remove(ctrl, -ENOTCONN);
 }
 
 static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown)
@@ -2229,6 +2238,7 @@ static void nvme_reset_ctrl_work(struct work_struct *work)
 {
 	struct nvme_ctrl *ctrl =
 		container_of(work, struct nvme_ctrl, reset_work);
+	int ret;
 
 	nvme_stop_ctrl(ctrl);
 	nvme_tcp_teardown_ctrl(ctrl, false);
@@ -2240,14 +2250,15 @@ static void nvme_reset_ctrl_work(struct work_struct *work)
 		return;
 	}
 
-	if (nvme_tcp_setup_ctrl(ctrl, false))
+	ret = nvme_tcp_setup_ctrl(ctrl, false);
+	if (ret)
 		goto out_fail;
 
 	return;
 
 out_fail:
 	++ctrl->nr_reconnects;
-	nvme_tcp_reconnect_or_remove(ctrl);
+	nvme_tcp_reconnect_or_remove(ctrl, ret);
 }
 
 static void nvme_tcp_free_ctrl(struct nvme_ctrl *nctrl)
-- 
2.29.2



^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 2/2] nvme-rdma: short-circuit connect retries
  2022-07-14 12:41 [PATCH 0/2] nvme: short-circuit connection retries Hannes Reinecke
  2022-07-14 12:41 ` [PATCH 1/2] nvme-tcp: short-circuit connect retries Hannes Reinecke
@ 2022-07-14 12:41 ` Hannes Reinecke
  1 sibling, 0 replies; 6+ messages in thread
From: Hannes Reinecke @ 2022-07-14 12:41 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Keith Busch, Sagi Grimberg, linux-nvme, Hannes Reinecke

When a reconnect attempt fails with a non-retryable status
(eg when the subsystem has been unprovisioned) there hardly
is any reason to retry the reconnect attempt.
So pass the actual error status to nvme_tcp_reconnect_or_remove()
and short-circuit retries if the DNR bit is set.

Signed-off-by: Hannes Reinecke <hare@suse.de>
---
 drivers/nvme/host/rdma.c | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 84ce3347d158..bcc84f181dcd 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1065,8 +1065,10 @@ static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
 	kfree(ctrl);
 }
 
-static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
+static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl, int status)
 {
+	bool recon = true;
+
 	/* If we are resetting/deleting then do nothing */
 	if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) {
 		WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW ||
@@ -1074,7 +1076,12 @@ static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
 		return;
 	}
 
-	if (nvmf_should_reconnect(&ctrl->ctrl)) {
+	if (status > 0 && (status & NVME_SC_DNR)) {
+		dev_info(ctrl->ctrl.device, "reconnect failure %d\n", status);
+		recon = false;
+	}
+
+	if (recon && nvmf_should_reconnect(&ctrl->ctrl)) {
 		dev_info(ctrl->ctrl.device, "Reconnecting in %d seconds...\n",
 			ctrl->ctrl.opts->reconnect_delay);
 		queue_delayed_work(nvme_wq, &ctrl->reconnect_work,
@@ -1173,10 +1180,12 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 {
 	struct nvme_rdma_ctrl *ctrl = container_of(to_delayed_work(work),
 			struct nvme_rdma_ctrl, reconnect_work);
+	int ret;
 
 	++ctrl->ctrl.nr_reconnects;
 
-	if (nvme_rdma_setup_ctrl(ctrl, false))
+	ret = nvme_rdma_setup_ctrl(ctrl, false);
+	if (ret)
 		goto requeue;
 
 	dev_info(ctrl->ctrl.device, "Successfully reconnected (%d attempts)\n",
@@ -1189,7 +1198,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 requeue:
 	dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
 			ctrl->ctrl.nr_reconnects);
-	nvme_rdma_reconnect_or_remove(ctrl);
+	nvme_rdma_reconnect_or_remove(ctrl, ret);
 }
 
 static void nvme_rdma_error_recovery_work(struct work_struct *work)
@@ -1212,7 +1221,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
 		return;
 	}
 
-	nvme_rdma_reconnect_or_remove(ctrl);
+	nvme_rdma_reconnect_or_remove(ctrl, -ENOTCONN);
 }
 
 static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl)
@@ -2274,6 +2283,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
 {
 	struct nvme_rdma_ctrl *ctrl =
 		container_of(work, struct nvme_rdma_ctrl, ctrl.reset_work);
+	int ret;
 
 	nvme_stop_ctrl(&ctrl->ctrl);
 	nvme_rdma_shutdown_ctrl(ctrl, false);
@@ -2284,14 +2294,15 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
 		return;
 	}
 
-	if (nvme_rdma_setup_ctrl(ctrl, false))
+	ret = nvme_rdma_setup_ctrl(ctrl, false);
+	if (ret)
 		goto out_fail;
 
 	return;
 
 out_fail:
 	++ctrl->ctrl.nr_reconnects;
-	nvme_rdma_reconnect_or_remove(ctrl);
+	nvme_rdma_reconnect_or_remove(ctrl, ret);
 }
 
 static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
-- 
2.29.2



^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/2] nvme-tcp: short-circuit connect retries
  2022-07-14 12:41 ` [PATCH 1/2] nvme-tcp: short-circuit connect retries Hannes Reinecke
@ 2022-07-14 14:35   ` Sagi Grimberg
  2022-07-14 15:17     ` Hannes Reinecke
  0 siblings, 1 reply; 6+ messages in thread
From: Sagi Grimberg @ 2022-07-14 14:35 UTC (permalink / raw)
  To: Hannes Reinecke, Christoph Hellwig; +Cc: Keith Busch, linux-nvme


> When a reconnect attempt fails with a non-retryable status
> (eg when the subsystem has been unprovisioned) there hardly
> is any reason to retry the reconnect attempt.
> So pass the actual error status to nvme_tcp_reconnect_or_remove()
> and short-circuit retries if the DNR bit is set.
> 
> Signed-off-by: Hannes Reinecke <hare@suse.de>
> ---
>   drivers/nvme/host/tcp.c | 25 ++++++++++++++++++-------
>   1 file changed, 18 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
> index 70096a2e8762..4220c1ad6b29 100644
> --- a/drivers/nvme/host/tcp.c
> +++ b/drivers/nvme/host/tcp.c
> @@ -2065,8 +2065,10 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
>   	nvme_tcp_destroy_io_queues(ctrl, remove);
>   }
>   
> -static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
> +static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl, int status)
>   {
> +	bool recon = true;
> +
>   	/* If we are resetting/deleting then do nothing */
>   	if (ctrl->state != NVME_CTRL_CONNECTING) {
>   		WARN_ON_ONCE(ctrl->state == NVME_CTRL_NEW ||
> @@ -2074,7 +2076,12 @@ static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
>   		return;
>   	}
>   
> -	if (nvmf_should_reconnect(ctrl)) {
> +	if (status > 0 && (status & NVME_SC_DNR)) {
> +		dev_info(ctrl->device, "reconnect failure %d\n", status);
> +		recon = false;
> +	}
> +

Why should we call this if we need to remove for sure?
I suggest we just change the call-site.

> +	if (recon && nvmf_should_reconnect(ctrl)) {
>   		dev_info(ctrl->device, "Reconnecting in %d seconds...\n",
>   			ctrl->opts->reconnect_delay);
>   		queue_delayed_work(nvme_wq, &to_tcp_ctrl(ctrl)->connect_work,
> @@ -2162,10 +2169,12 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work)
>   	struct nvme_tcp_ctrl *tcp_ctrl = container_of(to_delayed_work(work),
>   			struct nvme_tcp_ctrl, connect_work);
>   	struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl;
> +	int ret;
>   
>   	++ctrl->nr_reconnects;
>   
> -	if (nvme_tcp_setup_ctrl(ctrl, false))
> +	ret = nvme_tcp_setup_ctrl(ctrl, false);
> +	if (ret)
>   		goto requeue;

	status = nvme_tcp_setup_ctrl(ctrl, false);
	if (!status) {
		dev_info(ctrl->device,
			"Successfully reconnected (%d attempt)\n",
                         ctrl->nr_reconnects);
		ctrl->nr_reconnects = 0;
	} else {
		dev_info(ctrl->device,
			"Failed reconnect attempt %d (%d)\n",
				ctrl->nr_reconnects, status);
		if (status > 0 && (status & NVME_SC_DNR)) {
			dev_info(ctrl->device,
				"Removing controller...\n");
			nvme_delete_ctrl(ctrl);
		} else {
			nvme_tcp_reconnect_or_remove(ctrl);
		}
	}
}

>   
>   	dev_info(ctrl->device, "Successfully reconnected (%d attempt)\n",
> @@ -2178,7 +2187,7 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work)
>   requeue:
>   	dev_info(ctrl->device, "Failed reconnect attempt %d\n",
>   			ctrl->nr_reconnects);
> -	nvme_tcp_reconnect_or_remove(ctrl);
> +	nvme_tcp_reconnect_or_remove(ctrl, ret);
>   }
>   
>   static void nvme_tcp_error_recovery_work(struct work_struct *work)
> @@ -2203,7 +2212,7 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work)
>   		return;
>   	}
>   
> -	nvme_tcp_reconnect_or_remove(ctrl);
> +	nvme_tcp_reconnect_or_remove(ctrl, -ENOTCONN);

This emphasizes that this is a redundant parameter.

>   }
>   
>   static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown)
> @@ -2229,6 +2238,7 @@ static void nvme_reset_ctrl_work(struct work_struct *work)
>   {
>   	struct nvme_ctrl *ctrl =
>   		container_of(work, struct nvme_ctrl, reset_work);
> +	int ret;
>   
>   	nvme_stop_ctrl(ctrl);
>   	nvme_tcp_teardown_ctrl(ctrl, false);
> @@ -2240,14 +2250,15 @@ static void nvme_reset_ctrl_work(struct work_struct *work)
>   		return;
>   	}
>   
> -	if (nvme_tcp_setup_ctrl(ctrl, false))
> +	ret = nvme_tcp_setup_ctrl(ctrl, false);
> +	if (ret)
>   		goto out_fail;

Similar error handling here.

	status = nvme_tcp_setup_ctrl(ctrl, false);
	if (status) {
		++ctrl->nr_reconnects;
		if (status > 0 && (status & NVME_SC_DNR)) {
			dev_info(ctrl->device,
				"Removing controller...\n");
			nvme_delete_ctrl(ctrl);
		} else {
			nvme_tcp_reconnect_or_remove(ctrl);
		}
	}

>   
>   	return;
>   
>   out_fail:
>   	++ctrl->nr_reconnects;
> -	nvme_tcp_reconnect_or_remove(ctrl);
> +	nvme_tcp_reconnect_or_remove(ctrl, ret);
>   }
>   
>   static void nvme_tcp_free_ctrl(struct nvme_ctrl *nctrl)


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/2] nvme-tcp: short-circuit connect retries
  2022-07-14 14:35   ` Sagi Grimberg
@ 2022-07-14 15:17     ` Hannes Reinecke
  0 siblings, 0 replies; 6+ messages in thread
From: Hannes Reinecke @ 2022-07-14 15:17 UTC (permalink / raw)
  To: Sagi Grimberg, Christoph Hellwig; +Cc: Keith Busch, linux-nvme

On 7/14/22 16:35, Sagi Grimberg wrote:
> 
>> When a reconnect attempt fails with a non-retryable status
>> (eg when the subsystem has been unprovisioned) there hardly
>> is any reason to retry the reconnect attempt.
>> So pass the actual error status to nvme_tcp_reconnect_or_remove()
>> and short-circuit retries if the DNR bit is set.
>>
>> Signed-off-by: Hannes Reinecke <hare@suse.de>
>> ---
>>   drivers/nvme/host/tcp.c | 25 ++++++++++++++++++-------
>>   1 file changed, 18 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
>> index 70096a2e8762..4220c1ad6b29 100644
>> --- a/drivers/nvme/host/tcp.c
>> +++ b/drivers/nvme/host/tcp.c
>> @@ -2065,8 +2065,10 @@ static void nvme_tcp_teardown_io_queues(struct 
>> nvme_ctrl *ctrl,
>>       nvme_tcp_destroy_io_queues(ctrl, remove);
>>   }
>> -static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
>> +static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl, int 
>> status)
>>   {
>> +    bool recon = true;
>> +
>>       /* If we are resetting/deleting then do nothing */
>>       if (ctrl->state != NVME_CTRL_CONNECTING) {
>>           WARN_ON_ONCE(ctrl->state == NVME_CTRL_NEW ||
>> @@ -2074,7 +2076,12 @@ static void nvme_tcp_reconnect_or_remove(struct 
>> nvme_ctrl *ctrl)
>>           return;
>>       }
>> -    if (nvmf_should_reconnect(ctrl)) {
>> +    if (status > 0 && (status & NVME_SC_DNR)) {
>> +        dev_info(ctrl->device, "reconnect failure %d\n", status);
>> +        recon = false;
>> +    }
>> +
> 
> Why should we call this if we need to remove for sure?
> I suggest we just change the call-site.
> 
>> +    if (recon && nvmf_should_reconnect(ctrl)) {
>>           dev_info(ctrl->device, "Reconnecting in %d seconds...\n",
>>               ctrl->opts->reconnect_delay);
>>           queue_delayed_work(nvme_wq, &to_tcp_ctrl(ctrl)->connect_work,
>> @@ -2162,10 +2169,12 @@ static void 
>> nvme_tcp_reconnect_ctrl_work(struct work_struct *work)
>>       struct nvme_tcp_ctrl *tcp_ctrl = 
>> container_of(to_delayed_work(work),
>>               struct nvme_tcp_ctrl, connect_work);
>>       struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl;
>> +    int ret;
>>       ++ctrl->nr_reconnects;
>> -    if (nvme_tcp_setup_ctrl(ctrl, false))
>> +    ret = nvme_tcp_setup_ctrl(ctrl, false);
>> +    if (ret)
>>           goto requeue;
> 
>      status = nvme_tcp_setup_ctrl(ctrl, false);
>      if (!status) {
>          dev_info(ctrl->device,
>              "Successfully reconnected (%d attempt)\n",
>                          ctrl->nr_reconnects);
>          ctrl->nr_reconnects = 0;
>      } else {
>          dev_info(ctrl->device,
>              "Failed reconnect attempt %d (%d)\n",
>                  ctrl->nr_reconnects, status);
>          if (status > 0 && (status & NVME_SC_DNR)) {
>              dev_info(ctrl->device,
>                  "Removing controller...\n");
>              nvme_delete_ctrl(ctrl);
>          } else {
>              nvme_tcp_reconnect_or_remove(ctrl);
>          }
>      }
> }
> 
Good point.

>>       dev_info(ctrl->device, "Successfully reconnected (%d attempt)\n",
>> @@ -2178,7 +2187,7 @@ static void nvme_tcp_reconnect_ctrl_work(struct 
>> work_struct *work)
>>   requeue:
>>       dev_info(ctrl->device, "Failed reconnect attempt %d\n",
>>               ctrl->nr_reconnects);
>> -    nvme_tcp_reconnect_or_remove(ctrl);
>> +    nvme_tcp_reconnect_or_remove(ctrl, ret);
>>   }
>>   static void nvme_tcp_error_recovery_work(struct work_struct *work)
>> @@ -2203,7 +2212,7 @@ static void nvme_tcp_error_recovery_work(struct 
>> work_struct *work)
>>           return;
>>       }
>> -    nvme_tcp_reconnect_or_remove(ctrl);
>> +    nvme_tcp_reconnect_or_remove(ctrl, -ENOTCONN);
> 
> This emphasizes that this is a redundant parameter.
> 

Ok, will be reworking it.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                Kernel Storage Architect
hare@suse.de                              +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Ivo Totev, Andrew
Myers, Andrew McDonald, Martje Boudien Moerman


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 1/2] nvme-tcp: short-circuit connect retries
  2022-07-15  6:33 [PATCHv2 0/2] nvme: short-circuit connection retries Hannes Reinecke
@ 2022-07-15  6:33 ` Hannes Reinecke
  0 siblings, 0 replies; 6+ messages in thread
From: Hannes Reinecke @ 2022-07-15  6:33 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Sagi Grimberg, Keith Busch, linux-nvme, Hannes Reinecke

When a reconnect attempt fails with a non-retryable status
(eg when the subsystem has been unprovisioned) there hardly
is any reason to retry the reconnect attempt.
So check the actual error status from nvme_tcp_setup_ctrl(), and
call nvme_delete_ctrl() instead of calling nvme_tcp_reconnect_or_remove()
if the DNR bit is set.

Signed-off-by: Hannes Reinecke <hare@suse.de>
---
 drivers/nvme/host/tcp.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index c854e69defb0..a15e3ed02b64 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -2157,10 +2157,12 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work)
 	struct nvme_tcp_ctrl *tcp_ctrl = container_of(to_delayed_work(work),
 			struct nvme_tcp_ctrl, connect_work);
 	struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl;
+	int ret;
 
 	++ctrl->nr_reconnects;
 
-	if (nvme_tcp_setup_ctrl(ctrl, false))
+	ret = nvme_tcp_setup_ctrl(ctrl, false);
+	if (ret)
 		goto requeue;
 
 	dev_info(ctrl->device, "Successfully reconnected (%d attempt)\n",
@@ -2173,6 +2175,12 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work)
 requeue:
 	dev_info(ctrl->device, "Failed reconnect attempt %d\n",
 			ctrl->nr_reconnects);
+	if (ret > 0 && (ret & NVME_SC_DNR)) {
+		dev_info(ctrl->device,
+			 "Removing ctrl...\n");
+		nvme_delete_ctrl(ctrl);
+		return;
+	}
 	nvme_tcp_reconnect_or_remove(ctrl);
 }
 
@@ -2224,6 +2232,7 @@ static void nvme_reset_ctrl_work(struct work_struct *work)
 {
 	struct nvme_ctrl *ctrl =
 		container_of(work, struct nvme_ctrl, reset_work);
+	int ret;
 
 	nvme_stop_ctrl(ctrl);
 	nvme_tcp_teardown_ctrl(ctrl, false);
@@ -2235,9 +2244,15 @@ static void nvme_reset_ctrl_work(struct work_struct *work)
 		return;
 	}
 
-	if (nvme_tcp_setup_ctrl(ctrl, false))
+	ret = nvme_tcp_setup_ctrl(ctrl, false);
+	if (ret) {
+		if (ret > 0 && (ret & NVME_SC_DNR)) {
+			dev_info(ctrl->device, "Removing ctrl...\n");
+			nvme_delete_ctrl(ctrl);
+			return;
+		}
 		goto out_fail;
-
+	}
 	return;
 
 out_fail:
-- 
2.29.2



^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2022-07-15  6:34 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2022-07-14 12:41 [PATCH 0/2] nvme: short-circuit connection retries Hannes Reinecke
2022-07-14 12:41 ` [PATCH 1/2] nvme-tcp: short-circuit connect retries Hannes Reinecke
2022-07-14 14:35   ` Sagi Grimberg
2022-07-14 15:17     ` Hannes Reinecke
2022-07-14 12:41 ` [PATCH 2/2] nvme-rdma: " Hannes Reinecke
  -- strict thread matches above, loose matches on Subject: below --
2022-07-15  6:33 [PATCHv2 0/2] nvme: short-circuit connection retries Hannes Reinecke
2022-07-15  6:33 ` [PATCH 1/2] nvme-tcp: short-circuit connect retries Hannes Reinecke

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox