Re: [PATCH 02/10] nvme-fabrics: introduce nvmf_reconnect_or_remove API

public inbox for linux-nvme@lists.infradead.org
 help / color / mirror / Atom feed

From: James Smart <jsmart2021@gmail.com>
To: Max Gurtovoy <mgurtovoy@nvidia.com>,
	linux-nvme@lists.infradead.org, hch@lst.de, kbusch@kernel.org,
	sagi@grimberg.me
Cc: chaitanyak@nvidia.com, israelr@nvidia.com, oren@nvidia.com, hare@suse.de
Subject: Re: [PATCH 02/10] nvme-fabrics: introduce nvmf_reconnect_or_remove API
Date: Tue, 2 Nov 2021 16:38:34 -0700	[thread overview]
Message-ID: <b4fb943d-3860-62d2-985e-0abd075bed48@gmail.com> (raw)
In-Reply-To: <20211020103844.7533-3-mgurtovoy@nvidia.com>

On 10/20/2021 3:38 AM, Max Gurtovoy wrote:
> This logic is duplicated today for RDMA and TCP controllers. Move it to
> the fabrics driver and export it as a new API.
> 
> Also update the RDMA/TCP transport drivers to use this API and remove
> the duplicated code.
> 
> Reviewed-by: Israel Rukshin <israelr@nvidia.com>
> Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
> Reviewed-by: Hannes Reinecke <hare@suse.de>
> Signed-off-by: Max Gurtovoy <mgurtovoy@nvidia.com>
> ---
>   drivers/nvme/host/fabrics.c | 21 +++++++++++++++++++++
>   drivers/nvme/host/fabrics.h |  1 +
>   drivers/nvme/host/rdma.c    | 25 +++----------------------
>   drivers/nvme/host/tcp.c     | 26 +++-----------------------
>   4 files changed, 28 insertions(+), 45 deletions(-)
> 
> diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
> index 668c6bb7a567..4a1ef67c6fb3 100644
> --- a/drivers/nvme/host/fabrics.c
> +++ b/drivers/nvme/host/fabrics.c
> @@ -472,6 +472,27 @@ bool nvmf_should_reconnect(struct nvme_ctrl *ctrl)
>   }
>   EXPORT_SYMBOL_GPL(nvmf_should_reconnect);
>   
> +void nvmf_reconnect_or_remove(struct nvme_ctrl *ctrl)
> +{
> +	/* If we are resetting/deleting then do nothing */
> +	if (ctrl->state != NVME_CTRL_CONNECTING) {
> +		WARN_ON_ONCE(ctrl->state == NVME_CTRL_NEW ||
> +			ctrl->state == NVME_CTRL_LIVE);
> +		return;
> +	}
> +
> +	if (nvmf_should_reconnect(ctrl)) {
> +		dev_info(ctrl->device, "Reconnecting in %d seconds...\n",
> +			ctrl->opts->reconnect_delay);
> +		queue_delayed_work(nvme_wq, &ctrl->connect_work,
> +				ctrl->opts->reconnect_delay * HZ);
> +	} else {
> +		dev_info(ctrl->device, "Removing controller...\n");
> +		nvme_delete_ctrl(ctrl);
> +	}
> +}
> +EXPORT_SYMBOL_GPL(nvmf_reconnect_or_remove);
> +

This won't be sufficient for FC, so FC can't use it.  I'd have to think
about whether there's a way to restructure or wrap it, but it's not a great fit.

I do think what FC is doing relative to NVME_SC_DNR should be done in 
rdma/tcp as well.

In other words, this should minimally be:

/*
 * nvmf_reconnect_or_remove - schedule another connect attempt or delete
 * the controller.
 * @ctrl:   controller whose connect attempt failed
 * @status: result that triggered this call; either a negative -Exxx value
 *          or a positive NVMe status code
 *
 * Nothing is done unless the controller is in NVME_CTRL_CONNECTING. The
 * controller is deleted when @status is a positive value with the
 * NVME_SC_DNR bit set, or when nvmf_should_reconnect() says no; otherwise
 * connect_work is queued after opts->reconnect_delay seconds.
 */
void nvmf_reconnect_or_remove(struct nvme_ctrl *ctrl, int status)
{
         /* A reset or delete is in flight: leave the controller alone */
         if (ctrl->state != NVME_CTRL_CONNECTING) {
                 WARN_ON_ONCE(ctrl->state == NVME_CTRL_NEW ||
                         ctrl->state == NVME_CTRL_LIVE);
                 return;
         }

         /*
          * The DNR test comes first so nvmf_should_reconnect() is only
          * consulted when the failure is not already marked as final.
          */
         if ((status > 0 && (status & NVME_SC_DNR)) ||
             !nvmf_should_reconnect(ctrl)) {
                 dev_info(ctrl->device, "Removing controller...\n");
                 nvme_delete_ctrl(ctrl);
                 return;
         }

         dev_info(ctrl->device, "Reconnecting in %d seconds...\n",
                 ctrl->opts->reconnect_delay);
         queue_delayed_work(nvme_wq, &ctrl->connect_work,
                         ctrl->opts->reconnect_delay * HZ);
}
EXPORT_SYMBOL_GPL(nvmf_reconnect_or_remove);

then change the callers to pass, as status, the return value of the
operation that caused the reschedule. It'll either be a -Exxx value
or an NVMe status code returned by one of the core routines during
controller init. This way, an uncorrectable failure during controller
init will just fail w/o rescheduling.


...
> @@ -1181,7 +1162,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
>   requeue:
>   	dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
>   			ctrl->ctrl.nr_reconnects);
> -	nvme_rdma_reconnect_or_remove(ctrl);
> +	nvmf_reconnect_or_remove(&ctrl->ctrl);

This would become:

@@ -2,10 +2,12 @@ static void nvme_rdma_reconnect_ctrl_wor
  {
  	struct nvme_rdma_ctrl *ctrl = container_of(to_delayed_work(work),
  			struct nvme_rdma_ctrl, reconnect_work);
+	int ret;

  	++ctrl->ctrl.nr_reconnects;

-	if (nvme_rdma_setup_ctrl(ctrl, false))
+	ret = nvme_rdma_setup_ctrl(ctrl, false);
+	if (ret)
  		goto requeue;

  	dev_info(ctrl->ctrl.device, "Successfully reconnected (%d attempts)\n",
@@ -18,5 +20,5 @@ static void nvme_rdma_reconnect_ctrl_wor
  requeue:
  	dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
  			ctrl->ctrl.nr_reconnects);
-	nvme_rdma_reconnect_or_remove(ctrl);
+	nvme_rdma_reconnect_or_remove(ctrl, ret);
  }


>   }
>   
>   static void nvme_rdma_error_recovery_work(struct work_struct *work)
> @@ -1202,7 +1183,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
>   		return;
>   	}
>   
> -	nvme_rdma_reconnect_or_remove(ctrl);
> +	nvmf_reconnect_or_remove(&ctrl->ctrl);
>   }

@@ -16,5 +16,5 @@ static void nvme_rdma_error_recovery_wor
  		return;
  	}

-	nvme_rdma_reconnect_or_remove(ctrl);
+	nvme_rdma_reconnect_or_remove(ctrl, 0);
  }


>   
>   static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl)
> @@ -2265,7 +2246,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
>   
>   out_fail:
>   	++ctrl->ctrl.nr_reconnects;
> -	nvme_rdma_reconnect_or_remove(ctrl);
> +	nvmf_reconnect_or_remove(&ctrl->ctrl);
>   }

@@ -2,6 +2,7 @@ static void nvme_rdma_reset_ctrl_work(st
  {
  	struct nvme_rdma_ctrl *ctrl =
  		container_of(work, struct nvme_rdma_ctrl, ctrl.reset_work);
+	int ret;

  	nvme_stop_ctrl(&ctrl->ctrl);
  	nvme_rdma_shutdown_ctrl(ctrl, false);
@@ -12,12 +13,13 @@ static void nvme_rdma_reset_ctrl_work(st
  		return;
  	}

-	if (nvme_rdma_setup_ctrl(ctrl, false))
+	ret = nvme_rdma_setup_ctrl(ctrl, false);
+	if (ret)
  		goto out_fail;

  	return;

  out_fail:
  	++ctrl->ctrl.nr_reconnects;
-	nvme_rdma_reconnect_or_remove(ctrl);
+	nvme_rdma_reconnect_or_remove(ctrl, ret);
  }


And similar mods to tcp.

-- james

next prev parent reply	other threads:[~2021-11-02 23:38 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-10-20 10:38 [PATCH v2 0/10] Centrelize common fabrics code to core drivers Max Gurtovoy
2021-10-20 10:38 ` [PATCH 01/10] nvme: add connect_work attribute to nvme ctrl Max Gurtovoy
2021-11-02 22:59   ` James Smart
2021-10-20 10:38 ` [PATCH 02/10] nvme-fabrics: introduce nvmf_reconnect_or_remove API Max Gurtovoy
2021-11-02 23:38   ` James Smart [this message]
2021-10-20 10:38 ` [PATCH 03/10] nvme: add err_work attribute to nvme ctrl Max Gurtovoy
2021-10-20 11:05   ` Hannes Reinecke
2021-11-02 23:53   ` James Smart
2021-10-20 10:38 ` [PATCH 04/10] nvme-fabrics: introduce nvmf_error_recovery API Max Gurtovoy
2021-11-02 23:59   ` James Smart
2021-10-20 10:38 ` [PATCH 05/10] nvme/nvme-fabrics: introduce nvmf_error_recovery_work API Max Gurtovoy
2021-11-03  0:04   ` James Smart
2021-10-20 10:38 ` [PATCH 06/10] nvme/nvme-fabrics: introduce nvmf_reconnect_ctrl_work API Max Gurtovoy
2021-11-03  0:15   ` James Smart
2021-10-20 10:38 ` [PATCH 07/10] nvme-fabrics: add nvmf_init_ctrl/nvmf_uninit_ctrl API Max Gurtovoy
2021-11-03  0:19   ` James Smart
2021-10-20 10:38 ` [PATCH 08/10] nvme-rdma: update WARN_ON condition during reset Max Gurtovoy
2021-10-20 10:38 ` [PATCH 09/10] nvme/nvme-fabrics: move reset ctrl flow to common code Max Gurtovoy
2021-11-03  0:27   ` James Smart
2021-10-20 10:38 ` [PATCH 10/10] nvme-fabrics: set common attributes during nvmf_init_ctrl Max Gurtovoy
2021-11-03  0:30   ` James Smart

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=b4fb943d-3860-62d2-985e-0abd075bed48@gmail.com \
    --to=jsmart2021@gmail.com \
    --cc=chaitanyak@nvidia.com \
    --cc=hare@suse.de \
    --cc=hch@lst.de \
    --cc=israelr@nvidia.com \
    --cc=kbusch@kernel.org \
    --cc=linux-nvme@lists.infradead.org \
    --cc=mgurtovoy@nvidia.com \
    --cc=oren@nvidia.com \
    --cc=sagi@grimberg.me \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox