From: James Smart <jsmart2021@gmail.com>
To: Max Gurtovoy <mgurtovoy@nvidia.com>,
linux-nvme@lists.infradead.org, hch@lst.de, kbusch@kernel.org,
sagi@grimberg.me
Cc: chaitanyak@nvidia.com, israelr@nvidia.com, oren@nvidia.com, hare@suse.de
Subject: Re: [PATCH 02/10] nvme-fabrics: introduce nvmf_reconnect_or_remove API
Date: Tue, 2 Nov 2021 16:38:34 -0700 [thread overview]
Message-ID: <b4fb943d-3860-62d2-985e-0abd075bed48@gmail.com> (raw)
In-Reply-To: <20211020103844.7533-3-mgurtovoy@nvidia.com>
On 10/20/2021 3:38 AM, Max Gurtovoy wrote:
> This logic is duplicated today for RDMA and TCP controllers. Move it to
> the fabrics driver and export it as a new API.
>
> Also update the RDMA/TCP transport drivers to use this API and remove
> the duplicated code.
>
> Reviewed-by: Israel Rukshin <israelr@nvidia.com>
> Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
> Reviewed-by: Hannes Reinecke <hare@suse.de>
> Signed-off-by: Max Gurtovoy <mgurtovoy@nvidia.com>
> ---
> drivers/nvme/host/fabrics.c | 21 +++++++++++++++++++++
> drivers/nvme/host/fabrics.h | 1 +
> drivers/nvme/host/rdma.c | 25 +++----------------------
> drivers/nvme/host/tcp.c | 26 +++-----------------------
> 4 files changed, 28 insertions(+), 45 deletions(-)
>
> diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
> index 668c6bb7a567..4a1ef67c6fb3 100644
> --- a/drivers/nvme/host/fabrics.c
> +++ b/drivers/nvme/host/fabrics.c
> @@ -472,6 +472,27 @@ bool nvmf_should_reconnect(struct nvme_ctrl *ctrl)
> }
> EXPORT_SYMBOL_GPL(nvmf_should_reconnect);
>
> +void nvmf_reconnect_or_remove(struct nvme_ctrl *ctrl)
> +{
> + /* If we are resetting/deleting then do nothing */
> + if (ctrl->state != NVME_CTRL_CONNECTING) {
> + WARN_ON_ONCE(ctrl->state == NVME_CTRL_NEW ||
> + ctrl->state == NVME_CTRL_LIVE);
> + return;
> + }
> +
> + if (nvmf_should_reconnect(ctrl)) {
> + dev_info(ctrl->device, "Reconnecting in %d seconds...\n",
> + ctrl->opts->reconnect_delay);
> + queue_delayed_work(nvme_wq, &ctrl->connect_work,
> + ctrl->opts->reconnect_delay * HZ);
> + } else {
> + dev_info(ctrl->device, "Removing controller...\n");
> + nvme_delete_ctrl(ctrl);
> + }
> +}
> +EXPORT_SYMBOL_GPL(nvmf_reconnect_or_remove);
> +
This won't be sufficient for FC, so it can't use it. I'd have to think
about whether there's a way to restructure or wrap it. But it's not a great fit.
I do think what FC is doing relative to NVME_SC_DNR should be done in
rdma/tcp as well.
In other words, this should minimally be:
void nvmf_reconnect_or_remove(struct nvme_ctrl *ctrl, int status)
{
/* If we are resetting/deleting then do nothing */
if (ctrl->state != NVME_CTRL_CONNECTING) {
WARN_ON_ONCE(ctrl->state == NVME_CTRL_NEW ||
ctrl->state == NVME_CTRL_LIVE);
return;
}
if (!(status > 0 && status & NVME_SC_DNR) &&
nvmf_should_reconnect(ctrl)) {
dev_info(ctrl->device, "Reconnecting in %d seconds...\n",
ctrl->opts->reconnect_delay);
queue_delayed_work(nvme_wq, &ctrl->connect_work,
ctrl->opts->reconnect_delay * HZ);
} else {
dev_info(ctrl->device, "Removing controller...\n");
nvme_delete_ctrl(ctrl);
}
}
EXPORT_SYMBOL_GPL(nvmf_reconnect_or_remove);
then change the callees to set status to the return value of the call
that caused the reschedule. It'll either be set to a -Exxx value
or to an NVMe status code returned by one of the core routines during
controller init. This allows an uncorrectable failure during controller
init to just fail w/o rescheduling.
...
> @@ -1181,7 +1162,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
> requeue:
> dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
> ctrl->ctrl.nr_reconnects);
> - nvme_rdma_reconnect_or_remove(ctrl);
> + nvmf_reconnect_or_remove(&ctrl->ctrl);
This would become:
@@ -2,10 +2,12 @@ static void nvme_rdma_reconnect_ctrl_wor
{
struct nvme_rdma_ctrl *ctrl = container_of(to_delayed_work(work),
struct nvme_rdma_ctrl, reconnect_work);
+ int ret;
++ctrl->ctrl.nr_reconnects;
- if (nvme_rdma_setup_ctrl(ctrl, false))
+ ret = nvme_rdma_setup_ctrl(ctrl, false);
+ if (ret)
goto requeue;
dev_info(ctrl->ctrl.device, "Successfully reconnected (%d attempts)\n",
@@ -18,5 +20,5 @@ static void nvme_rdma_reconnect_ctrl_wor
requeue:
dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
ctrl->ctrl.nr_reconnects);
- nvme_rdma_reconnect_or_remove(ctrl);
+ nvme_rdma_reconnect_or_remove(ctrl, ret);
}
> }
>
> static void nvme_rdma_error_recovery_work(struct work_struct *work)
> @@ -1202,7 +1183,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
> return;
> }
>
> - nvme_rdma_reconnect_or_remove(ctrl);
> + nvmf_reconnect_or_remove(&ctrl->ctrl);
> }
@@ -16,5 +16,5 @@ static void nvme_rdma_error_recovery_wor
return;
}
- nvme_rdma_reconnect_or_remove(ctrl);
+ nvme_rdma_reconnect_or_remove(ctrl, 0);
}
>
> static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl)
> @@ -2265,7 +2246,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
>
> out_fail:
> ++ctrl->ctrl.nr_reconnects;
> - nvme_rdma_reconnect_or_remove(ctrl);
> + nvmf_reconnect_or_remove(&ctrl->ctrl);
> }
@@ -2,6 +2,7 @@ static void nvme_rdma_reset_ctrl_work(st
{
struct nvme_rdma_ctrl *ctrl =
container_of(work, struct nvme_rdma_ctrl, ctrl.reset_work);
+ int ret;
nvme_stop_ctrl(&ctrl->ctrl);
nvme_rdma_shutdown_ctrl(ctrl, false);
@@ -12,12 +13,13 @@ static void nvme_rdma_reset_ctrl_work(st
return;
}
- if (nvme_rdma_setup_ctrl(ctrl, false))
+ ret = nvme_rdma_setup_ctrl(ctrl, false);
+ if (ret)
goto out_fail;
return;
out_fail:
++ctrl->ctrl.nr_reconnects;
- nvme_rdma_reconnect_or_remove(ctrl);
+ nvme_rdma_reconnect_or_remove(ctrl, ret);
}
And similar mods to tcp.
-- james
next prev parent reply other threads:[~2021-11-02 23:38 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-10-20 10:38 [PATCH v2 0/10] Centrelize common fabrics code to core drivers Max Gurtovoy
2021-10-20 10:38 ` [PATCH 01/10] nvme: add connect_work attribute to nvme ctrl Max Gurtovoy
2021-11-02 22:59 ` James Smart
2021-10-20 10:38 ` [PATCH 02/10] nvme-fabrics: introduce nvmf_reconnect_or_remove API Max Gurtovoy
2021-11-02 23:38 ` James Smart [this message]
2021-10-20 10:38 ` [PATCH 03/10] nvme: add err_work attribute to nvme ctrl Max Gurtovoy
2021-10-20 11:05 ` Hannes Reinecke
2021-11-02 23:53 ` James Smart
2021-10-20 10:38 ` [PATCH 04/10] nvme-fabrics: introduce nvmf_error_recovery API Max Gurtovoy
2021-11-02 23:59 ` James Smart
2021-10-20 10:38 ` [PATCH 05/10] nvme/nvme-fabrics: introduce nvmf_error_recovery_work API Max Gurtovoy
2021-11-03 0:04 ` James Smart
2021-10-20 10:38 ` [PATCH 06/10] nvme/nvme-fabrics: introduce nvmf_reconnect_ctrl_work API Max Gurtovoy
2021-11-03 0:15 ` James Smart
2021-10-20 10:38 ` [PATCH 07/10] nvme-fabrics: add nvmf_init_ctrl/nvmf_uninit_ctrl API Max Gurtovoy
2021-11-03 0:19 ` James Smart
2021-10-20 10:38 ` [PATCH 08/10] nvme-rdma: update WARN_ON condition during reset Max Gurtovoy
2021-10-20 10:38 ` [PATCH 09/10] nvme/nvme-fabrics: move reset ctrl flow to common code Max Gurtovoy
2021-11-03 0:27 ` James Smart
2021-10-20 10:38 ` [PATCH 10/10] nvme-fabrics: set common attributes during nvmf_init_ctrl Max Gurtovoy
2021-11-03 0:30 ` James Smart
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=b4fb943d-3860-62d2-985e-0abd075bed48@gmail.com \
--to=jsmart2021@gmail.com \
--cc=chaitanyak@nvidia.com \
--cc=hare@suse.de \
--cc=hch@lst.de \
--cc=israelr@nvidia.com \
--cc=kbusch@kernel.org \
--cc=linux-nvme@lists.infradead.org \
--cc=mgurtovoy@nvidia.com \
--cc=oren@nvidia.com \
--cc=sagi@grimberg.me \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox