Linux-NVME Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: Daniel Wagner <dwagner@suse.de>
To: Christoph Hellwig <hch@lst.de>
Cc: Keith Busch <kbusch@kernel.org>, Sagi Grimberg <sagi@grimberg.me>,
	James Smart <james.smart@broadcom.com>,
	Hannes Reinecke <hare@suse.de>,
	linux-nvme@lists.infradead.org, linux-kernel@vger.kernel.org,
	Daniel Wagner <dwagner@suse.de>
Subject: [PATCH v6 5/5] nvme-fabrics: handle transient auth failures
Date: Mon, 15 Apr 2024 14:42:20 +0200	[thread overview]
Message-ID: <20240415124220.5433-6-dwagner@suse.de> (raw)
In-Reply-To: <20240415124220.5433-1-dwagner@suse.de>

Retry authentication a couple of times to avoid error out on transient
errors. E.g. if a new key is deployed on the host and the target. At the
same time the connection drops. The host might use the wrong key to
reconnect.

Suggested-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Daniel Wagner <dwagner@suse.de>
---
 drivers/nvme/host/auth.c    |  4 ++--
 drivers/nvme/host/fabrics.c | 10 +++++++++-
 drivers/nvme/host/fc.c      |  2 ++
 drivers/nvme/host/nvme.h    |  1 +
 drivers/nvme/host/rdma.c    |  1 +
 drivers/nvme/host/tcp.c     |  1 +
 6 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c
index a264b3ae078b..368dcc6ee11b 100644
--- a/drivers/nvme/host/auth.c
+++ b/drivers/nvme/host/auth.c
@@ -797,7 +797,7 @@ static void nvme_queue_auth_work(struct work_struct *work)
 					 NVME_AUTH_DHCHAP_MESSAGE_SUCCESS1);
 	if (ret) {
 		chap->status = ret;
-		chap->error = -ECONNREFUSED;
+		chap->error = -EKEYREJECTED;
 		return;
 	}
 
@@ -818,7 +818,7 @@ static void nvme_queue_auth_work(struct work_struct *work)
 	ret = nvme_auth_process_dhchap_success1(ctrl, chap);
 	if (ret) {
 		/* Controller authentication failed */
-		chap->error = -ECONNREFUSED;
+		chap->error = -EKEYREJECTED;
 		goto fail2;
 	}
 
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index d9a73b1b41c4..6dfa45dce232 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -573,8 +573,16 @@ EXPORT_SYMBOL_GPL(nvmf_connect_io_queue);
  */
 bool nvmf_should_reconnect(struct nvme_ctrl *ctrl, int status)
 {
-	if (status < 0)
+	if (status < 0) {
+		/*
+		 * authentication errors can be transient, thus retry a couple
+		 * of times before giving up.
+		 */
+		if (status == -EKEYREJECTED &&
+		    ++ctrl->nr_auth_retries < 3)
+			return true;
 		return false;
+	}
 	else if (status > 0 && (status & NVME_SC_DNR))
 		return false;
 
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index f0b081332749..a22d2af18553 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -3176,6 +3176,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
 
 	ctrl->ctrl.nr_reconnects = 0;
+	ctrl->ctrl.nr_auth_retries = 0;
 
 	if (changed)
 		nvme_start_ctrl(&ctrl->ctrl);
@@ -3491,6 +3492,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 
 	ctrl->ctrl.opts = opts;
 	ctrl->ctrl.nr_reconnects = 0;
+	ctrl->ctrl.nr_auth_retries = 0;
 	INIT_LIST_HEAD(&ctrl->ctrl_list);
 	ctrl->lport = lport;
 	ctrl->rport = rport;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 9b8904a476b8..a9af20f25405 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -382,6 +382,7 @@ struct nvme_ctrl {
 	u16 icdoff;
 	u16 maxcmd;
 	int nr_reconnects;
+	int nr_auth_retries;
 	unsigned long flags;
 	struct nvmf_ctrl_options *opts;
 
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 821ab3e0fd3b..f5b3201a11b5 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1117,6 +1117,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 			ctrl->ctrl.nr_reconnects);
 
 	ctrl->ctrl.nr_reconnects = 0;
+	ctrl->ctrl.nr_auth_retries = 0;
 
 	return;
 
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 3e0c33323320..45270c626c87 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -2266,6 +2266,7 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work)
 			ctrl->nr_reconnects);
 
 	ctrl->nr_reconnects = 0;
+	ctrl->nr_auth_retries = 0;
 
 	return;
 
-- 
2.44.0



  parent reply	other threads:[~2024-04-15 12:42 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-04-15 12:42 [PATCH v6 0/5] nvme-fabrics: short-circuit connect retries Daniel Wagner
2024-04-15 12:42 ` [PATCH v6 1/5] nvmet: lock config semaphore when accessing DH-HMAC-CHAP key Daniel Wagner
2024-04-15 12:42 ` [PATCH v6 2/5] nvmet: return DHCHAP status codes from nvmet_setup_auth() Daniel Wagner
2024-04-15 12:42 ` [PATCH v6 3/5] nvme: authentication error are always non-retryable Daniel Wagner
2024-04-15 20:54   ` Sagi Grimberg
2024-04-15 12:42 ` [PATCH v6 4/5] nvme-fabrics: short-circuit reconnect retries Daniel Wagner
2024-04-15 12:42 ` Daniel Wagner [this message]
2024-04-15 20:51   ` [PATCH v6 5/5] nvme-fabrics: handle transient auth failures Sagi Grimberg
2024-04-16  6:57     ` Daniel Wagner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240415124220.5433-6-dwagner@suse.de \
    --to=dwagner@suse.de \
    --cc=hare@suse.de \
    --cc=hch@lst.de \
    --cc=james.smart@broadcom.com \
    --cc=kbusch@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-nvme@lists.infradead.org \
    --cc=sagi@grimberg.me \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox