public inbox for linux-nvme@lists.infradead.org
 help / color / mirror / Atom feed
From: Mohamed Khalfella <mkhalfella@purestorage.com>
To: Chaitanya Kulkarni <kch@nvidia.com>,
	Christoph Hellwig <hch@lst.de>, Jens Axboe <axboe@kernel.dk>,
	Keith Busch <kbusch@kernel.org>, Sagi Grimberg <sagi@grimberg.me>
Cc: Aaron Dailey <adailey@purestorage.com>,
	Randy Jennings <randyj@purestorage.com>,
	John Meneghini <jmeneghi@redhat.com>,
	Hannes Reinecke <hare@suse.de>,
	linux-nvme@lists.infradead.org, linux-kernel@vger.kernel.org,
	Mohamed Khalfella <mkhalfella@purestorage.com>
Subject: [RFC PATCH 01/14] nvmet: Rapid Path Failure Recovery set controller identify fields
Date: Tue, 25 Nov 2025 18:11:48 -0800	[thread overview]
Message-ID: <20251126021250.2583630-2-mkhalfella@purestorage.com> (raw)
In-Reply-To: <20251126021250.2583630-1-mkhalfella@purestorage.com>

TP8028 Rapid Path Failure Recovery defined new fields in controller
identify response. The newly defined fields are:

- CIU (Controller Instance UNIQUIFIER): is an 8bit non-zero value that
is assigned a random value when controller first created. The value is
expected to be incremented when RDY bit in CSTS register is asserted
- CIRN (Controller Instance Random Number): is 64bit random value that
gets generated when controller is crated. CIRN is regenerated everytime
RDY bit is CSTS register is asserted.
- CCRL (Cross-Controller Reset Limit) is an 8bit value that defines the
maximum number of in-progress controller reset operations. CCRL is
hardcoded to 4 as recommended by TP8028.

TP4129 KATO Corrections and Clarifications defined CQT (Command Quiesce
Time) which is used along with KATO (Keep Alive Timeout) to set an upper
time limit for attempting Cross-Controller Recovery.

Make the new fields available for IO controllers only since TP8028 is
not very useful for discovery controllers.

Signed-off-by: Mohamed Khalfella <mkhalfella@purestorage.com>
---
 drivers/nvme/target/admin-cmd.c |  6 ++++++
 drivers/nvme/target/core.c      |  9 +++++++++
 drivers/nvme/target/nvmet.h     |  2 ++
 include/linux/nvme.h            | 15 ++++++++++++---
 4 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 3e378153a781..aaceb697e4d2 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -696,6 +696,12 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
 
 	id->cntlid = cpu_to_le16(ctrl->cntlid);
 	id->ver = cpu_to_le32(ctrl->subsys->ver);
+	if (!nvmet_is_disc_subsys(ctrl->subsys)) {
+		id->cqt = NVMF_CQT_MS;
+		id->ciu = ctrl->uniquifier;
+		id->cirn = cpu_to_le64(ctrl->random);
+		id->ccrl = NVMF_CCR_LIMIT;
+	}
 
 	/* XXX: figure out what to do about RTD3R/RTD3 */
 	id->oaes = cpu_to_le32(NVMET_AEN_CFG_OPTIONAL);
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 5d7d483bfbe3..409928202503 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -1393,6 +1393,10 @@ static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
 		return;
 	}
 
+	if (!nvmet_is_disc_subsys(ctrl->subsys)) {
+		ctrl->uniquifier = ((u8)(ctrl->uniquifier + 1)) ? : 1;
+		ctrl->random = get_random_u64();
+	}
 	ctrl->csts = NVME_CSTS_RDY;
 
 	/*
@@ -1662,6 +1666,11 @@ struct nvmet_ctrl *nvmet_alloc_ctrl(struct nvmet_alloc_ctrl_args *args)
 	}
 	ctrl->cntlid = ret;
 
+	if (!nvmet_is_disc_subsys(ctrl->subsys)) {
+		ctrl->uniquifier = get_random_u8() ? : 1;
+		ctrl->random = get_random_u64();
+	}
+
 	/*
 	 * Discovery controllers may use some arbitrary high value
 	 * in order to cleanup stale discovery sessions
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 51df72f5e89b..4195c9eff1da 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -263,7 +263,9 @@ struct nvmet_ctrl {
 
 	uuid_t			hostid;
 	u16			cntlid;
+	u8			uniquifier;
 	u32			kato;
+	u64			random;
 
 	struct nvmet_port	*port;
 
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 655d194f8e72..5135cdc3c120 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -21,6 +21,9 @@
 #define NVMF_TRADDR_SIZE	256
 #define NVMF_TSAS_SIZE		256
 
+#define NVMF_CQT_MS		0
+#define NVMF_CCR_LIMIT		4
+
 #define NVME_DISC_SUBSYS_NAME	"nqn.2014-08.org.nvmexpress.discovery"
 
 #define NVME_NSID_ALL		0xffffffff
@@ -328,7 +331,10 @@ struct nvme_id_ctrl {
 	__le16			crdt1;
 	__le16			crdt2;
 	__le16			crdt3;
-	__u8			rsvd134[122];
+	__u8			rsvd134[1];
+	__u8			ciu;
+	__le64			cirn;
+	__u8			rsvd144[112];
 	__le16			oacs;
 	__u8			acl;
 	__u8			aerl;
@@ -362,7 +368,9 @@ struct nvme_id_ctrl {
 	__u8			anacap;
 	__le32			anagrpmax;
 	__le32			nanagrpid;
-	__u8			rsvd352[160];
+	__u8			rsvd352[34];
+	__le16			cqt;
+	__u8			rsvd388[124];
 	__u8			sqes;
 	__u8			cqes;
 	__le16			maxcmd;
@@ -389,7 +397,8 @@ struct nvme_id_ctrl {
 	__u8			msdbd;
 	__u8			rsvd1804[2];
 	__u8			dctype;
-	__u8			rsvd1807[241];
+	__u8			ccrl;
+	__u8			rsvd1808[240];
 	struct nvme_id_power_state	psd[32];
 	__u8			vs[1024];
 };
-- 
2.51.2



  reply	other threads:[~2025-11-26  2:13 UTC|newest]

Thread overview: 68+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-11-26  2:11 [RFC PATCH 00/14] TP8028 Rapid Path Failure Recovery Mohamed Khalfella
2025-11-26  2:11 ` Mohamed Khalfella [this message]
2025-12-16  1:35   ` [RFC PATCH 01/14] nvmet: Rapid Path Failure Recovery set controller identify fields Randy Jennings
2025-11-26  2:11 ` [RFC PATCH 02/14] nvmet/debugfs: Add ctrl uniquifier and random values Mohamed Khalfella
2025-12-16  1:43   ` Randy Jennings
2025-11-26  2:11 ` [RFC PATCH 03/14] nvmet: Implement CCR nvme command Mohamed Khalfella
2025-12-16  3:01   ` Randy Jennings
2025-12-31 21:14     ` Mohamed Khalfella
2025-12-25 13:14   ` Sagi Grimberg
2025-12-25 17:33     ` Mohamed Khalfella
2025-12-27  9:39       ` Sagi Grimberg
2025-12-31 21:35         ` Mohamed Khalfella
2025-11-26  2:11 ` [RFC PATCH 04/14] nvmet: Implement CCR logpage Mohamed Khalfella
2025-12-16  3:11   ` Randy Jennings
2025-11-26  2:11 ` [RFC PATCH 05/14] nvmet: Send an AEN on CCR completion Mohamed Khalfella
2025-12-16  3:31   ` Randy Jennings
2025-12-25 13:23   ` Sagi Grimberg
2025-12-25 18:13     ` Mohamed Khalfella
2025-12-27  9:48       ` Sagi Grimberg
2025-12-31 22:00         ` Mohamed Khalfella
2026-01-04 21:09           ` Sagi Grimberg
2026-01-07  2:58             ` Randy Jennings
2026-01-30 22:31             ` Mohamed Khalfella
2025-11-26  2:11 ` [RFC PATCH 06/14] nvme: Rapid Path Failure Recovery read controller identify fields Mohamed Khalfella
2025-12-18 15:22   ` Randy Jennings
2025-12-31 22:26     ` Mohamed Khalfella
2026-01-02 19:06       ` Mohamed Khalfella
2025-11-26  2:11 ` [RFC PATCH 07/14] nvme: Add RECOVERING nvme controller state Mohamed Khalfella
2025-12-18 23:18   ` Randy Jennings
2025-12-19  1:39     ` Randy Jennings
2025-12-25 13:29   ` Sagi Grimberg
2025-12-25 17:17     ` Mohamed Khalfella
2025-12-27  9:52       ` Sagi Grimberg
2025-12-31 22:45         ` Mohamed Khalfella
2025-12-27  9:55       ` Sagi Grimberg
2025-12-31 22:36         ` Mohamed Khalfella
2025-12-31 23:04           ` Mohamed Khalfella
2025-11-26  2:11 ` [RFC PATCH 08/14] nvme: Implement cross-controller reset recovery Mohamed Khalfella
2025-12-19  1:21   ` Randy Jennings
2025-12-27 10:14   ` Sagi Grimberg
2025-12-31  0:04     ` Randy Jennings
2026-01-04 21:14       ` Sagi Grimberg
2026-01-07  3:16         ` Randy Jennings
2025-12-31 23:43     ` Mohamed Khalfella
2026-01-04 21:39       ` Sagi Grimberg
2026-01-30 22:01         ` Mohamed Khalfella
2025-11-26  2:11 ` [RFC PATCH 09/14] nvme: Implement cross-controller reset completion Mohamed Khalfella
2025-12-19  1:31   ` Randy Jennings
2025-12-27 10:24   ` Sagi Grimberg
2025-12-31 23:51     ` Mohamed Khalfella
2026-01-04 21:15       ` Sagi Grimberg
2026-01-30 22:32         ` Mohamed Khalfella
2025-11-26  2:11 ` [RFC PATCH 10/14] nvme-tcp: Use CCR to recover controller that hits an error Mohamed Khalfella
2025-12-19  2:06   ` Randy Jennings
2026-01-01  0:04     ` Mohamed Khalfella
2025-12-27 10:35   ` Sagi Grimberg
2025-12-31  0:13     ` Randy Jennings
2026-01-04 21:19       ` Sagi Grimberg
2026-01-01  0:27     ` Mohamed Khalfella
2025-11-26  2:11 ` [RFC PATCH 11/14] nvme-rdma: " Mohamed Khalfella
2025-12-19  2:16   ` Randy Jennings
2025-12-27 10:36   ` Sagi Grimberg
2025-11-26  2:11 ` [RFC PATCH 12/14] nvme-fc: Decouple error recovery from controller reset Mohamed Khalfella
2025-12-19  2:59   ` Randy Jennings
2025-11-26  2:12 ` [RFC PATCH 13/14] nvme-fc: Use CCR to recover controller that hits an error Mohamed Khalfella
2025-12-20  1:21   ` Randy Jennings
2025-11-26  2:12 ` [RFC PATCH 14/14] nvme-fc: Hold inflight requests while in RECOVERING state Mohamed Khalfella
2025-12-20  1:44   ` Randy Jennings

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251126021250.2583630-2-mkhalfella@purestorage.com \
    --to=mkhalfella@purestorage.com \
    --cc=adailey@purestorage.com \
    --cc=axboe@kernel.dk \
    --cc=hare@suse.de \
    --cc=hch@lst.de \
    --cc=jmeneghi@redhat.com \
    --cc=kbusch@kernel.org \
    --cc=kch@nvidia.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-nvme@lists.infradead.org \
    --cc=randyj@purestorage.com \
    --cc=sagi@grimberg.me \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox