Linux-NVME Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: Mohamed Khalfella <mkhalfella@purestorage.com>
To: Chaitanya Kulkarni <kch@nvidia.com>,
	Christoph Hellwig <hch@lst.de>, Jens Axboe <axboe@kernel.dk>,
	Keith Busch <kbusch@kernel.org>, Sagi Grimberg <sagi@grimberg.me>
Cc: Aaron Dailey <adailey@purestorage.com>,
	Randy Jennings <randyj@purestorage.com>,
	John Meneghini <jmeneghi@redhat.com>,
	Hannes Reinecke <hare@suse.de>,
	linux-nvme@lists.infradead.org, linux-kernel@vger.kernel.org,
	Mohamed Khalfella <mkhalfella@purestorage.com>
Subject: [RFC PATCH 03/14] nvmet: Implement CCR nvme command
Date: Tue, 25 Nov 2025 18:11:50 -0800	[thread overview]
Message-ID: <20251126021250.2583630-4-mkhalfella@purestorage.com> (raw)
In-Reply-To: <20251126021250.2583630-1-mkhalfella@purestorage.com>

Defined by TP8028 Rapid Path Failure Recovery, the CCR (Cross-Controller
Reset) command is an NVMe command that is issued to the source controller
by the initiator to reset the impacted controller. Implement the CCR
command for the Linux NVMe target.

Signed-off-by: Mohamed Khalfella <mkhalfella@purestorage.com>
---
 drivers/nvme/target/admin-cmd.c | 79 +++++++++++++++++++++++++++++++++
 drivers/nvme/target/core.c      | 69 ++++++++++++++++++++++++++++
 drivers/nvme/target/nvmet.h     | 13 ++++++
 include/linux/nvme.h            | 23 ++++++++++
 4 files changed, 184 insertions(+)

diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index aaceb697e4d2..a55ca010d34f 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -376,7 +376,9 @@ static void nvmet_get_cmd_effects_admin(struct nvmet_ctrl *ctrl,
 	log->acs[nvme_admin_get_features] =
 	log->acs[nvme_admin_async_event] =
 	log->acs[nvme_admin_keep_alive] =
+	log->acs[nvme_admin_cross_ctrl_reset] =
 		cpu_to_le32(NVME_CMD_EFFECTS_CSUPP);
+
 }
 
 static void nvmet_get_cmd_effects_nvm(struct nvme_effects_log *log)
@@ -1615,6 +1617,80 @@ void nvmet_execute_keep_alive(struct nvmet_req *req)
 	nvmet_req_complete(req, status);
 }
 
+void nvmet_execute_cross_ctrl_reset(struct nvmet_req *req)
+{
+	struct nvmet_ctrl *ictrl, *ctrl = req->sq->ctrl;
+	struct nvme_command *cmd = req->cmd;
+	struct nvmet_ccr *ccr, *new_ccr;
+	int ccr_active, ccr_total;
+	u16 cntlid, status = 0;
+
+	cntlid = le16_to_cpu(cmd->ccr.icid);
+	if (ctrl->cntlid == cntlid) {
+		req->error_loc =
+			offsetof(struct nvme_cross_ctrl_reset_cmd, icid);
+		status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
+		goto out;
+	}
+
+	ictrl = nvmet_ctrl_find_get_ccr(ctrl->subsys, ctrl->hostnqn,
+					cmd->ccr.ciu, cntlid,
+					le64_to_cpu(cmd->ccr.cirn));
+	if (!ictrl) {
+		/* Immediate Reset Successful */
+		nvmet_set_result(req, 1);
+		status = NVME_SC_SUCCESS;
+		goto out;
+	}
+
+	new_ccr = kmalloc(sizeof(*ccr), GFP_KERNEL);
+	if (!new_ccr) {
+		status = NVME_SC_INTERNAL;
+		goto out_put_ctrl;
+	}
+
+	ccr_total = ccr_active = 0;
+	mutex_lock(&ctrl->lock);
+	list_for_each_entry(ccr, &ctrl->ccrs, entry) {
+		if (ccr->ctrl == ictrl) {
+			status = NVME_SC_CCR_IN_PROGRESS | NVME_STATUS_DNR;
+			goto out_unlock;
+		}
+
+		ccr_total++;
+		if (ccr->ctrl)
+			ccr_active++;
+	}
+
+	if (ccr_active >= NVMF_CCR_LIMIT) {
+		status = NVME_SC_CCR_LIMIT_EXCEEDED;
+		goto out_unlock;
+	}
+	if (ccr_total >= NVMF_CCR_PER_PAGE) {
+		status = NVME_SC_CCR_LOGPAGE_FULL;
+		goto out_unlock;
+	}
+
+	new_ccr->ciu = cmd->ccr.ciu;
+	new_ccr->icid = cntlid;
+	new_ccr->ctrl = ictrl;
+	list_add_tail(&new_ccr->entry, &ctrl->ccrs);
+	mutex_unlock(&ctrl->lock);
+
+	nvmet_ctrl_fatal_error(ictrl);
+	nvmet_ctrl_put(ictrl);
+	nvmet_req_complete(req, 0);
+	return;
+
+out_unlock:
+	mutex_unlock(&ctrl->lock);
+	kfree(new_ccr);
+out_put_ctrl:
+	nvmet_ctrl_put(ictrl);
+out:
+	nvmet_req_complete(req, status);
+}
+
 u32 nvmet_admin_cmd_data_len(struct nvmet_req *req)
 {
 	struct nvme_command *cmd = req->cmd;
@@ -1692,6 +1768,9 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
 	case nvme_admin_keep_alive:
 		req->execute = nvmet_execute_keep_alive;
 		return 0;
+	case nvme_admin_cross_ctrl_reset:
+		req->execute = nvmet_execute_cross_ctrl_reset;
+		return 0;
 	default:
 		return nvmet_report_invalid_opcode(req);
 	}
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 409928202503..7dbe9255ff42 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -114,6 +114,20 @@ u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len)
 	return 0;
 }
 
+void nvmet_ctrl_cleanup_ccrs(struct nvmet_ctrl *ctrl, bool all)
+{
+	struct nvmet_ccr *ccr, *tmp;
+
+	lockdep_assert_held(&ctrl->lock);
+
+	list_for_each_entry_safe(ccr, tmp, &ctrl->ccrs, entry) {
+		if (all || ccr->ctrl == NULL) {
+			list_del(&ccr->entry);
+			kfree(ccr);
+		}
+	}
+}
+
 static u32 nvmet_max_nsid(struct nvmet_subsys *subsys)
 {
 	struct nvmet_ns *cur;
@@ -1396,6 +1410,7 @@ static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
 	if (!nvmet_is_disc_subsys(ctrl->subsys)) {
 		ctrl->uniquifier = ((u8)(ctrl->uniquifier + 1)) ? : 1;
 		ctrl->random = get_random_u64();
+		nvmet_ctrl_cleanup_ccrs(ctrl, false);
 	}
 	ctrl->csts = NVME_CSTS_RDY;
 
@@ -1501,6 +1516,38 @@ struct nvmet_ctrl *nvmet_ctrl_find_get(const char *subsysnqn,
 	return ctrl;
 }
 
+struct nvmet_ctrl *nvmet_ctrl_find_get_ccr(struct nvmet_subsys *subsys,
+					   const char *hostnqn, u8 ciu,
+					   u16 cntlid, u64 cirn)
+{
+	struct nvmet_ctrl *ctrl;
+	bool found = false;
+
+	mutex_lock(&subsys->lock);
+	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
+		if (ctrl->cntlid != cntlid)
+			continue;
+		if (strncmp(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE))
+			continue;
+
+		/* Avoid racing with a controller that is becoming ready */
+		mutex_lock(&ctrl->lock);
+		if (ctrl->uniquifier == ciu && ctrl->random == cirn)
+			found = true;
+		mutex_unlock(&ctrl->lock);
+
+		if (found) {
+			if (kref_get_unless_zero(&ctrl->ref))
+				goto out;
+			break;
+		}
+	};
+	ctrl = NULL;
+out:
+	mutex_unlock(&subsys->lock);
+	return ctrl;
+}
+
 u16 nvmet_check_ctrl_status(struct nvmet_req *req)
 {
 	if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) {
@@ -1626,6 +1673,7 @@ struct nvmet_ctrl *nvmet_alloc_ctrl(struct nvmet_alloc_ctrl_args *args)
 		subsys->clear_ids = 1;
 #endif
 
+	INIT_LIST_HEAD(&ctrl->ccrs);
 	INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work);
 	INIT_LIST_HEAD(&ctrl->async_events);
 	INIT_RADIX_TREE(&ctrl->p2p_ns_map, GFP_KERNEL);
@@ -1740,12 +1788,33 @@ struct nvmet_ctrl *nvmet_alloc_ctrl(struct nvmet_alloc_ctrl_args *args)
 }
 EXPORT_SYMBOL_GPL(nvmet_alloc_ctrl);
 
+static void nvmet_ctrl_complete_pending_ccr(struct nvmet_ctrl *ctrl)
+{
+	struct nvmet_subsys *subsys = ctrl->subsys;
+	struct nvmet_ctrl *sctrl;
+	struct nvmet_ccr *ccr;
+
+	mutex_lock(&ctrl->lock);
+	nvmet_ctrl_cleanup_ccrs(ctrl, true);
+	mutex_unlock(&ctrl->lock);
+
+	list_for_each_entry(sctrl, &subsys->ctrls, subsys_entry) {
+		mutex_lock(&sctrl->lock);
+		list_for_each_entry(ccr, &sctrl->ccrs, entry) {
+			if (ccr->ctrl == ctrl)
+				ccr->ctrl = NULL;
+		}
+		mutex_unlock(&sctrl->lock);
+	}
+}
+
 static void nvmet_ctrl_free(struct kref *ref)
 {
 	struct nvmet_ctrl *ctrl = container_of(ref, struct nvmet_ctrl, ref);
 	struct nvmet_subsys *subsys = ctrl->subsys;
 
 	mutex_lock(&subsys->lock);
+	nvmet_ctrl_complete_pending_ccr(ctrl);
 	nvmet_ctrl_destroy_pr(ctrl);
 	nvmet_release_p2p_ns_map(ctrl);
 	list_del(&ctrl->subsys_entry);
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 4195c9eff1da..6c0091b8af8b 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -267,6 +267,7 @@ struct nvmet_ctrl {
 	u32			kato;
 	u64			random;
 
+	struct list_head	ccrs;
 	struct nvmet_port	*port;
 
 	u32			aen_enabled;
@@ -314,6 +315,13 @@ struct nvmet_ctrl {
 	struct nvmet_pr_log_mgr pr_log_mgr;
 };
 
+struct nvmet_ccr {
+	struct nvmet_ctrl	*ctrl;
+	struct list_head	entry;
+	u16			icid;
+	u8			ciu;
+};
+
 struct nvmet_subsys {
 	enum nvme_subsys_type	type;
 
@@ -576,6 +584,7 @@ void nvmet_req_free_sgls(struct nvmet_req *req);
 void nvmet_execute_set_features(struct nvmet_req *req);
 void nvmet_execute_get_features(struct nvmet_req *req);
 void nvmet_execute_keep_alive(struct nvmet_req *req);
+void nvmet_execute_cross_ctrl_reset(struct nvmet_req *req);
 
 u16 nvmet_check_cqid(struct nvmet_ctrl *ctrl, u16 cqid, bool create);
 u16 nvmet_check_io_cqid(struct nvmet_ctrl *ctrl, u16 cqid, bool create);
@@ -618,6 +627,10 @@ struct nvmet_ctrl *nvmet_alloc_ctrl(struct nvmet_alloc_ctrl_args *args);
 struct nvmet_ctrl *nvmet_ctrl_find_get(const char *subsysnqn,
 				       const char *hostnqn, u16 cntlid,
 				       struct nvmet_req *req);
+struct nvmet_ctrl *nvmet_ctrl_find_get_ccr(struct nvmet_subsys *subsys,
+					   const char *hostnqn, u8 ciu,
+					   u16 cntlid, u64 cirn);
+void nvmet_ctrl_cleanup_ccrs(struct nvmet_ctrl *ctrl, bool all);
 void nvmet_ctrl_put(struct nvmet_ctrl *ctrl);
 u16 nvmet_check_ctrl_status(struct nvmet_req *req);
 ssize_t nvmet_ctrl_host_traddr(struct nvmet_ctrl *ctrl,
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 5135cdc3c120..0f305b317aa3 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -23,6 +23,7 @@
 
 #define NVMF_CQT_MS		0
 #define NVMF_CCR_LIMIT		4
+#define NVMF_CCR_PER_PAGE	511
 
 #define NVME_DISC_SUBSYS_NAME	"nqn.2014-08.org.nvmexpress.discovery"
 
@@ -1225,6 +1226,22 @@ struct nvme_zone_mgmt_recv_cmd {
 	__le32			cdw14[2];
 };
 
+struct nvme_cross_ctrl_reset_cmd {
+	__u8			opcode;
+	__u8			flags;
+	__u16			command_id;
+	__le32			nsid;
+	__le64			rsvd2[2];
+	union nvme_data_ptr	dptr;
+	__u8			rsvd10;
+	__u8			ciu;
+	__le16			icid;
+	__le32			cdw11;
+	__le64			cirn;
+	__le32			cdw14;
+	__le32			cdw15;
+};
+
 struct nvme_io_mgmt_recv_cmd {
 	__u8			opcode;
 	__u8			flags;
@@ -1323,6 +1340,7 @@ enum nvme_admin_opcode {
 	nvme_admin_virtual_mgmt		= 0x1c,
 	nvme_admin_nvme_mi_send		= 0x1d,
 	nvme_admin_nvme_mi_recv		= 0x1e,
+	nvme_admin_cross_ctrl_reset	= 0x38,
 	nvme_admin_dbbuf		= 0x7C,
 	nvme_admin_format_nvm		= 0x80,
 	nvme_admin_security_send	= 0x81,
@@ -1356,6 +1374,7 @@ enum nvme_admin_opcode {
 		nvme_admin_opcode_name(nvme_admin_virtual_mgmt),	\
 		nvme_admin_opcode_name(nvme_admin_nvme_mi_send),	\
 		nvme_admin_opcode_name(nvme_admin_nvme_mi_recv),	\
+		nvme_admin_opcode_name(nvme_admin_cross_ctrl_reset),	\
 		nvme_admin_opcode_name(nvme_admin_dbbuf),		\
 		nvme_admin_opcode_name(nvme_admin_format_nvm),		\
 		nvme_admin_opcode_name(nvme_admin_security_send),	\
@@ -2009,6 +2028,7 @@ struct nvme_command {
 		struct nvme_dbbuf dbbuf;
 		struct nvme_directive_cmd directive;
 		struct nvme_io_mgmt_recv_cmd imr;
+		struct nvme_cross_ctrl_reset_cmd ccr;
 	};
 };
 
@@ -2173,6 +2193,9 @@ enum {
 	NVME_SC_PMR_SAN_PROHIBITED	= 0x123,
 	NVME_SC_ANA_GROUP_ID_INVALID	= 0x124,
 	NVME_SC_ANA_ATTACH_FAILED	= 0x125,
+	NVME_SC_CCR_IN_PROGRESS		= 0x13f,
+	NVME_SC_CCR_LOGPAGE_FULL	= 0x140,
+	NVME_SC_CCR_LIMIT_EXCEEDED	= 0x141,
 
 	/*
 	 * I/O Command Set Specific - NVM commands:
-- 
2.51.2



  parent reply	other threads:[~2025-11-26  2:13 UTC|newest]

Thread overview: 68+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-11-26  2:11 [RFC PATCH 00/14] TP8028 Rapid Path Failure Recovery Mohamed Khalfella
2025-11-26  2:11 ` [RFC PATCH 01/14] nvmet: Rapid Path Failure Recovery set controller identify fields Mohamed Khalfella
2025-12-16  1:35   ` Randy Jennings
2025-11-26  2:11 ` [RFC PATCH 02/14] nvmet/debugfs: Add ctrl uniquifier and random values Mohamed Khalfella
2025-12-16  1:43   ` Randy Jennings
2025-11-26  2:11 ` Mohamed Khalfella [this message]
2025-12-16  3:01   ` [RFC PATCH 03/14] nvmet: Implement CCR nvme command Randy Jennings
2025-12-31 21:14     ` Mohamed Khalfella
2025-12-25 13:14   ` Sagi Grimberg
2025-12-25 17:33     ` Mohamed Khalfella
2025-12-27  9:39       ` Sagi Grimberg
2025-12-31 21:35         ` Mohamed Khalfella
2025-11-26  2:11 ` [RFC PATCH 04/14] nvmet: Implement CCR logpage Mohamed Khalfella
2025-12-16  3:11   ` Randy Jennings
2025-11-26  2:11 ` [RFC PATCH 05/14] nvmet: Send an AEN on CCR completion Mohamed Khalfella
2025-12-16  3:31   ` Randy Jennings
2025-12-25 13:23   ` Sagi Grimberg
2025-12-25 18:13     ` Mohamed Khalfella
2025-12-27  9:48       ` Sagi Grimberg
2025-12-31 22:00         ` Mohamed Khalfella
2026-01-04 21:09           ` Sagi Grimberg
2026-01-07  2:58             ` Randy Jennings
2026-01-30 22:31             ` Mohamed Khalfella
2025-11-26  2:11 ` [RFC PATCH 06/14] nvme: Rapid Path Failure Recovery read controller identify fields Mohamed Khalfella
2025-12-18 15:22   ` Randy Jennings
2025-12-31 22:26     ` Mohamed Khalfella
2026-01-02 19:06       ` Mohamed Khalfella
2025-11-26  2:11 ` [RFC PATCH 07/14] nvme: Add RECOVERING nvme controller state Mohamed Khalfella
2025-12-18 23:18   ` Randy Jennings
2025-12-19  1:39     ` Randy Jennings
2025-12-25 13:29   ` Sagi Grimberg
2025-12-25 17:17     ` Mohamed Khalfella
2025-12-27  9:52       ` Sagi Grimberg
2025-12-31 22:45         ` Mohamed Khalfella
2025-12-27  9:55       ` Sagi Grimberg
2025-12-31 22:36         ` Mohamed Khalfella
2025-12-31 23:04           ` Mohamed Khalfella
2025-11-26  2:11 ` [RFC PATCH 08/14] nvme: Implement cross-controller reset recovery Mohamed Khalfella
2025-12-19  1:21   ` Randy Jennings
2025-12-27 10:14   ` Sagi Grimberg
2025-12-31  0:04     ` Randy Jennings
2026-01-04 21:14       ` Sagi Grimberg
2026-01-07  3:16         ` Randy Jennings
2025-12-31 23:43     ` Mohamed Khalfella
2026-01-04 21:39       ` Sagi Grimberg
2026-01-30 22:01         ` Mohamed Khalfella
2025-11-26  2:11 ` [RFC PATCH 09/14] nvme: Implement cross-controller reset completion Mohamed Khalfella
2025-12-19  1:31   ` Randy Jennings
2025-12-27 10:24   ` Sagi Grimberg
2025-12-31 23:51     ` Mohamed Khalfella
2026-01-04 21:15       ` Sagi Grimberg
2026-01-30 22:32         ` Mohamed Khalfella
2025-11-26  2:11 ` [RFC PATCH 10/14] nvme-tcp: Use CCR to recover controller that hits an error Mohamed Khalfella
2025-12-19  2:06   ` Randy Jennings
2026-01-01  0:04     ` Mohamed Khalfella
2025-12-27 10:35   ` Sagi Grimberg
2025-12-31  0:13     ` Randy Jennings
2026-01-04 21:19       ` Sagi Grimberg
2026-01-01  0:27     ` Mohamed Khalfella
2025-11-26  2:11 ` [RFC PATCH 11/14] nvme-rdma: " Mohamed Khalfella
2025-12-19  2:16   ` Randy Jennings
2025-12-27 10:36   ` Sagi Grimberg
2025-11-26  2:11 ` [RFC PATCH 12/14] nvme-fc: Decouple error recovery from controller reset Mohamed Khalfella
2025-12-19  2:59   ` Randy Jennings
2025-11-26  2:12 ` [RFC PATCH 13/14] nvme-fc: Use CCR to recover controller that hits an error Mohamed Khalfella
2025-12-20  1:21   ` Randy Jennings
2025-11-26  2:12 ` [RFC PATCH 14/14] nvme-fc: Hold inflight requests while in RECOVERING state Mohamed Khalfella
2025-12-20  1:44   ` Randy Jennings

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251126021250.2583630-4-mkhalfella@purestorage.com \
    --to=mkhalfella@purestorage.com \
    --cc=adailey@purestorage.com \
    --cc=axboe@kernel.dk \
    --cc=hare@suse.de \
    --cc=hch@lst.de \
    --cc=jmeneghi@redhat.com \
    --cc=kbusch@kernel.org \
    --cc=kch@nvidia.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-nvme@lists.infradead.org \
    --cc=randyj@purestorage.com \
    --cc=sagi@grimberg.me \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox