From: Hannes Reinecke <hare@suse.de>
To: Mohamed Khalfella <mkhalfella@purestorage.com>,
Justin Tee <justin.tee@broadcom.com>,
Naresh Gottumukkala <nareshgottumukkala83@gmail.com>,
Paul Ely <paul.ely@broadcom.com>,
Chaitanya Kulkarni <kch@nvidia.com>,
Christoph Hellwig <hch@lst.de>, Jens Axboe <axboe@kernel.dk>,
Keith Busch <kbusch@kernel.org>, Sagi Grimberg <sagi@grimberg.me>
Cc: Aaron Dailey <adailey@purestorage.com>,
Randy Jennings <randyj@purestorage.com>,
Dhaval Giani <dgiani@purestorage.com>,
linux-nvme@lists.infradead.org, linux-kernel@vger.kernel.org
Subject: Re: [PATCH v2 03/14] nvmet: Implement CCR nvme command
Date: Tue, 3 Feb 2026 04:19:50 +0100 [thread overview]
Message-ID: <77a00fa1-5707-4859-8a7a-e823ca18c9fe@suse.de> (raw)
In-Reply-To: <20260130223531.2478849-4-mkhalfella@purestorage.com>
On 1/30/26 23:34, Mohamed Khalfella wrote:
> Defined by TP8028 Rapid Path Failure Recovery, CCR (Cross-Controller
> Reset) command is an nvme command issued to source controller by
> initiator to reset impacted controller. Implement CCR command for linux
> nvme target.
>
> Signed-off-by: Mohamed Khalfella <mkhalfella@purestorage.com>
> ---
> drivers/nvme/target/admin-cmd.c | 74 +++++++++++++++++++++++++++++++++
> drivers/nvme/target/core.c | 71 +++++++++++++++++++++++++++++++
> drivers/nvme/target/nvmet.h | 13 ++++++
> include/linux/nvme.h | 23 ++++++++++
> 4 files changed, 181 insertions(+)
>
> diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
> index ade1145df72d..c0fd8eca2e44 100644
> --- a/drivers/nvme/target/admin-cmd.c
> +++ b/drivers/nvme/target/admin-cmd.c
> @@ -376,7 +376,9 @@ static void nvmet_get_cmd_effects_admin(struct nvmet_ctrl *ctrl,
> log->acs[nvme_admin_get_features] =
> log->acs[nvme_admin_async_event] =
> log->acs[nvme_admin_keep_alive] =
> + log->acs[nvme_admin_cross_ctrl_reset] =
> cpu_to_le32(NVME_CMD_EFFECTS_CSUPP);
> +
> }
>
> static void nvmet_get_cmd_effects_nvm(struct nvme_effects_log *log)
> @@ -1615,6 +1617,75 @@ void nvmet_execute_keep_alive(struct nvmet_req *req)
> nvmet_req_complete(req, status);
> }
>
> +void nvmet_execute_cross_ctrl_reset(struct nvmet_req *req)
> +{
> + struct nvmet_ctrl *ictrl, *sctrl = req->sq->ctrl;
> + struct nvme_command *cmd = req->cmd;
> + struct nvmet_ccr *ccr, *new_ccr;
> + int ccr_active, ccr_total;
> + u16 cntlid, status = NVME_SC_SUCCESS;
> +
> + cntlid = le16_to_cpu(cmd->ccr.icid);
> + if (sctrl->cntlid == cntlid) {
> + req->error_loc =
> + offsetof(struct nvme_cross_ctrl_reset_cmd, icid);
> + status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
> + goto out;
> + }
> +
> + /* Find and get impacted controller */
> + ictrl = nvmet_ctrl_find_get_ccr(sctrl->subsys, sctrl->hostnqn,
> + cmd->ccr.ciu, cntlid,
> + le64_to_cpu(cmd->ccr.cirn));
> + if (!ictrl) {
> + /* Immediate Reset Successful */
> + nvmet_set_result(req, 1);
> + status = NVME_SC_SUCCESS;
> + goto out;
> + }
> +
> + ccr_total = ccr_active = 0;
> + mutex_lock(&sctrl->lock);
> + list_for_each_entry(ccr, &sctrl->ccr_list, entry) {
> + if (ccr->ctrl == ictrl) {
> + status = NVME_SC_CCR_IN_PROGRESS | NVME_STATUS_DNR;
> + goto out_unlock;
> + }
> +
> + ccr_total++;
> + if (ccr->ctrl)
> + ccr_active++;
> + }
> +
> + if (ccr_active >= NVMF_CCR_LIMIT) {
> + status = NVME_SC_CCR_LIMIT_EXCEEDED;
> + goto out_unlock;
> + }
> + if (ccr_total >= NVMF_CCR_PER_PAGE) {
> + status = NVME_SC_CCR_LOGPAGE_FULL;
> + goto out_unlock;
> + }
> +
> + new_ccr = kmalloc(sizeof(*new_ccr), GFP_KERNEL);
> + if (!new_ccr) {
> + status = NVME_SC_INTERNAL;
> + goto out_unlock;
> + }
> +
> + new_ccr->ciu = cmd->ccr.ciu;
> + new_ccr->icid = cntlid;
> + new_ccr->ctrl = ictrl;
> + list_add_tail(&new_ccr->entry, &sctrl->ccr_list);
> +
> +out_unlock:
> + mutex_unlock(&sctrl->lock);
> + if (status == NVME_SC_SUCCESS)
> + nvmet_ctrl_fatal_error(ictrl);
> + nvmet_ctrl_put(ictrl);
> +out:
> + nvmet_req_complete(req, status);
> +}
> +
> u32 nvmet_admin_cmd_data_len(struct nvmet_req *req)
> {
> struct nvme_command *cmd = req->cmd;
> @@ -1692,6 +1763,9 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
> case nvme_admin_keep_alive:
> req->execute = nvmet_execute_keep_alive;
> return 0;
> + case nvme_admin_cross_ctrl_reset:
> + req->execute = nvmet_execute_cross_ctrl_reset;
> + return 0;
> default:
> return nvmet_report_invalid_opcode(req);
> }
> diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
> index 0d2a1206e08f..54dd0dcfa12b 100644
> --- a/drivers/nvme/target/core.c
> +++ b/drivers/nvme/target/core.c
> @@ -114,6 +114,20 @@ u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len)
> return 0;
> }
>
> +void nvmet_ctrl_cleanup_ccrs(struct nvmet_ctrl *ctrl, bool all)
> +{
> + struct nvmet_ccr *ccr, *tmp;
> +
> + lockdep_assert_held(&ctrl->lock);
> +
> + list_for_each_entry_safe(ccr, tmp, &ctrl->ccr_list, entry) {
> + if (all || ccr->ctrl == NULL) {
> + list_del(&ccr->entry);
> + kfree(ccr);
> + }
> + }
> +}
> +
> static u32 nvmet_max_nsid(struct nvmet_subsys *subsys)
> {
> struct nvmet_ns *cur;
> @@ -1396,6 +1410,7 @@ static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
> if (!nvmet_is_disc_subsys(ctrl->subsys)) {
> ctrl->ciu = ((u8)(ctrl->ciu + 1)) ? : 1;
> ctrl->cirn = get_random_u64();
> + nvmet_ctrl_cleanup_ccrs(ctrl, false);
> }
> ctrl->csts = NVME_CSTS_RDY;
>
> @@ -1501,6 +1516,38 @@ struct nvmet_ctrl *nvmet_ctrl_find_get(const char *subsysnqn,
> return ctrl;
> }
>
> +struct nvmet_ctrl *nvmet_ctrl_find_get_ccr(struct nvmet_subsys *subsys,
> + const char *hostnqn, u8 ciu,
> + u16 cntlid, u64 cirn)
> +{
> + struct nvmet_ctrl *ctrl;
> + bool found = false;
> +
> + mutex_lock(&subsys->lock);
> + list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
> + if (ctrl->cntlid != cntlid)
> + continue;
> + if (strncmp(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE))
> + continue;
> +
Why do we compare the hostnqn here, too? To my understanding the host
NQN is tied to the controller, so the controller ID should be sufficient
here.
> + /* Avoid racing with a controller that is becoming ready */
> + mutex_lock(&ctrl->lock);
> + if (ctrl->ciu == ciu && ctrl->cirn == cirn)
> + found = true;
> + mutex_unlock(&ctrl->lock);
> +
> + if (found) {
> + if (kref_get_unless_zero(&ctrl->ref))
> + goto out;
> + break;
> + }
> + };
> + ctrl = NULL;
> +out:
> + mutex_unlock(&subsys->lock);
> + return ctrl;
> +}
> +
> u16 nvmet_check_ctrl_status(struct nvmet_req *req)
> {
> if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) {
> @@ -1626,6 +1673,7 @@ struct nvmet_ctrl *nvmet_alloc_ctrl(struct nvmet_alloc_ctrl_args *args)
> subsys->clear_ids = 1;
> #endif
>
> + INIT_LIST_HEAD(&ctrl->ccr_list);
> INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work);
> INIT_LIST_HEAD(&ctrl->async_events);
> INIT_RADIX_TREE(&ctrl->p2p_ns_map, GFP_KERNEL);
> @@ -1740,12 +1788,35 @@ struct nvmet_ctrl *nvmet_alloc_ctrl(struct nvmet_alloc_ctrl_args *args)
> }
> EXPORT_SYMBOL_GPL(nvmet_alloc_ctrl);
>
> +static void nvmet_ctrl_complete_pending_ccr(struct nvmet_ctrl *ctrl)
> +{
> + struct nvmet_subsys *subsys = ctrl->subsys;
> + struct nvmet_ctrl *sctrl;
> + struct nvmet_ccr *ccr;
> +
> + mutex_lock(&ctrl->lock);
> + nvmet_ctrl_cleanup_ccrs(ctrl, true);
> + mutex_unlock(&ctrl->lock);
> +
> + list_for_each_entry(sctrl, &subsys->ctrls, subsys_entry) {
> + mutex_lock(&sctrl->lock);
> + list_for_each_entry(ccr, &sctrl->ccr_list, entry) {
> + if (ccr->ctrl == ctrl) {
> + ccr->ctrl = NULL;
> + break;
> + }
> + }
> + mutex_unlock(&sctrl->lock);
> + }
> +}
> +
Maybe add documentation here that the first CCR cleanup is for clearing
CCRs issued from this controller, and the second is for CCRs issued _to_
this controller.
> static void nvmet_ctrl_free(struct kref *ref)
> {
> struct nvmet_ctrl *ctrl = container_of(ref, struct nvmet_ctrl, ref);
> struct nvmet_subsys *subsys = ctrl->subsys;
>
> mutex_lock(&subsys->lock);
> + nvmet_ctrl_complete_pending_ccr(ctrl);
> nvmet_ctrl_destroy_pr(ctrl);
> nvmet_release_p2p_ns_map(ctrl);
> list_del(&ctrl->subsys_entry);
> diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
> index f5d9a01ec60c..93d6ac41cf85 100644
> --- a/drivers/nvme/target/nvmet.h
> +++ b/drivers/nvme/target/nvmet.h
> @@ -269,6 +269,7 @@ struct nvmet_ctrl {
> u32 kato;
> u64 cirn;
>
> + struct list_head ccr_list;
> struct nvmet_port *port;
>
> u32 aen_enabled;
> @@ -315,6 +316,13 @@ struct nvmet_ctrl {
> struct nvmet_pr_log_mgr pr_log_mgr;
> };
>
> +struct nvmet_ccr {
> + struct nvmet_ctrl *ctrl;
> + struct list_head entry;
> + u16 icid;
> + u8 ciu;
> +};
> +
> struct nvmet_subsys {
> enum nvme_subsys_type type;
>
> @@ -578,6 +586,7 @@ void nvmet_req_free_sgls(struct nvmet_req *req);
> void nvmet_execute_set_features(struct nvmet_req *req);
> void nvmet_execute_get_features(struct nvmet_req *req);
> void nvmet_execute_keep_alive(struct nvmet_req *req);
> +void nvmet_execute_cross_ctrl_reset(struct nvmet_req *req);
>
> u16 nvmet_check_cqid(struct nvmet_ctrl *ctrl, u16 cqid, bool create);
> u16 nvmet_check_io_cqid(struct nvmet_ctrl *ctrl, u16 cqid, bool create);
> @@ -620,6 +629,10 @@ struct nvmet_ctrl *nvmet_alloc_ctrl(struct nvmet_alloc_ctrl_args *args);
> struct nvmet_ctrl *nvmet_ctrl_find_get(const char *subsysnqn,
> const char *hostnqn, u16 cntlid,
> struct nvmet_req *req);
> +struct nvmet_ctrl *nvmet_ctrl_find_get_ccr(struct nvmet_subsys *subsys,
> + const char *hostnqn, u8 ciu,
> + u16 cntlid, u64 cirn);
> +void nvmet_ctrl_cleanup_ccrs(struct nvmet_ctrl *ctrl, bool all);
> void nvmet_ctrl_put(struct nvmet_ctrl *ctrl);
> u16 nvmet_check_ctrl_status(struct nvmet_req *req);
> ssize_t nvmet_ctrl_host_traddr(struct nvmet_ctrl *ctrl,
> diff --git a/include/linux/nvme.h b/include/linux/nvme.h
> index 5135cdc3c120..0f305b317aa3 100644
> --- a/include/linux/nvme.h
> +++ b/include/linux/nvme.h
> @@ -23,6 +23,7 @@
>
> #define NVMF_CQT_MS 0
> #define NVMF_CCR_LIMIT 4
> +#define NVMF_CCR_PER_PAGE 511
>
> #define NVME_DISC_SUBSYS_NAME "nqn.2014-08.org.nvmexpress.discovery"
>
> @@ -1225,6 +1226,22 @@ struct nvme_zone_mgmt_recv_cmd {
> __le32 cdw14[2];
> };
>
> +struct nvme_cross_ctrl_reset_cmd {
> + __u8 opcode;
> + __u8 flags;
> + __u16 command_id;
> + __le32 nsid;
> + __le64 rsvd2[2];
> + union nvme_data_ptr dptr;
> + __u8 rsvd10;
> + __u8 ciu;
> + __le16 icid;
> + __le32 cdw11;
> + __le64 cirn;
> + __le32 cdw14;
> + __le32 cdw15;
> +};
> +
> struct nvme_io_mgmt_recv_cmd {
> __u8 opcode;
> __u8 flags;
I would have expected these definitions in the
first patch. But probably not that important.
> @@ -1323,6 +1340,7 @@ enum nvme_admin_opcode {
> nvme_admin_virtual_mgmt = 0x1c,
> nvme_admin_nvme_mi_send = 0x1d,
> nvme_admin_nvme_mi_recv = 0x1e,
> + nvme_admin_cross_ctrl_reset = 0x38,
> nvme_admin_dbbuf = 0x7C,
> nvme_admin_format_nvm = 0x80,
> nvme_admin_security_send = 0x81,
> @@ -1356,6 +1374,7 @@ enum nvme_admin_opcode {
> nvme_admin_opcode_name(nvme_admin_virtual_mgmt), \
> nvme_admin_opcode_name(nvme_admin_nvme_mi_send), \
> nvme_admin_opcode_name(nvme_admin_nvme_mi_recv), \
> + nvme_admin_opcode_name(nvme_admin_cross_ctrl_reset), \
> nvme_admin_opcode_name(nvme_admin_dbbuf), \
> nvme_admin_opcode_name(nvme_admin_format_nvm), \
> nvme_admin_opcode_name(nvme_admin_security_send), \
> @@ -2009,6 +2028,7 @@ struct nvme_command {
> struct nvme_dbbuf dbbuf;
> struct nvme_directive_cmd directive;
> struct nvme_io_mgmt_recv_cmd imr;
> + struct nvme_cross_ctrl_reset_cmd ccr;
> };
> };
>
> @@ -2173,6 +2193,9 @@ enum {
> NVME_SC_PMR_SAN_PROHIBITED = 0x123,
> NVME_SC_ANA_GROUP_ID_INVALID = 0x124,
> NVME_SC_ANA_ATTACH_FAILED = 0x125,
> + NVME_SC_CCR_IN_PROGRESS = 0x13f,
> + NVME_SC_CCR_LOGPAGE_FULL = 0x140,
> + NVME_SC_CCR_LIMIT_EXCEEDED = 0x141,
>
> /*
> * I/O Command Set Specific - NVM commands:
Cheers,
Hannes
--
Dr. Hannes Reinecke Kernel Storage Architect
hare@suse.de +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich
next prev parent reply other threads:[~2026-02-03 3:20 UTC|newest]
Thread overview: 82+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-01-30 22:34 [PATCH v2 00/14] TP8028 Rapid Path Failure Recovery Mohamed Khalfella
2026-01-30 22:34 ` [PATCH v2 01/14] nvmet: Rapid Path Failure Recovery set controller identify fields Mohamed Khalfella
2026-02-03 3:03 ` Hannes Reinecke
2026-02-03 18:14 ` Mohamed Khalfella
2026-02-04 0:34 ` Hannes Reinecke
2026-02-07 13:41 ` Sagi Grimberg
2026-02-14 0:42 ` Randy Jennings
2026-02-14 3:56 ` Mohamed Khalfella
2026-01-30 22:34 ` [PATCH v2 02/14] nvmet/debugfs: Add ctrl uniquifier and random values Mohamed Khalfella
2026-02-03 3:04 ` Hannes Reinecke
2026-02-07 13:47 ` Sagi Grimberg
2026-02-11 0:50 ` Randy Jennings
2026-02-11 1:02 ` Mohamed Khalfella
2026-01-30 22:34 ` [PATCH v2 03/14] nvmet: Implement CCR nvme command Mohamed Khalfella
2026-02-03 3:19 ` Hannes Reinecke [this message]
2026-02-03 18:40 ` Mohamed Khalfella
2026-02-04 0:38 ` Hannes Reinecke
2026-02-04 0:44 ` Mohamed Khalfella
2026-02-04 0:55 ` Hannes Reinecke
2026-02-04 17:52 ` Mohamed Khalfella
2026-02-07 13:58 ` Sagi Grimberg
2026-02-08 23:10 ` Mohamed Khalfella
2026-02-09 19:27 ` Mohamed Khalfella
2026-02-11 1:34 ` Randy Jennings
2026-02-07 14:11 ` Sagi Grimberg
2026-01-30 22:34 ` [PATCH v2 04/14] nvmet: Implement CCR logpage Mohamed Khalfella
2026-02-03 3:21 ` Hannes Reinecke
2026-02-07 14:11 ` Sagi Grimberg
2026-02-11 1:49 ` Randy Jennings
2026-01-30 22:34 ` [PATCH v2 05/14] nvmet: Send an AEN on CCR completion Mohamed Khalfella
2026-02-03 3:27 ` Hannes Reinecke
2026-02-03 18:48 ` Mohamed Khalfella
2026-02-04 0:43 ` Hannes Reinecke
2026-02-07 14:12 ` Sagi Grimberg
2026-02-11 1:52 ` Randy Jennings
2026-01-30 22:34 ` [PATCH v2 06/14] nvme: Rapid Path Failure Recovery read controller identify fields Mohamed Khalfella
2026-02-03 3:28 ` Hannes Reinecke
2026-02-07 14:13 ` Sagi Grimberg
2026-02-11 1:56 ` Randy Jennings
2026-01-30 22:34 ` [PATCH v2 07/14] nvme: Introduce FENCING and FENCED controller states Mohamed Khalfella
2026-02-03 5:07 ` Hannes Reinecke
2026-02-03 19:13 ` Mohamed Khalfella
2026-01-30 22:34 ` [PATCH v2 08/14] nvme: Implement cross-controller reset recovery Mohamed Khalfella
2026-02-03 5:19 ` Hannes Reinecke
2026-02-03 20:00 ` Mohamed Khalfella
2026-02-04 1:10 ` Hannes Reinecke
2026-02-04 23:24 ` Mohamed Khalfella
2026-02-11 3:44 ` Randy Jennings
2026-02-11 15:19 ` Hannes Reinecke
2026-02-10 22:09 ` James Smart
2026-02-10 22:27 ` Mohamed Khalfella
2026-02-10 22:49 ` James Smart
2026-02-10 23:25 ` Mohamed Khalfella
2026-02-11 0:12 ` Mohamed Khalfella
2026-02-11 3:33 ` Randy Jennings
2026-01-30 22:34 ` [PATCH v2 09/14] nvme: Implement cross-controller reset completion Mohamed Khalfella
2026-02-03 5:22 ` Hannes Reinecke
2026-02-03 20:07 ` Mohamed Khalfella
2026-01-30 22:34 ` [PATCH v2 10/14] nvme-tcp: Use CCR to recover controller that hits an error Mohamed Khalfella
2026-02-03 5:34 ` Hannes Reinecke
2026-02-03 21:24 ` Mohamed Khalfella
2026-02-04 0:48 ` Randy Jennings
2026-02-04 2:57 ` Hannes Reinecke
2026-02-10 1:39 ` Mohamed Khalfella
2026-01-30 22:34 ` [PATCH v2 11/14] nvme-rdma: " Mohamed Khalfella
2026-02-03 5:35 ` Hannes Reinecke
2026-01-30 22:34 ` [PATCH v2 12/14] nvme-fc: Decouple error recovery from controller reset Mohamed Khalfella
2026-02-03 5:40 ` Hannes Reinecke
2026-02-03 21:29 ` Mohamed Khalfella
2026-02-03 19:19 ` James Smart
2026-02-03 22:49 ` James Smart
2026-02-04 0:15 ` Mohamed Khalfella
2026-02-04 0:11 ` Mohamed Khalfella
2026-02-05 0:08 ` James Smart
2026-02-05 0:59 ` Mohamed Khalfella
2026-02-09 22:53 ` Mohamed Khalfella
2026-01-30 22:34 ` [PATCH v2 13/14] nvme-fc: Use CCR to recover controller that hits an error Mohamed Khalfella
2026-02-03 5:43 ` Hannes Reinecke
2026-02-10 22:12 ` James Smart
2026-02-10 22:20 ` Mohamed Khalfella
2026-02-13 19:29 ` Mohamed Khalfella
2026-01-30 22:34 ` [PATCH v2 14/14] nvme-fc: Hold inflight requests while in FENCING state Mohamed Khalfella
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=77a00fa1-5707-4859-8a7a-e823ca18c9fe@suse.de \
--to=hare@suse.de \
--cc=adailey@purestorage.com \
--cc=axboe@kernel.dk \
--cc=dgiani@purestorage.com \
--cc=hch@lst.de \
--cc=justin.tee@broadcom.com \
--cc=kbusch@kernel.org \
--cc=kch@nvidia.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-nvme@lists.infradead.org \
--cc=mkhalfella@purestorage.com \
--cc=nareshgottumukkala83@gmail.com \
--cc=paul.ely@broadcom.com \
--cc=randyj@purestorage.com \
--cc=sagi@grimberg.me \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox