From: hare@kernel.org
To: Christoph Hellwig <hch@lst.de>
Cc: Keith Busch <kbusch@kernel.org>, Sagi Grimberg <sagi@grimberg.me>,
linux-nvme@lists.infradead.org, Hannes Reinecke <hare@kernel.org>
Subject: [PATCH 2/4] nvme-fc: marginal path handling
Date: Mon, 28 Apr 2025 08:36:33 +0200 [thread overview]
Message-ID: <20250428063635.125879-3-hare@kernel.org> (raw)
In-Reply-To: <20250428063635.125879-1-hare@kernel.org>
From: Hannes Reinecke <hare@kernel.org>
FPIN LI (link integrity) messages are received when the attached
fabric detects hardware errors. In response to these messages I/O
should be directed away from the affected ports; those ports should
only be used if no 'optimized' paths are available.
To handle this a new controller flag 'NVME_CTRL_MARGINAL' is added
which will cause the multipath scheduler to skip these paths when
checking for 'optimized' paths. They are, however, still eligible
for non-optimized path selection. The flag is cleared upon reset, as the
faulty hardware might have been replaced by then.
Signed-off-by: Hannes Reinecke <hare@kernel.org>
---
drivers/nvme/host/core.c | 1 +
drivers/nvme/host/fc.c | 99 ++++++++++++++++++++++++++++++++++
drivers/nvme/host/multipath.c | 17 +++---
drivers/nvme/host/nvme.h | 6 +++
include/linux/nvme-fc-driver.h | 3 ++
5 files changed, 120 insertions(+), 6 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index eb6ea8acb3cc..ab851df98079 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -4833,6 +4833,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
WRITE_ONCE(ctrl->state, NVME_CTRL_NEW);
ctrl->passthru_err_log_enabled = false;
clear_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
+ clear_bit(NVME_CTRL_MARGINAL, &ctrl->flags);
spin_lock_init(&ctrl->lock);
mutex_init(&ctrl->namespaces_lock);
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 2257c3c96dd2..475a636d1f7d 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -786,6 +786,10 @@ nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl)
"Reconnect", ctrl->cnum);
set_bit(ASSOC_FAILED, &ctrl->flags);
+
+ /* clear 'marginal' flag as controller will be reset */
+ clear_bit(NVME_CTRL_MARGINAL, &ctrl->flags);
+
nvme_reset_ctrl(&ctrl->ctrl);
}
@@ -3713,6 +3717,101 @@ static struct nvmf_transport_ops nvme_fc_transport = {
.create_ctrl = nvme_fc_create_ctrl,
};
+static struct nvme_fc_rport *nvme_fc_rport_from_wwpn(struct nvme_fc_lport *lport,
+ u64 rport_wwpn)
+{
+ struct nvme_fc_rport *rport;
+
+ list_for_each_entry(rport, &lport->endp_list, endp_list) {
+ if (!nvme_fc_rport_get(rport))
+ continue;
+ if (rport->remoteport.port_name == rport_wwpn &&
+ rport->remoteport.port_role & FC_PORT_ROLE_NVME_TARGET)
+ return rport;
+ nvme_fc_rport_put(rport);
+ }
+ return NULL;
+}
+
+static void
+nvme_fc_fpin_li_lport_update(struct nvme_fc_lport *lport, struct fc_fn_li_desc *li)
+{
+ unsigned int i, pname_count = be32_to_cpu(li->pname_count);
+ u64 attached_wwpn = be64_to_cpu(li->attached_wwpn);
+ struct nvme_fc_rport *attached_rport;
+
+ for (i = 0; i < pname_count; i++) {
+ struct nvme_fc_rport *rport;
+ u64 wwpn = be64_to_cpu(li->pname_list[i]);
+
+ rport = nvme_fc_rport_from_wwpn(lport, wwpn);
+ if (!rport)
+ continue;
+ if (wwpn != attached_wwpn) {
+ struct nvme_fc_ctrl *ctrl;
+
+ spin_lock_irq(&rport->lock);
+ list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list)
+ set_bit(NVME_CTRL_MARGINAL, &ctrl->ctrl.flags);
+ spin_unlock_irq(&rport->lock);
+ }
+ nvme_fc_rport_put(rport);
+ }
+
+ attached_rport = nvme_fc_rport_from_wwpn(lport, attached_wwpn);
+ if (attached_rport) {
+ struct nvme_fc_ctrl *ctrl;
+
+ spin_lock_irq(&attached_rport->lock);
+ list_for_each_entry(ctrl, &attached_rport->ctrl_list, ctrl_list)
+ set_bit(NVME_CTRL_MARGINAL, &ctrl->ctrl.flags);
+ spin_unlock_irq(&attached_rport->lock);
+ nvme_fc_rport_put(attached_rport);
+ }
+}
+
+/**
+ * nvme_fc_fpin_rcv() - Process a received FPIN.
+ * @localport: local port the FPIN was received on
+ * @fpin_len: length of FPIN payload, in bytes
+ * @fpin_buf: pointer to FPIN payload
+ * Notes:
+ * This routine assumes no locks are held on entry.
+ */
+void
+nvme_fc_fpin_rcv(struct nvme_fc_local_port *localport,
+ u32 fpin_len, char *fpin_buf)
+{
+ struct nvme_fc_lport *lport;
+ struct fc_els_fpin *fpin = (struct fc_els_fpin *)fpin_buf;
+ union fc_tlv_desc *tlv;
+ u32 bytes_remain;
+ u32 dtag;
+
+ if (!localport)
+ return;
+ lport = localport_to_lport(localport);
+ tlv = &fpin->fpin_desc[0];
+ bytes_remain = fpin_len - offsetof(struct fc_els_fpin, fpin_desc);
+ bytes_remain = min_t(u32, bytes_remain, be32_to_cpu(fpin->desc_len));
+
+ while (bytes_remain >= FC_TLV_DESC_HDR_SZ &&
+ bytes_remain >= FC_TLV_DESC_SZ_FROM_LENGTH(tlv)) {
+ dtag = be32_to_cpu(tlv->hdr.desc_tag);
+ switch (dtag) {
+ case ELS_DTAG_LNK_INTEGRITY:
+ nvme_fc_fpin_li_lport_update(lport, &tlv->li);
+ break;
+ default:
+ break;
+ }
+
+ bytes_remain -= FC_TLV_DESC_SZ_FROM_LENGTH(tlv);
+ tlv = fc_tlv_next_desc(tlv);
+ }
+}
+EXPORT_SYMBOL(nvme_fc_fpin_rcv);
+
/* Arbitrary successive failures max. With lots of subsystems could be high */
#define DISCOVERY_MAX_FAIL 20
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 250f3da67cc9..6d7425d34dfc 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -273,11 +273,14 @@ static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node)
switch (ns->ana_state) {
case NVME_ANA_OPTIMIZED:
- if (distance < found_distance) {
- found_distance = distance;
- found = ns;
+ if (!nvme_ctrl_is_marginal(ns->ctrl)) {
+ if (distance < found_distance) {
+ found_distance = distance;
+ found = ns;
+ }
+ break;
}
- break;
+ fallthrough;
case NVME_ANA_NONOPTIMIZED:
if (distance < fallback_distance) {
fallback_distance = distance;
@@ -330,7 +333,8 @@ static struct nvme_ns *nvme_round_robin_path(struct nvme_ns_head *head)
if (ns->ana_state == NVME_ANA_OPTIMIZED) {
found = ns;
- goto out;
+ if (!nvme_ctrl_is_marginal(ns->ctrl))
+ goto out;
}
if (ns->ana_state == NVME_ANA_NONOPTIMIZED)
found = ns;
@@ -394,7 +398,8 @@ static struct nvme_ns *nvme_queue_depth_path(struct nvme_ns_head *head)
static inline bool nvme_path_is_optimized(struct nvme_ns *ns)
{
return nvme_ctrl_state(ns->ctrl) == NVME_CTRL_LIVE &&
- ns->ana_state == NVME_ANA_OPTIMIZED;
+ ns->ana_state == NVME_ANA_OPTIMIZED &&
+ !nvme_ctrl_is_marginal(ns->ctrl);
}
static struct nvme_ns *nvme_numa_path(struct nvme_ns_head *head)
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index c1b4ef6c5233..503df5dc2934 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -275,6 +275,7 @@ enum nvme_ctrl_flags {
NVME_CTRL_SKIP_ID_CNS_CS = 4,
NVME_CTRL_DIRTY_CAPABILITY = 5,
NVME_CTRL_FROZEN = 6,
+ NVME_CTRL_MARGINAL = 7,
};
struct nvme_ctrl {
@@ -419,6 +420,11 @@ static inline enum nvme_ctrl_state nvme_ctrl_state(struct nvme_ctrl *ctrl)
return READ_ONCE(ctrl->state);
}
+static inline bool nvme_ctrl_is_marginal(struct nvme_ctrl *ctrl)
+{
+ return test_bit(NVME_CTRL_MARGINAL, &ctrl->flags);
+}
+
enum nvme_iopolicy {
NVME_IOPOLICY_NUMA,
NVME_IOPOLICY_RR,
diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h
index 9f6acadfe0c8..bcd3b1e5a256 100644
--- a/include/linux/nvme-fc-driver.h
+++ b/include/linux/nvme-fc-driver.h
@@ -536,6 +536,9 @@ void nvme_fc_rescan_remoteport(struct nvme_fc_remote_port *remoteport);
int nvme_fc_set_remoteport_devloss(struct nvme_fc_remote_port *remoteport,
u32 dev_loss_tmo);
+void nvme_fc_fpin_rcv(struct nvme_fc_local_port *localport,
+ u32 fpin_len, char *fpin_buf);
+
/*
* Routine called to pass a NVME-FC LS request, received by the lldd,
* to the nvme-fc transport.
--
2.35.3
next prev parent reply other threads:[~2025-04-28 6:37 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-04-28 6:36 [PATCHv3 0/4] nvme-fc: FPIN link integrity handling hare
2025-04-28 6:36 ` [PATCH 1/4] fc_els: use 'union fc_tlv_desc' hare
2025-04-29 0:32 ` Justin Tee
2025-04-29 5:56 ` Hannes Reinecke
2025-04-28 6:36 ` hare [this message]
2025-04-28 6:36 ` [PATCH 3/4] lpfc: enable FPIN notification for NVMe hare
2025-04-29 0:30 ` Justin Tee
2025-04-29 6:14 ` Hannes Reinecke
2025-04-29 21:28 ` Justin Tee
2025-04-28 6:36 ` [PATCH 4/4] qla2xxx: " hare
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250428063635.125879-3-hare@kernel.org \
--to=hare@kernel.org \
--cc=hch@lst.de \
--cc=kbusch@kernel.org \
--cc=linux-nvme@lists.infradead.org \
--cc=sagi@grimberg.me \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox