* [PATCH v2] nvme-fc: FPIN link integrity handling
@ 2024-04-02 9:30 hare
2024-04-02 12:55 ` Christoph Hellwig
0 siblings, 1 reply; 2+ messages in thread
From: hare @ 2024-04-02 9:30 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: Sagi Grimberg, Keith Busch, linux-nvme, Hannes Reinecke
From: Hannes Reinecke <hare@suse.de>
FPIN LI (link integrity) messages are received when the attached
fabric detects hardware errors. In response to these messages I/O
should be directed away from the affected ports, and only used
if the 'optimized' paths are unavailable.
Upon port reset the paths should be put back in service as the
affected hardware might have been replaced.
This patch adds a new controller flag 'NVME_CTRL_MARGINAL'
which will be checked during multipath path selection, causing the
path to be skipped when checking for 'optimized' paths. If no
optimized paths are available the 'marginal' paths are considered
for path selection alongside the 'non-optimized' paths.
Changes to the original submission:
- Changed flag name to 'marginal'
- Do not block marginal path; influence path selection instead
to de-prioritize marginal paths
Signed-off-by: Hannes Reinecke <hare@suse.de>
---
drivers/nvme/host/core.c | 1 +
drivers/nvme/host/fc.c | 108 +++++++++++++++++++++++++++++++++
drivers/nvme/host/multipath.c | 17 ++++--
drivers/nvme/host/nvme.h | 6 ++
drivers/scsi/lpfc/lpfc_els.c | 6 +-
drivers/scsi/qla2xxx/qla_isr.c | 1 +
include/linux/nvme-fc-driver.h | 3 +
7 files changed, 135 insertions(+), 7 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 57b664d12863..f5987a764a01 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -4606,6 +4606,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
WRITE_ONCE(ctrl->state, NVME_CTRL_NEW);
ctrl->passthru_err_log_enabled = false;
clear_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
+ clear_bit(NVME_CTRL_MARGINAL, &ctrl->flags);
spin_lock_init(&ctrl->lock);
mutex_init(&ctrl->scan_lock);
INIT_LIST_HEAD(&ctrl->namespaces);
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 68a5d971657b..4602d5611d62 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -786,6 +786,9 @@ nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl)
"NVME-FC{%d}: controller connectivity lost. Awaiting "
"Reconnect", ctrl->cnum);
+ /* clear 'marginal' flag as controller will be reset */
+ clear_bit(NVME_CTRL_MARGINAL, &ctrl->flags);
+
switch (nvme_ctrl_state(&ctrl->ctrl)) {
case NVME_CTRL_NEW:
case NVME_CTRL_LIVE:
@@ -3739,6 +3742,111 @@ static struct nvmf_transport_ops nvme_fc_transport = {
.create_ctrl = nvme_fc_create_ctrl,
};
+static struct nvme_fc_rport *nvme_fc_rport_from_wwpn(struct nvme_fc_lport *lport,
+ u64 rport_wwpn)
+{
+ struct nvme_fc_rport *rport;
+
+ list_for_each_entry(rport, &lport->endp_list, endp_list) {
+ if (!nvme_fc_rport_get(rport))
+ continue;
+ if (rport->remoteport.port_name == rport_wwpn &&
+ rport->remoteport.port_role & FC_PORT_ROLE_NVME_TARGET)
+ return rport;
+ nvme_fc_rport_put(rport);
+ }
+ return NULL;
+}
+
+/*
+ * nvme_fc_fpin_li_lport_update - routine to update Link Integrity
+ * event statistics.
+ * @lport: local port the FPIN was received on
+ * @tlv: pointer to link integrity descriptor
+ *
+ */
+static void
+nvme_fc_fpin_li_lport_update(struct nvme_fc_lport *lport, struct fc_tlv_desc *tlv)
+{
+ unsigned int i, pname_count;
+ struct nvme_fc_rport *attached_rport;
+ struct fc_fn_li_desc *li_desc = (struct fc_fn_li_desc *)tlv;
+ u64 wwpn;
+
+ wwpn = be64_to_cpu(li_desc->attached_wwpn);
+ attached_rport = nvme_fc_rport_from_wwpn(lport, wwpn);
+ pname_count = be32_to_cpu(li_desc->pname_count);
+
+ for (i = 0; pname_count; i++) {
+ struct nvme_fc_rport *rport;
+
+ wwpn = be64_to_cpu(li_desc->pname_list[i]);
+ rport = nvme_fc_rport_from_wwpn(lport, wwpn);
+ if (!rport)
+ continue;
+ if (rport != attached_rport) {
+ struct nvme_fc_ctrl *ctrl;
+
+ spin_lock_irq(&rport->lock);
+ list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list)
+ set_bit(NVME_CTRL_MARGINAL, &ctrl->ctrl.flags);
+ spin_unlock_irq(&rport->lock);
+ }
+ nvme_fc_rport_put(rport);
+ }
+ if (attached_rport) {
+ struct nvme_fc_ctrl *ctrl;
+
+ spin_lock_irq(&attached_rport->lock);
+ list_for_each_entry(ctrl, &attached_rport->ctrl_list, ctrl_list)
+ set_bit(NVME_CTRL_MARGINAL, &ctrl->ctrl.flags);
+ spin_unlock_irq(&attached_rport->lock);
+ nvme_fc_rport_put(attached_rport);
+ }
+}
+
+/**
+ * fc_host_fpin_rcv - routine to process a received FPIN.
+ * @localport: local port the FPIN was received on
+ * @fpin_len: length of FPIN payload, in bytes
+ * @fpin_buf: pointer to FPIN payload
+ * Notes:
+ * This routine assumes no locks are held on entry.
+ */
+void
+nvme_fc_fpin_rcv(struct nvme_fc_local_port *localport,
+ u32 fpin_len, char *fpin_buf)
+{
+ struct nvme_fc_lport *lport;
+ struct fc_els_fpin *fpin = (struct fc_els_fpin *)fpin_buf;
+ struct fc_tlv_desc *tlv;
+ u32 bytes_remain;
+ u32 dtag;
+
+ if (!localport)
+ return;
+ lport = localport_to_lport(localport);
+ tlv = (struct fc_tlv_desc *)&fpin->fpin_desc[0];
+ bytes_remain = fpin_len - offsetof(struct fc_els_fpin, fpin_desc);
+ bytes_remain = min_t(u32, bytes_remain, be32_to_cpu(fpin->desc_len));
+
+ while (bytes_remain >= FC_TLV_DESC_HDR_SZ &&
+ bytes_remain >= FC_TLV_DESC_SZ_FROM_LENGTH(tlv)) {
+ dtag = be32_to_cpu(tlv->desc_tag);
+ switch (dtag) {
+ case ELS_DTAG_LNK_INTEGRITY:
+ nvme_fc_fpin_li_lport_update(lport, tlv);
+ break;
+ default:
+ break;
+ }
+
+ bytes_remain -= FC_TLV_DESC_SZ_FROM_LENGTH(tlv);
+ tlv = fc_tlv_next_desc(tlv);
+ }
+}
+EXPORT_SYMBOL(nvme_fc_fpin_rcv);
+
/* Arbitrary successive failures max. With lots of subsystems could be high */
#define DISCOVERY_MAX_FAIL 20
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 007d695bd607..fe7612b440ca 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -282,11 +282,14 @@ static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node)
switch (ns->ana_state) {
case NVME_ANA_OPTIMIZED:
- if (distance < found_distance) {
- found_distance = distance;
- found = ns;
+ if (!nvme_ctrl_is_marginal(ns->ctrl)) {
+ if (distance < found_distance) {
+ found_distance = distance;
+ found = ns;
+ }
+ break;
}
- break;
+ fallthrough;
case NVME_ANA_NONOPTIMIZED:
if (distance < fallback_distance) {
fallback_distance = distance;
@@ -334,7 +337,8 @@ static struct nvme_ns *nvme_round_robin_path(struct nvme_ns_head *head,
if (ns->ana_state == NVME_ANA_OPTIMIZED) {
found = ns;
- goto out;
+ if (!nvme_ctrl_is_marginal(ns->ctrl))
+ goto out;
}
if (ns->ana_state == NVME_ANA_NONOPTIMIZED)
found = ns;
@@ -361,7 +365,8 @@ static struct nvme_ns *nvme_round_robin_path(struct nvme_ns_head *head,
static inline bool nvme_path_is_optimized(struct nvme_ns *ns)
{
return nvme_ctrl_state(ns->ctrl) == NVME_CTRL_LIVE &&
- ns->ana_state == NVME_ANA_OPTIMIZED;
+ ns->ana_state == NVME_ANA_OPTIMIZED &&
+ !nvme_ctrl_is_marginal(ns->ctrl);
}
inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head)
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 83c3870d5ed0..400c65655bdf 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -258,6 +258,7 @@ enum nvme_ctrl_flags {
NVME_CTRL_SKIP_ID_CNS_CS = 4,
NVME_CTRL_DIRTY_CAPABILITY = 5,
NVME_CTRL_FROZEN = 6,
+ NVME_CTRL_MARGINAL = 7,
};
struct nvme_ctrl {
@@ -399,6 +400,11 @@ static inline enum nvme_ctrl_state nvme_ctrl_state(struct nvme_ctrl *ctrl)
return READ_ONCE(ctrl->state);
}
+static inline bool nvme_ctrl_is_marginal(struct nvme_ctrl *ctrl)
+{
+ return test_bit(NVME_CTRL_MARGINAL, &ctrl->flags);
+}
+
enum nvme_iopolicy {
NVME_IOPOLICY_NUMA,
NVME_IOPOLICY_RR,
diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index 4d723200690a..ecfe6bc8ab63 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -33,6 +33,7 @@
#include <scsi/scsi_transport_fc.h>
#include <uapi/scsi/fc/fc_fs.h>
#include <uapi/scsi/fc/fc_els.h>
+#include <linux/nvme-fc-driver.h>
#include "lpfc_hw4.h"
#include "lpfc_hw.h"
@@ -10343,9 +10344,12 @@ lpfc_els_rcv_fpin(struct lpfc_vport *vport, void *p, u32 fpin_length)
fpin_length += sizeof(struct fc_els_fpin); /* the entire FPIN */
/* Send every descriptor individually to the upper layer */
- if (deliver)
+ if (deliver) {
fc_host_fpin_rcv(lpfc_shost_from_vport(vport),
fpin_length, (char *)fpin, 0);
+ nvme_fc_fpin_rcv(vport->localport,
+ fpin_length, (char *)fpin);
+ }
desc_cnt++;
}
}
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index d48007e18288..b180e10053c5 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -46,6 +46,7 @@ qla27xx_process_purex_fpin(struct scsi_qla_host *vha, struct purex_item *item)
pkt, pkt_size);
fc_host_fpin_rcv(vha->host, pkt_size, (char *)pkt, 0);
+ nvme_fc_fpin_rcv(vha->nvme_local_port, pkt_size, (char *)pkt);
}
const char *const port_state_str[] = {
diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h
index 89ea1ebd975a..b23d1e55737a 100644
--- a/include/linux/nvme-fc-driver.h
+++ b/include/linux/nvme-fc-driver.h
@@ -536,6 +536,9 @@ void nvme_fc_rescan_remoteport(struct nvme_fc_remote_port *remoteport);
int nvme_fc_set_remoteport_devloss(struct nvme_fc_remote_port *remoteport,
u32 dev_loss_tmo);
+void nvme_fc_fpin_rcv(struct nvme_fc_local_port *localport,
+ u32 fpin_len, char *fpin_buf);
+
/*
* Routine called to pass a NVME-FC LS request, received by the lldd,
* to the nvme-fc transport.
--
2.35.3
^ permalink raw reply related [flat|nested] 2+ messages in thread* Re: [PATCH v2] nvme-fc: FPIN link integrity handling
2024-04-02 9:30 [PATCH v2] nvme-fc: FPIN link integrity handling hare
@ 2024-04-02 12:55 ` Christoph Hellwig
0 siblings, 0 replies; 2+ messages in thread
From: Christoph Hellwig @ 2024-04-02 12:55 UTC (permalink / raw)
To: hare
Cc: Christoph Hellwig, Sagi Grimberg, Keith Busch, linux-nvme,
Hannes Reinecke
> +static struct nvme_fc_rport *nvme_fc_rport_from_wwpn(struct nvme_fc_lport *lport,
> + u64 rport_wwpn)
Pleae fix all this whacky formatting. Avoid the overly long lines
(in this case by just having the return value on it's own line)
and use two tab continuations instead of wasting most of the continuing
line.
> +/*
> + * nvme_fc_fpin_li_lport_update - routine to update Link Integrity
> + * event statistics.
> + * @lport: local port the FPIN was received on
> + * @tlv: pointer to link integrity descriptor
> + *
> + */
What's the point of this half-kernel doc comment? It doesn't really
add much valye for a local function.
> +static void
> +nvme_fc_fpin_li_lport_update(struct nvme_fc_lport *lport, struct fc_tlv_desc *tlv)
> +{
> + unsigned int i, pname_count;
> + struct nvme_fc_rport *attached_rport;
> + struct fc_fn_li_desc *li_desc = (struct fc_fn_li_desc *)tlv;
The casting here is pretty gross. Sounds like the FC code should use
a union or better fix fc_fn_li_desc to not duplicate the tag/len
fields?
> + u64 wwpn;
> +
> + wwpn = be64_to_cpu(li_desc->attached_wwpn);
> + attached_rport = nvme_fc_rport_from_wwpn(lport, wwpn);
> + pname_count = be32_to_cpu(li_desc->pname_count);
... and if these were initialized at declartion time thing would
get a lot easier to read.
> +EXPORT_SYMBOL(nvme_fc_fpin_rcv);
All nvme exports are EXPORT_SYMBOL_GPL
Also please split the patch up into nvme core, nvme-fc and
driver changes.
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2024-04-02 12:55 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-04-02 9:30 [PATCH v2] nvme-fc: FPIN link integrity handling hare
2024-04-02 12:55 ` Christoph Hellwig
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.