Linux-NVME Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCHv2 0/2] nvme-multipath: fix deadlock in device_add_disk()
@ 2024-10-08 13:57 Hannes Reinecke
  2024-10-08 13:57 ` [PATCH 1/2] nvme: propagate I/O errors during partition scan Hannes Reinecke
  2024-10-08 13:57 ` [PATCH 2/2] nvme-multipath: retry partition scan on errors Hannes Reinecke
  0 siblings, 2 replies; 3+ messages in thread
From: Hannes Reinecke @ 2024-10-08 13:57 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Sagi Grimberg, Keith Busch, linux-nvme, Hannes Reinecke

From: Hannes Reinecke <hare@suse.de>

Hi all,

I'm having a testcase which repeatedly disables namespaces on the target
assigning new UUID (to simulate namespace remapping) and enable that
namespace again.
To throw in more fun these namespaces have their ANA group ID changes
to simulate namespace moving around in a cluster, where only the paths
local to the cluster node are active, and all other paths are inaccessible.

Essentially it's doing something like:

echo 0 > ${ns}/enable
<random delay>
echo "<dev>" > ${ns}/device_path
echo "<grpid>" > ${ns}/ana_grpid
uuidgen > ${ns}/device_uuid
echo 1 > ${ns}/enable

ie a similar testcase than the previous patchset, only this time I'm
just doing an 'enable/disable' bit without removing the namespace from
the target.
This is causing lockups in device_add_disk(), as the partition scan is
constantly retrying I/O and never completes.

With this patchset I/O errors during partition scan will never be
retried but will cause nvme_mpath_set_live() to fail.
This allows us to retry nvme_mpath_set_live() on the next rescan
to fixup the situation.

As usual, comments and reviews are welcome.

Changes to the original submission:
- Drop patch to simplify the loop in nvme_update_ana_state()
- Rework patches to return I/O errors during partition scan

Hannes Reinecke (2):
  nvme: propagate I/O errors during partition scan
  nvme-multipath: retry partition scan on errors

 drivers/nvme/host/core.c      | 26 ++++++++++++++++++------
 drivers/nvme/host/multipath.c | 38 +++++++++++++++++++++++++++++++++++
 drivers/nvme/host/nvme.h      |  2 ++
 3 files changed, 60 insertions(+), 6 deletions(-)

-- 
2.35.3



^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH 1/2] nvme: propagate I/O errors during partition scan
  2024-10-08 13:57 [PATCHv2 0/2] nvme-multipath: fix deadlock in device_add_disk() Hannes Reinecke
@ 2024-10-08 13:57 ` Hannes Reinecke
  2024-10-08 13:57 ` [PATCH 2/2] nvme-multipath: retry partition scan on errors Hannes Reinecke
  1 sibling, 0 replies; 3+ messages in thread
From: Hannes Reinecke @ 2024-10-08 13:57 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Sagi Grimberg, Keith Busch, linux-nvme, Hannes Reinecke

Partition scan is triggered from device_add_disk(), and is needs to
terminate such the namespace scanning can continue.
This patch adds two new flags NVME_NSHEAD_DISK_SCAN to signal that
the 'head' device is currently in partition scan, and NVME_NS_SCAN_FAILED
to signal that I/O errors had been encountered on the namespace during
partition scan.
With these flags we can modify the failover mechanism to count any namespace
with NVME_NS_SCAN_FAILED as a disabled path, and to not queue commands when
all paths are disabled and NVME_NSHEAD_DISK_SCAN is set.

Signed-off-by: Hannes Reinecke <hare@kernel.org>
---
 drivers/nvme/host/core.c      | 26 ++++++++++++++++++++------
 drivers/nvme/host/multipath.c | 18 ++++++++++++++++++
 drivers/nvme/host/nvme.h      |  2 ++
 3 files changed, 40 insertions(+), 6 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 651073280f6f..d8581ddd9a2e 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -4007,8 +4007,10 @@ static void nvme_validate_ns(struct nvme_ns *ns, struct nvme_ns_info *info)
 	 *
 	 * TODO: we should probably schedule a delayed retry here.
 	 */
-	if (ret > 0 && (ret & NVME_STATUS_DNR))
+	if (ret > 0 && (ret & NVME_STATUS_DNR)) {
+		set_bit(NVME_NS_SCAN_FAILED, &ns->flags);
 		nvme_ns_remove(ns);
+	}
 }
 
 static void nvme_scan_ns(struct nvme_ctrl *ctrl, unsigned nsid)
@@ -4017,8 +4019,14 @@ static void nvme_scan_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 	struct nvme_ns *ns;
 	int ret;
 
-	if (nvme_identify_ns_descs(ctrl, &info))
+	ns = nvme_find_get_ns(ctrl, nsid);
+	if (nvme_identify_ns_descs(ctrl, &info)) {
+		if (ns) {
+			set_bit(NVME_NS_SCAN_FAILED, &ns->flags);
+			nvme_put_ns(ns);
+		}
 		return;
+	}
 
 	if (info.ids.csi != NVME_CSI_NVM && !nvme_multi_css(ctrl)) {
 		dev_warn(ctrl->device,
@@ -4037,17 +4045,23 @@ static void nvme_scan_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 	else
 		ret = nvme_ns_info_from_identify(ctrl, &info);
 
-	if (info.is_removed)
-		nvme_ns_remove_by_nsid(ctrl, nsid);
+	if (info.is_removed && ns) {
+		nvme_ns_remove(ns);
+		nvme_put_ns(ns);
+	}
 
 	/*
 	 * Ignore the namespace if it is not ready. We will get an AEN once it
 	 * becomes ready and restart the scan.
 	 */
-	if (ret || !info.is_ready)
+	if (ret || !info.is_ready) {
+		if (ret > 0 && ns) {
+			set_bit(NVME_NS_SCAN_FAILED, &ns->flags);
+			nvme_put_ns(ns);
+		}
 		return;
+	}
 
-	ns = nvme_find_get_ns(ctrl, nsid);
 	if (ns) {
 		nvme_validate_ns(ns, &info);
 		nvme_put_ns(ns);
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index f72c5a6a2d8e..68eb1df98419 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -92,6 +92,13 @@ void nvme_failover_req(struct request *req)
 
 	nvme_mpath_clear_current_path(ns);
 
+	/*
+	 * Mark the namespace with NS_SCAN_FAILED to ensure
+	 * I/O can be aborted during partition scan.
+	 */
+	if (test_bit(NVME_NSHEAD_DISK_SCAN, &ns->head->flags))
+		set_bit(NVME_NS_SCAN_FAILED, &ns->flags);
+
 	/*
 	 * If we got back an ANA error, we know the controller is alive but not
 	 * ready to serve this namespace.  Kick of a re-read of the ANA
@@ -227,6 +234,7 @@ void nvme_mpath_revalidate_paths(struct nvme_ns *ns)
 	list_for_each_entry_rcu(ns, &head->list, siblings) {
 		if (capacity != get_capacity(ns->disk))
 			clear_bit(NVME_NS_READY, &ns->flags);
+		clear_bit(NVME_NS_SCAN_FAILED, &ns->flags);
 	}
 	srcu_read_unlock(&head->srcu, srcu_idx);
 
@@ -239,6 +247,12 @@ static bool nvme_path_is_disabled(struct nvme_ns *ns)
 {
 	enum nvme_ctrl_state state = nvme_ctrl_state(ns->ctrl);
 
+	if (test_bit(NVME_NSHEAD_DISK_SCAN, &ns->head->flags)) {
+		if (state == NVME_CTRL_DELETING ||
+		    test_bit(NVME_NS_SCAN_FAILED, &ns->flags))
+			return true;
+	}
+
 	/*
 	 * We don't treat NVME_CTRL_DELETING as a disabled path as I/O should
 	 * still be able to complete assuming that the controller is connected.
@@ -423,6 +437,8 @@ static bool nvme_available_path(struct nvme_ns_head *head)
 
 	if (!test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags))
 		return NULL;
+	if (test_bit(NVME_NSHEAD_DISK_SCAN, &head->flags))
+		return NULL;
 
 	list_for_each_entry_rcu(ns, &head->list, siblings) {
 		if (test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ns->ctrl->flags))
@@ -646,8 +662,10 @@ static void nvme_mpath_set_live(struct nvme_ns *ns)
 	 * head.
 	 */
 	if (!test_and_set_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
+		set_bit(NVME_NSHEAD_DISK_SCAN, &head->flags);
 		rc = device_add_disk(&head->subsys->dev, head->disk,
 				     nvme_ns_attr_groups);
+		clear_bit(NVME_NSHEAD_DISK_SCAN, &head->flags);
 		if (rc) {
 			clear_bit(NVME_NSHEAD_DISK_LIVE, &head->flags);
 			return;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 50515ad0f9d6..6089528d5eb8 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -493,6 +493,7 @@ struct nvme_ns_head {
 	struct mutex		lock;
 	unsigned long		flags;
 #define NVME_NSHEAD_DISK_LIVE	0
+#define NVME_NSHEAD_DISK_SCAN	1
 	struct nvme_ns __rcu	*current_path[];
 #endif
 };
@@ -527,6 +528,7 @@ struct nvme_ns {
 #define NVME_NS_ANA_PENDING	2
 #define NVME_NS_FORCE_RO	3
 #define NVME_NS_READY		4
+#define NVME_NS_SCAN_FAILED	5
 
 	struct cdev		cdev;
 	struct device		cdev_device;
-- 
2.35.3



^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH 2/2] nvme-multipath: retry partition scan on errors
  2024-10-08 13:57 [PATCHv2 0/2] nvme-multipath: fix deadlock in device_add_disk() Hannes Reinecke
  2024-10-08 13:57 ` [PATCH 1/2] nvme: propagate I/O errors during partition scan Hannes Reinecke
@ 2024-10-08 13:57 ` Hannes Reinecke
  1 sibling, 0 replies; 3+ messages in thread
From: Hannes Reinecke @ 2024-10-08 13:57 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Sagi Grimberg, Keith Busch, linux-nvme, Hannes Reinecke

When we hit I/O errors during partition scan we can assume that
partition scanning was incomplete. Rather than continue to work
with an incomplete device we should disable the disk completely,
and wait for a rescan to retry again.

Signed-off-by: Hannes Reinecke <hare@kernel.org>
---
 drivers/nvme/host/multipath.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 68eb1df98419..f70952c6d683 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -243,6 +243,21 @@ void nvme_mpath_revalidate_paths(struct nvme_ns *ns)
 	kblockd_schedule_work(&head->requeue_work);
 }
 
+static bool nvme_scan_errors(struct nvme_ns_head *head)
+{
+	int srcu_idx, valid = 0;
+	struct nvme_ns *ns;
+
+	srcu_idx = srcu_read_lock(&head->srcu);
+	list_for_each_entry_rcu(ns, &head->list, siblings) {
+		if (!test_bit(NVME_NS_SCAN_FAILED, &ns->flags))
+			valid++;
+	}
+	srcu_read_unlock(&head->srcu, srcu_idx);
+
+	return !valid;
+}
+
 static bool nvme_path_is_disabled(struct nvme_ns *ns)
 {
 	enum nvme_ctrl_state state = nvme_ctrl_state(ns->ctrl);
@@ -670,6 +685,11 @@ static void nvme_mpath_set_live(struct nvme_ns *ns)
 			clear_bit(NVME_NSHEAD_DISK_LIVE, &head->flags);
 			return;
 		}
+		if (nvme_scan_errors(head)) {
+			del_gendisk(head->disk);
+			clear_bit(NVME_NSHEAD_DISK_LIVE, &head->flags);
+			return;
+		}
 		nvme_add_ns_head_cdev(head);
 	}
 
-- 
2.35.3



^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2024-10-08 13:57 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-10-08 13:57 [PATCHv2 0/2] nvme-multipath: fix deadlock in device_add_disk() Hannes Reinecke
2024-10-08 13:57 ` [PATCH 1/2] nvme: propagate I/O errors during partition scan Hannes Reinecke
2024-10-08 13:57 ` [PATCH 2/2] nvme-multipath: retry partition scan on errors Hannes Reinecke

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox