public inbox for linux-nvme@lists.infradead.org
 help / color / mirror / Atom feed
From: hare@kernel.org
To: Christoph Hellwig <hch@lst.de>
Cc: Keith Busch <kbusch@kernel.org>, Sagi Grimberg <sagi@grimberg.me>,
	linux-nvme@lists.infradead.org, Hannes Reinecke <hare@kernel.org>
Subject: [PATCH] nvme-multipath: fix lockdep warning on shutdown
Date: Fri, 24 Jan 2025 08:14:39 +0100	[thread overview]
Message-ID: <20250124071439.106663-1-hare@kernel.org> (raw)

From: Hannes Reinecke <hare@kernel.org>

During shutdown of multipath devices, lockdep complained about a
possible circular locking dependency:

WARNING: possible circular locking dependency detected
(udev-worker)/2792 is trying to acquire lock:
ffff8881012a4348 ((wq_completion)kblockd){+.+.}-{0:0}, at: touch_wq_lockdep_map+0x26/0x90

but task is already holding lock:
ffff88811e4b7cc8 (&disk->open_mutex){+.+.}-{4:4}, at: bdev_release+0x61/0x1a0
which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:
-> #2 (&disk->open_mutex){+.+.}-{4:4}:
        __mutex_lock+0xa5/0xe00
       nvme_partition_scan_work+0x31/0x60
        process_scheduled_works+0x37c/0x6f0
-> #1 ((work_completion)(&head->partition_scan_work)){+.+.}-{0:0}:
        process_scheduled_works+0x348/0x6f0
        worker_thread+0x127/0x2a0
-> #0 ((wq_completion)kblockd){+.+.}-{0:0}:
        __lock_acquire+0x11f9/0x1790
        lock_acquire+0x245/0x2d0
        touch_wq_lockdep_map+0x3b/0x90
        __flush_work+0x240/0x4b0
        nvme_mpath_remove_disk+0x2b/0x50
        nvme_free_ns_head+0x19/0x90

The problem is that nvme_mpath_remove_disk() is called with
disk->open_mutex held, so calling flush_work() on partition_scan_work
(which itself tries to take disk->open_mutex) can deadlock.
Fix this by checking for NVME_NSHEAD_DISK_LIVE in
nvme_partition_scan_work() before trying to lock disk->open_mutex.

Fixes: 1f021341eef4 ("nvme-multipath: defer partition scanning")
Signed-off-by: Hannes Reinecke <hare@kernel.org>
---
 block/blk-ioprio.c                |  6 ++++-
 drivers/nvme/host/multipath.c     |  2 ++
 drivers/nvme/target/core.c        | 42 +++++++++++++++----------------
 drivers/nvme/target/io-cmd-bdev.c |  9 +++++++
 4 files changed, 37 insertions(+), 22 deletions(-)

diff --git a/block/blk-ioprio.c b/block/blk-ioprio.c
index 8fff7ccc0ac7..9f1b2069a3c9 100644
--- a/block/blk-ioprio.c
+++ b/block/blk-ioprio.c
@@ -141,9 +141,13 @@ static struct blkcg_policy ioprio_policy = {
 
 void blkcg_set_ioprio(struct bio *bio)
 {
-	struct ioprio_blkcg *blkcg = blkcg_to_ioprio_blkcg(bio->bi_blkg->blkcg);
+	struct ioprio_blkcg *blkcg;
 	u16 prio;
 
+	if (WARN_ON(!bio->bi_blkg || ! bio->bi_blkg->blkcg))
+		return;
+
+	blkcg = blkcg_to_ioprio_blkcg(bio->bi_blkg->blkcg);
 	if (!blkcg || blkcg->prio_policy == POLICY_NO_CHANGE)
 		return;
 
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index a85d190942bd..af763ac4d657 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -593,6 +593,8 @@ static void nvme_partition_scan_work(struct work_struct *work)
 	if (WARN_ON_ONCE(!test_and_clear_bit(GD_SUPPRESS_PART_SCAN,
 					     &head->disk->state)))
 		return;
+	if (!test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags))
+		return;
 
 	mutex_lock(&head->disk->open_mutex);
 	bdev_disk_changed(head->disk, false);
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 78ba6162361a..5f7b5d1f78c0 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -423,20 +423,37 @@ void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl)
 	cancel_delayed_work_sync(&ctrl->ka_work);
 }
 
+static inline u16 nvmet_check_ana_state(struct nvmet_port *port,
+		struct nvmet_ns *ns)
+{
+	enum nvme_ana_state state = port->ana_state[ns->anagrpid];
+
+	if (unlikely(state == NVME_ANA_INACCESSIBLE))
+		return NVME_SC_ANA_INACCESSIBLE;
+	if (unlikely(state == NVME_ANA_PERSISTENT_LOSS))
+		return NVME_SC_ANA_PERSISTENT_LOSS;
+	if (unlikely(state == NVME_ANA_CHANGE))
+		return NVME_SC_ANA_TRANSITION;
+	return 0;
+}
+
 u16 nvmet_req_find_ns(struct nvmet_req *req)
 {
 	u32 nsid = le32_to_cpu(req->cmd->common.nsid);
 	struct nvmet_subsys *subsys = nvmet_req_subsys(req);
+	u16 status = 0;
 
 	req->ns = xa_load(&subsys->namespaces, nsid);
 	if (unlikely(!req->ns || !req->ns->enabled)) {
 		req->error_loc = offsetof(struct nvme_common_command, nsid);
 		if (!req->ns) /* ns doesn't exist! */
 			return NVME_SC_INVALID_NS | NVME_STATUS_DNR;
-
-		/* ns exists but it's disabled */
+		status = nvmet_check_ana_state(req->port, req->ns);
+		if (!status)
+			/* ns exists but it's disabled */
+			status = NVME_SC_INTERNAL_PATH_ERROR;
 		req->ns = NULL;
-		return NVME_SC_INTERNAL_PATH_ERROR;
+		return status;
 	}
 
 	percpu_ref_get(&req->ns->ref);
@@ -965,20 +982,6 @@ int nvmet_sq_init(struct nvmet_sq *sq)
 }
 EXPORT_SYMBOL_GPL(nvmet_sq_init);
 
-static inline u16 nvmet_check_ana_state(struct nvmet_port *port,
-		struct nvmet_ns *ns)
-{
-	enum nvme_ana_state state = port->ana_state[ns->anagrpid];
-
-	if (unlikely(state == NVME_ANA_INACCESSIBLE))
-		return NVME_SC_ANA_INACCESSIBLE;
-	if (unlikely(state == NVME_ANA_PERSISTENT_LOSS))
-		return NVME_SC_ANA_PERSISTENT_LOSS;
-	if (unlikely(state == NVME_ANA_CHANGE))
-		return NVME_SC_ANA_TRANSITION;
-	return 0;
-}
-
 static inline u16 nvmet_io_cmd_check_access(struct nvmet_req *req)
 {
 	if (unlikely(req->ns->readonly)) {
@@ -1040,14 +1043,11 @@ static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
 		return nvmet_parse_passthru_io_cmd(req);
 
 	ret = nvmet_req_find_ns(req);
-	if (unlikely(ret))
-		return ret;
-
-	ret = nvmet_check_ana_state(req->port, req->ns);
 	if (unlikely(ret)) {
 		req->error_loc = offsetof(struct nvme_common_command, nsid);
 		return ret;
 	}
+
 	ret = nvmet_io_cmd_check_access(req);
 	if (unlikely(ret)) {
 		req->error_loc = offsetof(struct nvme_common_command, nsid);
diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index 2b09b2c69857..4533e9997c7e 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -285,8 +285,16 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
 		bio_init(bio, req->ns->bdev, req->inline_bvec,
 			 ARRAY_SIZE(req->inline_bvec), opf);
 	} else {
+		if (!req->ns->enabled) {
+			nvmet_req_complete(req, NVME_SC_INTERNAL_PATH_ERROR);
+			return;
+		}
 		bio = bio_alloc(req->ns->bdev, bio_max_segs(sg_cnt), opf,
 				GFP_KERNEL);
+		if (!bio) {
+			nvmet_req_complete(req, NVME_SC_INTERNAL);
+			return;
+		}
 	}
 	bio->bi_iter.bi_sector = sector;
 	bio->bi_private = req;
@@ -313,6 +321,7 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
 
 			bio = bio_alloc(req->ns->bdev, bio_max_segs(sg_cnt),
 					opf, GFP_KERNEL);
+			WARN_ON(!bio);
 			bio->bi_iter.bi_sector = sector;
 
 			bio_chain(bio, prev);
-- 
2.35.3



             reply	other threads:[~2025-01-24  7:14 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-01-24  7:14 hare [this message]
2025-01-24  8:29 ` [PATCH] nvme-multipath: fix lockdep warning on shutdown Hannes Reinecke

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250124071439.106663-1-hare@kernel.org \
    --to=hare@kernel.org \
    --cc=hch@lst.de \
    --cc=kbusch@kernel.org \
    --cc=linux-nvme@lists.infradead.org \
    --cc=sagi@grimberg.me \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox