All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] nbd: fix false lockdep deadlock warning
@ 2025-06-27  9:23 Yu Kuai
  2025-06-27 11:04 ` Ming Lei
  0 siblings, 1 reply; 12+ messages in thread
From: Yu Kuai @ 2025-06-27  9:23 UTC (permalink / raw)
  To: josef, axboe, ming.lei, hch, nilay, hare
  Cc: linux-block, nbd, linux-kernel, yukuai3, yukuai1, yi.zhang,
	yangerkun, johnny.chenyi

From: Yu Kuai <yukuai3@huawei.com>

The deadlock is reported because there is a circular dependency:

t1: disk->open_mutex -> nbd->config_lock

 blkdev_release
  bdev_release
   //lock disk->open_mutex
   blkdev_put_whole
    nbd_release
     nbd_config_put
        refcount_dec_and_mutex_lock
        //lock nbd->config_lock

t2: nbd->config_lock -> set->update_nr_hwq_lock

 nbd_genl_connect
  //lock nbd->config_lock
  nbd_start_device
   blk_mq_update_nr_hw_queues
   //lock set->update_nr_hwq_lock

t3: set->update_nr_hwq_lock -> disk->open_mutex

 nbd_dev_remove_work
  nbd_dev_remove
   del_gendisk
    down_read(&set->update_nr_hwq_lock);
    __del_gendisk
    mutex_lock(&disk->open_mutex);

This is a false warning because t1 and t2 should be synchronized by
nbd->refs: t1 is still holding the reference, while t2 is triggered
when the reference is decreased to 0. However, the lock order is broken.

Fix the problem by breaking the dependency from t2: call
blk_mq_update_nr_hw_queues() outside of the nbd-internal config_lock.
Since other contexts can now run concurrently with nbd_start_device(),
also make sure they still return -EBUSY; the only difference is that
they no longer wait for nbd_start_device() to finish.

Fixes: 98e68f67020c ("block: prevent adding/deleting disk during updating nr_hw_queues")
Reported-by: syzbot+2bcecf3c38cb3e8fdc8d@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/6855034f.a00a0220.137b3.0031.GAE@google.com/
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 drivers/block/nbd.c | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 7bdc7eb808ea..d43e8e73aeb3 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -1457,10 +1457,13 @@ static void nbd_config_put(struct nbd_device *nbd)
 	}
 }
 
-static int nbd_start_device(struct nbd_device *nbd)
+static int nbd_start_device(struct nbd_device *nbd, bool netlink)
+	__releases(&nbd->config_lock)
+	__acquires(&nbd->config_lock)
 {
 	struct nbd_config *config = nbd->config;
 	int num_connections = config->num_connections;
+	struct task_struct *old;
 	int error = 0, i;
 
 	if (nbd->pid)
@@ -1473,8 +1476,21 @@ static int nbd_start_device(struct nbd_device *nbd)
 		return -EINVAL;
 	}
 
-	blk_mq_update_nr_hw_queues(&nbd->tag_set, config->num_connections);
+	/*
+	 * synchronize with concurrent nbd_start_device() and
+	 * nbd_add_socket()
+	 */
 	nbd->pid = task_pid_nr(current);
+	if (!netlink) {
+		old = nbd->task_setup;
+		nbd->task_setup = current;
+	}
+
+	mutex_unlock(&nbd->config_lock);
+	blk_mq_update_nr_hw_queues(&nbd->tag_set, config->num_connections);
+	mutex_lock(&nbd->config_lock);
+	if (!netlink)
+		nbd->task_setup = old;
 
 	nbd_parse_flags(nbd);
 
@@ -1524,7 +1540,7 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd)
 	struct nbd_config *config = nbd->config;
 	int ret;
 
-	ret = nbd_start_device(nbd);
+	ret = nbd_start_device(nbd, false);
 	if (ret)
 		return ret;
 
@@ -1995,7 +2011,7 @@ static struct nbd_device *nbd_find_get_unused(void)
 	lockdep_assert_held(&nbd_index_mutex);
 
 	idr_for_each_entry(&nbd_index_idr, nbd, id) {
-		if (refcount_read(&nbd->config_refs) ||
+		if (refcount_read(&nbd->config_refs) || nbd->pid ||
 		    test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags))
 			continue;
 		if (refcount_inc_not_zero(&nbd->refs))
@@ -2109,7 +2125,7 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	mutex_lock(&nbd->config_lock);
-	if (refcount_read(&nbd->config_refs)) {
+	if (refcount_read(&nbd->config_refs) || nbd->pid) {
 		mutex_unlock(&nbd->config_lock);
 		nbd_put(nbd);
 		if (index == -1)
@@ -2198,7 +2214,7 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
 				goto out;
 		}
 	}
-	ret = nbd_start_device(nbd);
+	ret = nbd_start_device(nbd, true);
 	if (ret)
 		goto out;
 	if (info->attrs[NBD_ATTR_BACKEND_IDENTIFIER]) {
-- 
2.39.2


^ permalink raw reply related	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2025-07-08 11:13 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-06-27  9:23 [PATCH] nbd: fix false lockdep deadlock warning Yu Kuai
2025-06-27 11:04 ` Ming Lei
2025-06-28  0:48   ` Yu Kuai
2025-07-01 13:28     ` Nilay Shroff
2025-07-02  1:12       ` Yu Kuai
2025-07-02  2:32         ` Ming Lei
2025-07-02  6:22           ` Nilay Shroff
2025-07-02  7:30             ` Yu Kuai
2025-07-05  1:15               ` Yu Kuai
2025-07-08  5:12                 ` Nilay Shroff
2025-07-08  7:34                   ` Ming Lei
2025-07-08 11:13                     ` Nilay Shroff

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.