* [PATCH 1/7] loop: revert "make autoclear operation asynchronous"
2022-01-29 7:14 loop: make autoclear operation synchronous again Tetsuo Handa
@ 2022-01-29 7:14 ` Tetsuo Handa
2022-02-08 14:47 ` [PATCH for 5.17] " Tetsuo Handa
2022-01-29 7:14 ` [PATCH 2/7] loop: clarify __module_get()/module_put() usage Tetsuo Handa
` (5 subsequent siblings)
6 siblings, 1 reply; 12+ messages in thread
From: Tetsuo Handa @ 2022-01-29 7:14 UTC (permalink / raw)
To: Jens Axboe, Christoph Hellwig, Jan Kara, Ming Lei
Cc: linux-block, Tetsuo Handa, kernel test robot
The kernel test robot is reporting that xfstest which does
umount ext2 on xfs
umount xfs
sequence started failing, for commit 322c4293ecc58110 ("loop: make
autoclear operation asynchronous") removed a guarantee that fput() of
backing file is processed before lo_release() from close() returns to
user mode. Revert that commit.
Reported-by: kernel test robot <oliver.sang@intel.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
---
drivers/block/loop.c | 65 ++++++++++++++++++++------------------------
drivers/block/loop.h | 1 -
2 files changed, 29 insertions(+), 37 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 01cbbfc4e9e2..150012ffb387 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1082,7 +1082,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
return error;
}
-static void __loop_clr_fd(struct loop_device *lo)
+static void __loop_clr_fd(struct loop_device *lo, bool release)
{
struct file *filp;
gfp_t gfp = lo->old_gfp_mask;
@@ -1144,6 +1144,8 @@ static void __loop_clr_fd(struct loop_device *lo)
/* let user-space know about this change */
kobject_uevent(&disk_to_dev(lo->lo_disk)->kobj, KOBJ_CHANGE);
mapping_set_gfp_mask(filp->f_mapping, gfp);
+ /* This is safe: open() is still holding a reference. */
+ module_put(THIS_MODULE);
blk_mq_unfreeze_queue(lo->lo_queue);
disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
@@ -1151,52 +1153,44 @@ static void __loop_clr_fd(struct loop_device *lo)
if (lo->lo_flags & LO_FLAGS_PARTSCAN) {
int err;
- mutex_lock(&lo->lo_disk->open_mutex);
+ /*
+ * open_mutex has been held already in release path, so don't
+ * acquire it if this function is called in such case.
+ *
+ * If the reread partition isn't from release path, lo_refcnt
+ * must be at least one and it can only become zero when the
+ * current holder is released.
+ */
+ if (!release)
+ mutex_lock(&lo->lo_disk->open_mutex);
err = bdev_disk_changed(lo->lo_disk, false);
- mutex_unlock(&lo->lo_disk->open_mutex);
+ if (!release)
+ mutex_unlock(&lo->lo_disk->open_mutex);
if (err)
pr_warn("%s: partition scan of loop%d failed (rc=%d)\n",
__func__, lo->lo_number, err);
/* Device is gone, no point in returning error */
}
+ /*
+ * lo->lo_state is set to Lo_unbound here after above partscan has
+ * finished. There cannot be anybody else entering __loop_clr_fd() as
+ * Lo_rundown state protects us from all the other places trying to
+ * change the 'lo' device.
+ */
lo->lo_flags = 0;
if (!part_shift)
lo->lo_disk->flags |= GENHD_FL_NO_PART;
-
- fput(filp);
-}
-
-static void loop_rundown_completed(struct loop_device *lo)
-{
mutex_lock(&lo->lo_mutex);
lo->lo_state = Lo_unbound;
mutex_unlock(&lo->lo_mutex);
- module_put(THIS_MODULE);
-}
-
-static void loop_rundown_workfn(struct work_struct *work)
-{
- struct loop_device *lo = container_of(work, struct loop_device,
- rundown_work);
- struct block_device *bdev = lo->lo_device;
- struct gendisk *disk = lo->lo_disk;
-
- __loop_clr_fd(lo);
- kobject_put(&bdev->bd_device.kobj);
- module_put(disk->fops->owner);
- loop_rundown_completed(lo);
-}
-static void loop_schedule_rundown(struct loop_device *lo)
-{
- struct block_device *bdev = lo->lo_device;
- struct gendisk *disk = lo->lo_disk;
-
- __module_get(disk->fops->owner);
- kobject_get(&bdev->bd_device.kobj);
- INIT_WORK(&lo->rundown_work, loop_rundown_workfn);
- queue_work(system_long_wq, &lo->rundown_work);
+ /*
+ * Need not hold lo_mutex to fput backing file. Calling fput holding
+ * lo_mutex triggers a circular lock dependency possibility warning as
+ * fput can take open_mutex which is usually taken before lo_mutex.
+ */
+ fput(filp);
}
static int loop_clr_fd(struct loop_device *lo)
@@ -1228,8 +1222,7 @@ static int loop_clr_fd(struct loop_device *lo)
lo->lo_state = Lo_rundown;
mutex_unlock(&lo->lo_mutex);
- __loop_clr_fd(lo);
- loop_rundown_completed(lo);
+ __loop_clr_fd(lo, false);
return 0;
}
@@ -1754,7 +1747,7 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
* In autoclear mode, stop the loop thread
* and remove configuration after last close.
*/
- loop_schedule_rundown(lo);
+ __loop_clr_fd(lo, true);
return;
} else if (lo->lo_state == Lo_bound) {
/*
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index 918a7a2dc025..082d4b6bfc6a 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -56,7 +56,6 @@ struct loop_device {
struct gendisk *lo_disk;
struct mutex lo_mutex;
bool idr_visible;
- struct work_struct rundown_work;
};
struct loop_cmd {
--
2.32.0
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH for 5.17] loop: revert "make autoclear operation asynchronous"
2022-01-29 7:14 ` [PATCH 1/7] loop: revert "make autoclear operation asynchronous" Tetsuo Handa
@ 2022-02-08 14:47 ` Tetsuo Handa
2022-02-09 8:16 ` Christoph Hellwig
0 siblings, 1 reply; 12+ messages in thread
From: Tetsuo Handa @ 2022-02-08 14:47 UTC (permalink / raw)
To: Jens Axboe; +Cc: linux-block, Jan Kara, Christoph Hellwig
The kernel test robot is reporting that xfstest which does
umount ext2 on xfs
umount xfs
sequence started failing, for commit 322c4293ecc58110 ("loop: make
autoclear operation asynchronous") removed a guarantee that fput() of
backing file is processed before lo_release() from close() returns to
user mode. Revert that commit.
Reported-by: kernel test robot <oliver.sang@intel.com>
Acked-by: Jan Kara <jack@suse.cz>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
---
drivers/block/loop.c | 65 ++++++++++++++++++++------------------------
drivers/block/loop.h | 1 -
2 files changed, 29 insertions(+), 37 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 01cbbfc4e9e2..150012ffb387 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1082,7 +1082,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
return error;
}
-static void __loop_clr_fd(struct loop_device *lo)
+static void __loop_clr_fd(struct loop_device *lo, bool release)
{
struct file *filp;
gfp_t gfp = lo->old_gfp_mask;
@@ -1144,6 +1144,8 @@ static void __loop_clr_fd(struct loop_device *lo)
/* let user-space know about this change */
kobject_uevent(&disk_to_dev(lo->lo_disk)->kobj, KOBJ_CHANGE);
mapping_set_gfp_mask(filp->f_mapping, gfp);
+ /* This is safe: open() is still holding a reference. */
+ module_put(THIS_MODULE);
blk_mq_unfreeze_queue(lo->lo_queue);
disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
@@ -1151,52 +1153,44 @@ static void __loop_clr_fd(struct loop_device *lo)
if (lo->lo_flags & LO_FLAGS_PARTSCAN) {
int err;
- mutex_lock(&lo->lo_disk->open_mutex);
+ /*
+ * open_mutex has been held already in release path, so don't
+ * acquire it if this function is called in such case.
+ *
+ * If the reread partition isn't from release path, lo_refcnt
+ * must be at least one and it can only become zero when the
+ * current holder is released.
+ */
+ if (!release)
+ mutex_lock(&lo->lo_disk->open_mutex);
err = bdev_disk_changed(lo->lo_disk, false);
- mutex_unlock(&lo->lo_disk->open_mutex);
+ if (!release)
+ mutex_unlock(&lo->lo_disk->open_mutex);
if (err)
pr_warn("%s: partition scan of loop%d failed (rc=%d)\n",
__func__, lo->lo_number, err);
/* Device is gone, no point in returning error */
}
+ /*
+ * lo->lo_state is set to Lo_unbound here after above partscan has
+ * finished. There cannot be anybody else entering __loop_clr_fd() as
+ * Lo_rundown state protects us from all the other places trying to
+ * change the 'lo' device.
+ */
lo->lo_flags = 0;
if (!part_shift)
lo->lo_disk->flags |= GENHD_FL_NO_PART;
-
- fput(filp);
-}
-
-static void loop_rundown_completed(struct loop_device *lo)
-{
mutex_lock(&lo->lo_mutex);
lo->lo_state = Lo_unbound;
mutex_unlock(&lo->lo_mutex);
- module_put(THIS_MODULE);
-}
-
-static void loop_rundown_workfn(struct work_struct *work)
-{
- struct loop_device *lo = container_of(work, struct loop_device,
- rundown_work);
- struct block_device *bdev = lo->lo_device;
- struct gendisk *disk = lo->lo_disk;
-
- __loop_clr_fd(lo);
- kobject_put(&bdev->bd_device.kobj);
- module_put(disk->fops->owner);
- loop_rundown_completed(lo);
-}
-static void loop_schedule_rundown(struct loop_device *lo)
-{
- struct block_device *bdev = lo->lo_device;
- struct gendisk *disk = lo->lo_disk;
-
- __module_get(disk->fops->owner);
- kobject_get(&bdev->bd_device.kobj);
- INIT_WORK(&lo->rundown_work, loop_rundown_workfn);
- queue_work(system_long_wq, &lo->rundown_work);
+ /*
+ * Need not hold lo_mutex to fput backing file. Calling fput holding
+ * lo_mutex triggers a circular lock dependency possibility warning as
+ * fput can take open_mutex which is usually taken before lo_mutex.
+ */
+ fput(filp);
}
static int loop_clr_fd(struct loop_device *lo)
@@ -1228,8 +1222,7 @@ static int loop_clr_fd(struct loop_device *lo)
lo->lo_state = Lo_rundown;
mutex_unlock(&lo->lo_mutex);
- __loop_clr_fd(lo);
- loop_rundown_completed(lo);
+ __loop_clr_fd(lo, false);
return 0;
}
@@ -1754,7 +1747,7 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
* In autoclear mode, stop the loop thread
* and remove configuration after last close.
*/
- loop_schedule_rundown(lo);
+ __loop_clr_fd(lo, true);
return;
} else if (lo->lo_state == Lo_bound) {
/*
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index 918a7a2dc025..082d4b6bfc6a 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -56,7 +56,6 @@ struct loop_device {
struct gendisk *lo_disk;
struct mutex lo_mutex;
bool idr_visible;
- struct work_struct rundown_work;
};
struct loop_cmd {
--
2.32.0
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH 2/7] loop: clarify __module_get()/module_put() usage
2022-01-29 7:14 loop: make autoclear operation synchronous again Tetsuo Handa
2022-01-29 7:14 ` [PATCH 1/7] loop: revert "make autoclear operation asynchronous" Tetsuo Handa
@ 2022-01-29 7:14 ` Tetsuo Handa
2022-01-29 7:14 ` [PATCH 3/7] loop: don't hold lo->lo_mutex from __loop_clr_fd() Tetsuo Handa
` (4 subsequent siblings)
6 siblings, 0 replies; 12+ messages in thread
From: Tetsuo Handa @ 2022-01-29 7:14 UTC (permalink / raw)
To: Jens Axboe, Christoph Hellwig, Jan Kara, Ming Lei
Cc: linux-block, Tetsuo Handa
These are used in order to block loop_exit() while holding backing files.
Cc: Jan Kara <jack@suse.cz>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
---
drivers/block/loop.c | 16 ++++------------
1 file changed, 4 insertions(+), 12 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 150012ffb387..3a87c33df2ec 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -959,9 +959,6 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
return -EBADF;
is_loop = is_loop_device(file);
- /* This is safe, since we have a reference from open(). */
- __module_get(THIS_MODULE);
-
/*
* If we don't hold exclusive handle for the device, upgrade to it
* here to avoid changing device under exclusive owner.
@@ -1027,6 +1024,8 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
lo->use_dio = lo->lo_flags & LO_FLAGS_DIRECT_IO;
lo->lo_device = bdev;
lo->lo_backing_file = file;
+ /* Block loop_exit() while holding backing file. */
+ __module_get(THIS_MODULE);
lo->old_gfp_mask = mapping_gfp_mask(mapping);
mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
@@ -1077,8 +1076,6 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
bd_abort_claiming(bdev, loop_configure);
out_putf:
fput(file);
- /* This is safe: open() is still holding a reference. */
- module_put(THIS_MODULE);
return error;
}
@@ -1144,8 +1141,6 @@ static void __loop_clr_fd(struct loop_device *lo, bool release)
/* let user-space know about this change */
kobject_uevent(&disk_to_dev(lo->lo_disk)->kobj, KOBJ_CHANGE);
mapping_set_gfp_mask(filp->f_mapping, gfp);
- /* This is safe: open() is still holding a reference. */
- module_put(THIS_MODULE);
blk_mq_unfreeze_queue(lo->lo_queue);
disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
@@ -1185,12 +1180,9 @@ static void __loop_clr_fd(struct loop_device *lo, bool release)
lo->lo_state = Lo_unbound;
mutex_unlock(&lo->lo_mutex);
- /*
- * Need not hold lo_mutex to fput backing file. Calling fput holding
- * lo_mutex triggers a circular lock dependency possibility warning as
- * fput can take open_mutex which is usually taken before lo_mutex.
- */
+ /* Release backing file and unblock loop_exit(). */
fput(filp);
+ module_put(THIS_MODULE);
}
static int loop_clr_fd(struct loop_device *lo)
--
2.32.0
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH 3/7] loop: don't hold lo->lo_mutex from __loop_clr_fd()
2022-01-29 7:14 loop: make autoclear operation synchronous again Tetsuo Handa
2022-01-29 7:14 ` [PATCH 1/7] loop: revert "make autoclear operation asynchronous" Tetsuo Handa
2022-01-29 7:14 ` [PATCH 2/7] loop: clarify __module_get()/module_put() usage Tetsuo Handa
@ 2022-01-29 7:14 ` Tetsuo Handa
2022-01-29 7:14 ` [PATCH 4/7] loop: don't call blk_mq_freeze_queue()/blk_mq_unfreeze_queue() from lo_release() Tetsuo Handa
` (3 subsequent siblings)
6 siblings, 0 replies; 12+ messages in thread
From: Tetsuo Handa @ 2022-01-29 7:14 UTC (permalink / raw)
To: Jens Axboe, Christoph Hellwig, Jan Kara, Ming Lei
Cc: linux-block, Tetsuo Handa
As a preparation for not holding lo->lo_mutex from lo_open()/lo_release()
paths, move the updating of lo->lo_state in __loop_clr_fd() to callers.
Cc: Jan Kara <jack@suse.cz>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
---
drivers/block/loop.c | 20 ++++++++------------
1 file changed, 8 insertions(+), 12 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 3a87c33df2ec..14b6f862531b 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1100,11 +1100,9 @@ static void __loop_clr_fd(struct loop_device *lo, bool release)
/*
* Since this function is called upon "ioctl(LOOP_CLR_FD)" xor "close()
* after ioctl(LOOP_CLR_FD)", it is a sign of something going wrong if
- * lo->lo_state has changed while waiting for lo->lo_mutex.
+ * lo->lo_state has changed.
*/
- mutex_lock(&lo->lo_mutex);
BUG_ON(lo->lo_state != Lo_rundown);
- mutex_unlock(&lo->lo_mutex);
if (test_bit(QUEUE_FLAG_WC, &lo->lo_queue->queue_flags))
blk_queue_write_cache(lo->lo_queue, false, false);
@@ -1167,18 +1165,9 @@ static void __loop_clr_fd(struct loop_device *lo, bool release)
/* Device is gone, no point in returning error */
}
- /*
- * lo->lo_state is set to Lo_unbound here after above partscan has
- * finished. There cannot be anybody else entering __loop_clr_fd() as
- * Lo_rundown state protects us from all the other places trying to
- * change the 'lo' device.
- */
lo->lo_flags = 0;
if (!part_shift)
lo->lo_disk->flags |= GENHD_FL_NO_PART;
- mutex_lock(&lo->lo_mutex);
- lo->lo_state = Lo_unbound;
- mutex_unlock(&lo->lo_mutex);
/* Release backing file and unblock loop_exit(). */
fput(filp);
@@ -1215,6 +1204,10 @@ static int loop_clr_fd(struct loop_device *lo)
mutex_unlock(&lo->lo_mutex);
__loop_clr_fd(lo, false);
+ /* Allow loop_configure() to reuse this device. */
+ mutex_lock(&lo->lo_mutex);
+ lo->lo_state = Lo_unbound;
+ mutex_unlock(&lo->lo_mutex);
return 0;
}
@@ -1740,6 +1733,9 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
* and remove configuration after last close.
*/
__loop_clr_fd(lo, true);
+ mutex_lock(&lo->lo_mutex);
+ lo->lo_state = Lo_unbound;
+ mutex_unlock(&lo->lo_mutex);
return;
} else if (lo->lo_state == Lo_bound) {
/*
--
2.32.0
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH 4/7] loop: don't call blk_mq_freeze_queue()/blk_mq_unfreeze_queue() from lo_release()
2022-01-29 7:14 loop: make autoclear operation synchronous again Tetsuo Handa
` (2 preceding siblings ...)
2022-01-29 7:14 ` [PATCH 3/7] loop: don't hold lo->lo_mutex from __loop_clr_fd() Tetsuo Handa
@ 2022-01-29 7:14 ` Tetsuo Handa
2022-01-29 7:14 ` [PATCH 5/7] loop: don't call destroy_workqueue() " Tetsuo Handa
` (2 subsequent siblings)
6 siblings, 0 replies; 12+ messages in thread
From: Tetsuo Handa @ 2022-01-29 7:14 UTC (permalink / raw)
To: Jens Axboe, Christoph Hellwig, Jan Kara, Ming Lei
Cc: linux-block, Tetsuo Handa
It turned out that there is no need to call blk_mq_freeze_queue() from
lo_release(), for all pending I/O requests are flushed by the block core
layer before "struct block_device_operations"->release() is called.
As a preparation for not holding lo->lo_mutex from lo_open()/lo_release()
paths, remove blk_mq_freeze_queue()/blk_mq_unfreeze_queue() from
lo_release().
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
---
drivers/block/loop.c | 17 ++++-------------
1 file changed, 4 insertions(+), 13 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 14b6f862531b..b6435f803061 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1108,7 +1108,8 @@ static void __loop_clr_fd(struct loop_device *lo, bool release)
blk_queue_write_cache(lo->lo_queue, false, false);
/* freeze request queue during the transition */
- blk_mq_freeze_queue(lo->lo_queue);
+ if (!release)
+ blk_mq_freeze_queue(lo->lo_queue);
destroy_workqueue(lo->workqueue);
spin_lock_irq(&lo->lo_work_lock);
@@ -1139,7 +1140,8 @@ static void __loop_clr_fd(struct loop_device *lo, bool release)
/* let user-space know about this change */
kobject_uevent(&disk_to_dev(lo->lo_disk)->kobj, KOBJ_CHANGE);
mapping_set_gfp_mask(filp->f_mapping, gfp);
- blk_mq_unfreeze_queue(lo->lo_queue);
+ if (!release)
+ blk_mq_unfreeze_queue(lo->lo_queue);
disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
@@ -1728,22 +1730,11 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
goto out_unlock;
lo->lo_state = Lo_rundown;
mutex_unlock(&lo->lo_mutex);
- /*
- * In autoclear mode, stop the loop thread
- * and remove configuration after last close.
- */
__loop_clr_fd(lo, true);
mutex_lock(&lo->lo_mutex);
lo->lo_state = Lo_unbound;
mutex_unlock(&lo->lo_mutex);
return;
- } else if (lo->lo_state == Lo_bound) {
- /*
- * Otherwise keep thread (if running) and config,
- * but flush possible ongoing bios in thread.
- */
- blk_mq_freeze_queue(lo->lo_queue);
- blk_mq_unfreeze_queue(lo->lo_queue);
}
out_unlock:
--
2.32.0
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH 5/7] loop: don't call destroy_workqueue() from lo_release()
2022-01-29 7:14 loop: make autoclear operation synchronous again Tetsuo Handa
` (3 preceding siblings ...)
2022-01-29 7:14 ` [PATCH 4/7] loop: don't call blk_mq_freeze_queue()/blk_mq_unfreeze_queue() from lo_release() Tetsuo Handa
@ 2022-01-29 7:14 ` Tetsuo Handa
2022-01-29 7:14 ` [PATCH 6/7] loop: don't hold lo->lo_mutex from lo_open()/lo_release() Tetsuo Handa
2022-01-29 7:15 ` [PATCH 7/7] loop: use WQ_MEM_RECLAIM flag Tetsuo Handa
6 siblings, 0 replies; 12+ messages in thread
From: Tetsuo Handa @ 2022-01-29 7:14 UTC (permalink / raw)
To: Jens Axboe, Christoph Hellwig, Jan Kara, Ming Lei
Cc: linux-block, Tetsuo Handa
syzbot is reporting circular locking problem at __loop_clr_fd() [1], for
commit 87579e9b7d8dc36e ("loop: use worker per cgroup instead of kworker")
is calling destroy_workqueue() with disk->open_mutex held.
But it turned out that there is no need to call flush_workqueue() from
lo_release(), for all pending I/O requests are flushed by the block core
layer before "struct block_device_operations"->release() is called.
In order to silence lockdep warning, defer calling destroy_workqueue()
till loop_remove(). Note that the root cause is that lo_open()/lo_release()
waits for I/O requests with disk->open_mutex held, and we need to avoid
holding lo->lo_mutex from lo_open()/lo_release() paths.
Link: https://syzkaller.appspot.com/bug?extid=643e4ce4b6ad1347d372 [1]
Reported-by: syzbot <syzbot+643e4ce4b6ad1347d372@syzkaller.appspotmail.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
---
drivers/block/loop.c | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index b6435f803061..481d2371864a 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1003,7 +1003,8 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
!file->f_op->write_iter)
lo->lo_flags |= LO_FLAGS_READ_ONLY;
- lo->workqueue = alloc_workqueue("loop%d",
+ if (!lo->workqueue)
+ lo->workqueue = alloc_workqueue("loop%d",
WQ_UNBOUND | WQ_FREEZABLE,
0,
lo->lo_number);
@@ -1108,10 +1109,11 @@ static void __loop_clr_fd(struct loop_device *lo, bool release)
blk_queue_write_cache(lo->lo_queue, false, false);
/* freeze request queue during the transition */
- if (!release)
+ if (!release) {
blk_mq_freeze_queue(lo->lo_queue);
-
- destroy_workqueue(lo->workqueue);
+ destroy_workqueue(lo->workqueue);
+ lo->workqueue = NULL;
+ }
spin_lock_irq(&lo->lo_work_lock);
list_for_each_entry_safe(worker, pos, &lo->idle_worker_list,
idle_list) {
@@ -2050,6 +2052,8 @@ static void loop_remove(struct loop_device *lo)
mutex_lock(&loop_ctl_mutex);
idr_remove(&loop_index_idr, lo->lo_number);
mutex_unlock(&loop_ctl_mutex);
+ if (lo->workqueue)
+ destroy_workqueue(lo->workqueue);
/* There is no route which can find this loop device. */
mutex_destroy(&lo->lo_mutex);
kfree(lo);
--
2.32.0
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH 6/7] loop: don't hold lo->lo_mutex from lo_open()/lo_release()
2022-01-29 7:14 loop: make autoclear operation synchronous again Tetsuo Handa
` (4 preceding siblings ...)
2022-01-29 7:14 ` [PATCH 5/7] loop: don't call destroy_workqueue() " Tetsuo Handa
@ 2022-01-29 7:14 ` Tetsuo Handa
2022-01-29 7:15 ` [PATCH 7/7] loop: use WQ_MEM_RECLAIM flag Tetsuo Handa
6 siblings, 0 replies; 12+ messages in thread
From: Tetsuo Handa @ 2022-01-29 7:14 UTC (permalink / raw)
To: Jens Axboe, Christoph Hellwig, Jan Kara, Ming Lei
Cc: linux-block, Tetsuo Handa
Since holding lo->lo_mutex from lo_open()/lo_release() can indirectly
wait for I/O requests via e.g. ioctl(LOOP_SET_STATUS64), we need to avoid
holding lo->lo_mutex from lo_open()/lo_release() paths.
Replace lo->lo_state == Lo_deleting with lo->lo_refcnt == -1, making it
possible for lo_open() to run without holding lo->lo_mutex.
Since nobody except loop_control_remove() will access lo->lo_state and
lo->lo_refcnt when lo->lo_refcnt became 0 in lo_release(), we can avoid
holding lo->lo_mutex from lo_release().
Cc: Jan Kara <jack@suse.cz>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
---
drivers/block/loop.c | 76 ++++++++++++++------------------------------
drivers/block/loop.h | 1 -
2 files changed, 23 insertions(+), 54 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 481d2371864a..b45198f2d76b 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1706,41 +1706,25 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode,
static int lo_open(struct block_device *bdev, fmode_t mode)
{
struct loop_device *lo = bdev->bd_disk->private_data;
- int err;
- err = mutex_lock_killable(&lo->lo_mutex);
- if (err)
- return err;
- if (lo->lo_state == Lo_deleting)
- err = -ENXIO;
- else
- atomic_inc(&lo->lo_refcnt);
- mutex_unlock(&lo->lo_mutex);
- return err;
+ return atomic_inc_unless_negative(&lo->lo_refcnt) ? 0 : -ENXIO;
}
static void lo_release(struct gendisk *disk, fmode_t mode)
{
struct loop_device *lo = disk->private_data;
- mutex_lock(&lo->lo_mutex);
- if (atomic_dec_return(&lo->lo_refcnt))
- goto out_unlock;
-
- if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) {
- if (lo->lo_state != Lo_bound)
- goto out_unlock;
- lo->lo_state = Lo_rundown;
- mutex_unlock(&lo->lo_mutex);
- __loop_clr_fd(lo, true);
- mutex_lock(&lo->lo_mutex);
- lo->lo_state = Lo_unbound;
- mutex_unlock(&lo->lo_mutex);
+ /*
+ * Since lo_open() and lo_release() are serialized by disk->open_mutex,
+ * and lo->refcnt == 0 means nobody is using this device, we can access
+ * lo->lo_state and lo->lo_flags without holding lo->lo_mutex.
+ */
+ if (atomic_dec_return(&lo->lo_refcnt) ||
+ lo->lo_state != Lo_bound || !(lo->lo_flags & LO_FLAGS_AUTOCLEAR))
return;
- }
-
-out_unlock:
- mutex_unlock(&lo->lo_mutex);
+ lo->lo_state = Lo_rundown;
+ __loop_clr_fd(lo, true);
+ lo->lo_state = Lo_unbound;
}
static const struct block_device_operations lo_fops = {
@@ -2077,42 +2061,28 @@ static int loop_control_remove(int idx)
pr_warn_once("deleting an unspecified loop device is not supported.\n");
return -EINVAL;
}
-
- /* Hide this loop device for serialization. */
+
ret = mutex_lock_killable(&loop_ctl_mutex);
if (ret)
return ret;
lo = idr_find(&loop_index_idr, idx);
+ /* Fail if loop_add() or loop_remove() are in progress. */
if (!lo || !lo->idr_visible)
ret = -ENODEV;
- else
+ /* Fail if somebody is using this loop device. */
+ else if (data_race(lo->lo_state) != Lo_unbound ||
+ atomic_read(&lo->lo_refcnt) > 0)
+ ret = -EBUSY;
+ /* Fail if somebody started using this loop device. */
+ else if (atomic_dec_return(&lo->lo_refcnt) == -1)
lo->idr_visible = false;
- mutex_unlock(&loop_ctl_mutex);
- if (ret)
- return ret;
-
- /* Check whether this loop device can be removed. */
- ret = mutex_lock_killable(&lo->lo_mutex);
- if (ret)
- goto mark_visible;
- if (lo->lo_state != Lo_unbound ||
- atomic_read(&lo->lo_refcnt) > 0) {
- mutex_unlock(&lo->lo_mutex);
+ else {
+ atomic_inc(&lo->lo_refcnt);
ret = -EBUSY;
- goto mark_visible;
}
- /* Mark this loop device no longer open()-able. */
- lo->lo_state = Lo_deleting;
- mutex_unlock(&lo->lo_mutex);
-
- loop_remove(lo);
- return 0;
-
-mark_visible:
- /* Show this loop device again. */
- mutex_lock(&loop_ctl_mutex);
- lo->idr_visible = true;
mutex_unlock(&loop_ctl_mutex);
+ if (!ret)
+ loop_remove(lo);
return ret;
}
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index 082d4b6bfc6a..49bfa223072b 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -21,7 +21,6 @@ enum {
Lo_unbound,
Lo_bound,
Lo_rundown,
- Lo_deleting,
};
struct loop_func_table;
--
2.32.0
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH 7/7] loop: use WQ_MEM_RECLAIM flag
2022-01-29 7:14 loop: make autoclear operation synchronous again Tetsuo Handa
` (5 preceding siblings ...)
2022-01-29 7:14 ` [PATCH 6/7] loop: don't hold lo->lo_mutex from lo_open()/lo_release() Tetsuo Handa
@ 2022-01-29 7:15 ` Tetsuo Handa
6 siblings, 0 replies; 12+ messages in thread
From: Tetsuo Handa @ 2022-01-29 7:15 UTC (permalink / raw)
To: Jens Axboe, Christoph Hellwig, Jan Kara, Ming Lei
Cc: linux-block, Tetsuo Handa
Allocating kworker threads on demand has a risk of OOM deadlock.
Make sure that each loop device has at least one execution context.
Cc: Jan Kara <jack@suse.cz>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
---
drivers/block/loop.c | 17 +++++++++++++----
1 file changed, 13 insertions(+), 4 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index b45198f2d76b..a2f0397d29e5 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1003,11 +1003,20 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
!file->f_op->write_iter)
lo->lo_flags |= LO_FLAGS_READ_ONLY;
+ /*
+ * Allocate a WQ for this loop device. We can't use a global WQ because
+ * an I/O request will hung when number of active work hits concurrency
+ * limit due to stacked loop devices. Also, specify WQ_MEM_RECLAIM in
+ * order to guarantee that loop_process_work() can start processing an
+ * I/O request even under memory pressure. As a result, this allocation
+ * sounds a sort of resource wasting prepared for the worst condition.
+ * We hope that people utilize ioctl(LOOP_CTL_GET_FREE) in order to
+ * create only minimal number of loop devices.
+ */
if (!lo->workqueue)
- lo->workqueue = alloc_workqueue("loop%d",
- WQ_UNBOUND | WQ_FREEZABLE,
- 0,
- lo->lo_number);
+ lo->workqueue = alloc_workqueue("loop%d", WQ_MEM_RECLAIM |
+ WQ_UNBOUND | WQ_FREEZABLE,
+ 0, lo->lo_number);
if (!lo->workqueue) {
error = -ENOMEM;
goto out_unlock;
--
2.32.0
^ permalink raw reply related [flat|nested] 12+ messages in thread