[PATCH] make autoclear operation synchronous again

linux-block.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH] make autoclear operation synchronous again
@ 2021-12-26  7:06 Tetsuo Handa
  2021-12-29 17:29 ` Christoph Hellwig
  0 siblings, 1 reply; 7+ messages in thread
From: Tetsuo Handa @ 2021-12-26  7:06 UTC (permalink / raw)
  To: Jens Axboe, Jan Kara, Dan Schatzberg, Christoph Hellwig,
	linux-block

The kernel test robot is reporting that xfstest can fail at

  umount ext2 on xfs
  umount xfs

sequence, for commit 322c4293ecc58110 ("loop: make autoclear operation
asynchronous") broke what commit ("loop: Make explicit loop device
destruction lazy") wanted to achieve.

Although we cannot guarantee that nobody is holding a reference when
"umount xfs" is called, we should try to close a race window opened
by asynchronous autoclear operation.

It turned out that there is no need to call flush_workqueue() from
__loop_clr_fd(), for blk_mq_freeze_queue() blocks until all pending
"struct work_struct" are processed by loop_process_work().

Revert commit 322c4293ecc58110 ("loop: make autoclear operation
asynchronous"), and simply defer calling destroy_workqueue() till
loop_remove().

Since a loop device is likely reused after once created thanks to
ioctl(LOOP_CTL_GET_FREE), being unable to destroy corresponding WQ
when ioctl(LOOP_CLR_FD) is called should be an acceptable waste.

Reported-by: kernel test robot <oliver.sang@intel.com>
Suggested-by: Christoph Hellwig <hch@lst.de>
Tested-by: syzbot <syzbot+643e4ce4b6ad1347d372@syzkaller.appspotmail.com>
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
---

 drivers/block/loop.c | 76 ++++++++++++++++++++------------------------
 drivers/block/loop.h |  1 -
 2 files changed, 35 insertions(+), 42 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index b1b05c45c07c..93137858b00c 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1006,10 +1006,10 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
 	    !file->f_op->write_iter)
 		lo->lo_flags |= LO_FLAGS_READ_ONLY;
 
-	lo->workqueue = alloc_workqueue("loop%d",
-					WQ_UNBOUND | WQ_FREEZABLE,
-					0,
-					lo->lo_number);
+	if (!lo->workqueue)
+		lo->workqueue = alloc_workqueue("loop%d",
+						WQ_UNBOUND | WQ_FREEZABLE,
+						0, lo->lo_number);
 	if (!lo->workqueue) {
 		error = -ENOMEM;
 		goto out_unlock;
@@ -1082,7 +1082,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
 	return error;
 }
 
-static void __loop_clr_fd(struct loop_device *lo)
+static void __loop_clr_fd(struct loop_device *lo, bool release)
 {
 	struct file *filp;
 	gfp_t gfp = lo->old_gfp_mask;
@@ -1115,7 +1115,6 @@ static void __loop_clr_fd(struct loop_device *lo)
 	/* freeze request queue during the transition */
 	blk_mq_freeze_queue(lo->lo_queue);
 
-	destroy_workqueue(lo->workqueue);
 	spin_lock_irq(&lo->lo_work_lock);
 	list_for_each_entry_safe(worker, pos, &lo->idle_worker_list,
 				idle_list) {
@@ -1144,6 +1143,8 @@ static void __loop_clr_fd(struct loop_device *lo)
 	/* let user-space know about this change */
 	kobject_uevent(&disk_to_dev(lo->lo_disk)->kobj, KOBJ_CHANGE);
 	mapping_set_gfp_mask(filp->f_mapping, gfp);
+	/* This is safe: open() is still holding a reference. */
+	module_put(THIS_MODULE);
 	blk_mq_unfreeze_queue(lo->lo_queue);
 
 	disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
@@ -1151,52 +1152,44 @@ static void __loop_clr_fd(struct loop_device *lo)
 	if (lo->lo_flags & LO_FLAGS_PARTSCAN) {
 		int err;
 
-		mutex_lock(&lo->lo_disk->open_mutex);
+		/*
+		 * open_mutex has been held already in release path, so don't
+		 * acquire it if this function is called in such case.
+		 *
+		 * If the reread partition isn't from release path, lo_refcnt
+		 * must be at least one and it can only become zero when the
+		 * current holder is released.
+		 */
+		if (!release)
+			mutex_lock(&lo->lo_disk->open_mutex);
 		err = bdev_disk_changed(lo->lo_disk, false);
-		mutex_unlock(&lo->lo_disk->open_mutex);
+		if (!release)
+			mutex_unlock(&lo->lo_disk->open_mutex);
 		if (err)
 			pr_warn("%s: partition scan of loop%d failed (rc=%d)\n",
 				__func__, lo->lo_number, err);
 		/* Device is gone, no point in returning error */
 	}
 
+	/*
+	 * lo->lo_state is set to Lo_unbound here after above partscan has
+	 * finished. There cannot be anybody else entering __loop_clr_fd() as
+	 * Lo_rundown state protects us from all the other places trying to
+	 * change the 'lo' device.
+	 */
 	lo->lo_flags = 0;
 	if (!part_shift)
 		lo->lo_disk->flags |= GENHD_FL_NO_PART;
-
-	fput(filp);
-}
-
-static void loop_rundown_completed(struct loop_device *lo)
-{
 	mutex_lock(&lo->lo_mutex);
 	lo->lo_state = Lo_unbound;
 	mutex_unlock(&lo->lo_mutex);
-	module_put(THIS_MODULE);
-}
-
-static void loop_rundown_workfn(struct work_struct *work)
-{
-	struct loop_device *lo = container_of(work, struct loop_device,
-					      rundown_work);
-	struct block_device *bdev = lo->lo_device;
-	struct gendisk *disk = lo->lo_disk;
-
-	__loop_clr_fd(lo);
-	kobject_put(&bdev->bd_device.kobj);
-	module_put(disk->fops->owner);
-	loop_rundown_completed(lo);
-}
 
-static void loop_schedule_rundown(struct loop_device *lo)
-{
-	struct block_device *bdev = lo->lo_device;
-	struct gendisk *disk = lo->lo_disk;
-
-	__module_get(disk->fops->owner);
-	kobject_get(&bdev->bd_device.kobj);
-	INIT_WORK(&lo->rundown_work, loop_rundown_workfn);
-	queue_work(system_long_wq, &lo->rundown_work);
+	/*
+	 * Need not hold lo_mutex to fput backing file. Calling fput holding
+	 * lo_mutex triggers a circular lock dependency possibility warning as
+	 * fput can take open_mutex which is usually taken before lo_mutex.
+	 */
+	fput(filp);
 }
 
 static int loop_clr_fd(struct loop_device *lo)
@@ -1228,8 +1221,7 @@ static int loop_clr_fd(struct loop_device *lo)
 	lo->lo_state = Lo_rundown;
 	mutex_unlock(&lo->lo_mutex);
 
-	__loop_clr_fd(lo);
-	loop_rundown_completed(lo);
+	__loop_clr_fd(lo, false);
 	return 0;
 }
 
@@ -1754,7 +1746,7 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
 		 * In autoclear mode, stop the loop thread
 		 * and remove configuration after last close.
 		 */
-		loop_schedule_rundown(lo);
+		__loop_clr_fd(lo, true);
 		return;
 	} else if (lo->lo_state == Lo_bound) {
 		/*
@@ -2080,6 +2072,8 @@ static void loop_remove(struct loop_device *lo)
 	mutex_unlock(&loop_ctl_mutex);
 	/* There is no route which can find this loop device. */
 	mutex_destroy(&lo->lo_mutex);
+	if (lo->workqueue)
+		destroy_workqueue(lo->workqueue);
 	kfree(lo);
 }
 
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index 918a7a2dc025..082d4b6bfc6a 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -56,7 +56,6 @@ struct loop_device {
 	struct gendisk		*lo_disk;
 	struct mutex		lo_mutex;
 	bool			idr_visible;
-	struct work_struct      rundown_work;
 };
 
 struct loop_cmd {
-- 
2.32.0

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] make autoclear operation synchronous again
  2021-12-26  7:06 [PATCH] make autoclear operation synchronous again Tetsuo Handa
@ 2021-12-29 17:29 ` Christoph Hellwig
  2021-12-30 10:52   ` Tetsuo Handa
  0 siblings, 1 reply; 7+ messages in thread
From: Christoph Hellwig @ 2021-12-29 17:29 UTC (permalink / raw)
  To: Tetsuo Handa
  Cc: Jens Axboe, Jan Kara, Dan Schatzberg, Christoph Hellwig,
	linux-block

The subject line is missing the loop: prefix.

This approach looks very reasonable, but a few comments below:

> -	lo->workqueue = alloc_workqueue("loop%d",
> -					WQ_UNBOUND | WQ_FREEZABLE,
> -					0,
> -					lo->lo_number);
> +	if (!lo->workqueue)
> +		lo->workqueue = alloc_workqueue("loop%d",
> +						WQ_UNBOUND | WQ_FREEZABLE,
> +						0, lo->lo_number);

Instead of having to deal with sometimes present workqueues, why
not move the workqueue allocation to loop_add?

> +	/* This is safe: open() is still holding a reference. */
> +	module_put(THIS_MODULE);

Any reason to move the module_put here insted of keeping it at the
end of the function as the old code did?

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] make autoclear operation synchronous again
  2021-12-29 17:29 ` Christoph Hellwig
@ 2021-12-30 10:52   ` Tetsuo Handa
  2022-01-03  8:33     ` Christoph Hellwig
  2022-01-05  6:02     ` Jan Stancek
  0 siblings, 2 replies; 7+ messages in thread
From: Tetsuo Handa @ 2021-12-30 10:52 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Jens Axboe, Jan Kara, Dan Schatzberg, linux-block

On 2021/12/30 2:29, Christoph Hellwig wrote:
> The subject line is missing the loop: prefix.

Oops.

> 
> This approach looks very reasonable, but a few comments below:

I prefer task_work approach if it is safe, for it can call destroy_workqueue()
without global locks held. Note that __loop_clear_fd() is also called from
ioctl(LOOP_CLR_FD), and creating locking dependency chain for both with and without
global locks held looks stupid. If we can do without global locks held, why bother
to do with global locks held?

> 
>> -	lo->workqueue = alloc_workqueue("loop%d",
>> -					WQ_UNBOUND | WQ_FREEZABLE,
>> -					0,
>> -					lo->lo_number);
>> +	if (!lo->workqueue)
>> +		lo->workqueue = alloc_workqueue("loop%d",
>> +						WQ_UNBOUND | WQ_FREEZABLE,
>> +						0, lo->lo_number);
> 
> Instead of having to deal with sometimes present workqueues, why
> not move the workqueue allocation to loop_add?

A bit of worrisome thing is that destroy_workqueue() can be called with
major_names_lock held, for loop_add() may be called as probe function from
blk_request_module(). Some unexpected dependency might bite us in future.

We can avoid destroy_workqueue() from loop_add() if we call alloc_workqueue()
after add_disk() succeeded. But in that case calling alloc_workqueue() from
loop_configure() (which is called without global locks like major_names_lock)
sounds safer.

> 
>> +	/* This is safe: open() is still holding a reference. */
>> +	module_put(THIS_MODULE);
> 
> Any reason to move the module_put here insted of keeping it at the
> end of the function as the old code did?

OK. Two patches shown below. Are these look reasonable?



From 1409a49181efcc474fbae2cf4e60cbc37adf34aa Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Thu, 30 Dec 2021 18:41:05 +0900
Subject: [PATCH 1/2] loop: Revert "loop: make autoclear operation asynchronous"

The kernel test robot is reporting that xfstest can fail at

  umount ext2 on xfs
  umount xfs

sequence, for commit 322c4293ecc58110 ("loop: make autoclear operation
asynchronous") broke what commit ("loop: Make explicit loop device
destruction lazy") wanted to achieve.

Although we cannot guarantee that nobody is holding a reference when
"umount xfs" is called, we should try to close a race window opened
by asynchronous autoclear operation.

Reported-by: kernel test robot <oliver.sang@intel.com>
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
---
 drivers/block/loop.c | 65 ++++++++++++++++++++------------------------
 drivers/block/loop.h |  1 -
 2 files changed, 29 insertions(+), 37 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index b1b05c45c07c..e52a8a5e8cbc 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1082,7 +1082,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
 	return error;
 }
 
-static void __loop_clr_fd(struct loop_device *lo)
+static void __loop_clr_fd(struct loop_device *lo, bool release)
 {
 	struct file *filp;
 	gfp_t gfp = lo->old_gfp_mask;
@@ -1144,6 +1144,8 @@ static void __loop_clr_fd(struct loop_device *lo)
 	/* let user-space know about this change */
 	kobject_uevent(&disk_to_dev(lo->lo_disk)->kobj, KOBJ_CHANGE);
 	mapping_set_gfp_mask(filp->f_mapping, gfp);
+	/* This is safe: open() is still holding a reference. */
+	module_put(THIS_MODULE);
 	blk_mq_unfreeze_queue(lo->lo_queue);
 
 	disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
@@ -1151,52 +1153,44 @@ static void __loop_clr_fd(struct loop_device *lo)
 	if (lo->lo_flags & LO_FLAGS_PARTSCAN) {
 		int err;
 
-		mutex_lock(&lo->lo_disk->open_mutex);
+		/*
+		 * open_mutex has been held already in release path, so don't
+		 * acquire it if this function is called in such case.
+		 *
+		 * If the reread partition isn't from release path, lo_refcnt
+		 * must be at least one and it can only become zero when the
+		 * current holder is released.
+		 */
+		if (!release)
+			mutex_lock(&lo->lo_disk->open_mutex);
 		err = bdev_disk_changed(lo->lo_disk, false);
-		mutex_unlock(&lo->lo_disk->open_mutex);
+		if (!release)
+			mutex_unlock(&lo->lo_disk->open_mutex);
 		if (err)
 			pr_warn("%s: partition scan of loop%d failed (rc=%d)\n",
 				__func__, lo->lo_number, err);
 		/* Device is gone, no point in returning error */
 	}
 
+	/*
+	 * lo->lo_state is set to Lo_unbound here after above partscan has
+	 * finished. There cannot be anybody else entering __loop_clr_fd() as
+	 * Lo_rundown state protects us from all the other places trying to
+	 * change the 'lo' device.
+	 */
 	lo->lo_flags = 0;
 	if (!part_shift)
 		lo->lo_disk->flags |= GENHD_FL_NO_PART;
-
-	fput(filp);
-}
-
-static void loop_rundown_completed(struct loop_device *lo)
-{
 	mutex_lock(&lo->lo_mutex);
 	lo->lo_state = Lo_unbound;
 	mutex_unlock(&lo->lo_mutex);
-	module_put(THIS_MODULE);
-}
-
-static void loop_rundown_workfn(struct work_struct *work)
-{
-	struct loop_device *lo = container_of(work, struct loop_device,
-					      rundown_work);
-	struct block_device *bdev = lo->lo_device;
-	struct gendisk *disk = lo->lo_disk;
-
-	__loop_clr_fd(lo);
-	kobject_put(&bdev->bd_device.kobj);
-	module_put(disk->fops->owner);
-	loop_rundown_completed(lo);
-}
 
-static void loop_schedule_rundown(struct loop_device *lo)
-{
-	struct block_device *bdev = lo->lo_device;
-	struct gendisk *disk = lo->lo_disk;
-
-	__module_get(disk->fops->owner);
-	kobject_get(&bdev->bd_device.kobj);
-	INIT_WORK(&lo->rundown_work, loop_rundown_workfn);
-	queue_work(system_long_wq, &lo->rundown_work);
+	/*
+	 * Need not hold lo_mutex to fput backing file. Calling fput holding
+	 * lo_mutex triggers a circular lock dependency possibility warning as
+	 * fput can take open_mutex which is usually taken before lo_mutex.
+	 */
+	fput(filp);
 }
 
 static int loop_clr_fd(struct loop_device *lo)
@@ -1228,8 +1222,7 @@ static int loop_clr_fd(struct loop_device *lo)
 	lo->lo_state = Lo_rundown;
 	mutex_unlock(&lo->lo_mutex);
 
-	__loop_clr_fd(lo);
-	loop_rundown_completed(lo);
+	__loop_clr_fd(lo, false);
 	return 0;
 }
 
@@ -1754,7 +1747,7 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
 		 * In autoclear mode, stop the loop thread
 		 * and remove configuration after last close.
 		 */
-		loop_schedule_rundown(lo);
+		__loop_clr_fd(lo, true);
 		return;
 	} else if (lo->lo_state == Lo_bound) {
 		/*
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index 918a7a2dc025..082d4b6bfc6a 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -56,7 +56,6 @@ struct loop_device {
 	struct gendisk		*lo_disk;
 	struct mutex		lo_mutex;
 	bool			idr_visible;
-	struct work_struct      rundown_work;
 };
 
 struct loop_cmd {
-- 
2.32.0



From 639bd8cfad4b776094b1d05e8d4802c365e8436a Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Thu, 30 Dec 2021 19:45:00 +0900
Subject: [PATCH 2/2] loop: allocate WQ when creating a loop device

syzbot is reporting circular locking problem at __loop_clr_fd() [1], for
commit 87579e9b7d8dc36e ("loop: use worker per cgroup instead of kworker")
is calling destroy_workqueue() with disk->open_mutex held.

It turned out that there is no need to call flush_workqueue() from
__loop_clr_fd(), for blk_mq_freeze_queue() blocks until all pending
"struct work_struct" are processed by loop_process_work().

Thus, allocate a WQ from loop_add() than loop_configure() and destroy
this WQ from loop_remove() than __loop_clr_fd(). Also, use WQ_MEM_RECLAIM
because this WQ needs to be able to start when writeback operation for
reclaiming memory is in progress.

Ideally, loop_process_work() which this WQ calls should be able to
complete without memory allocation, for e.g. kmalloc_array() from
lo_rw_aio() from do_req_filebacked() from loop_handle_cmd() may be
subjected to infinite loop problem [2], but such change is too big
to make in this patch. This patch tries to make sure that this WQ can
call loop_process_work() as needed.

Link: https://syzkaller.appspot.com/bug?extid=643e4ce4b6ad1347d372 [1]
Link: https://lwn.net/Articles/627419/ [2]
Reported-by: syzbot <syzbot+643e4ce4b6ad1347d372@syzkaller.appspotmail.com>
Suggested-by: Christoph Hellwig <hch@infradead.org>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
---
 drivers/block/loop.c | 48 +++++++++++++++++++++++---------------------
 1 file changed, 25 insertions(+), 23 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index e52a8a5e8cbc..e7c25d5a40f5 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -959,9 +959,6 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
 		return -EBADF;
 	is_loop = is_loop_device(file);
 
-	/* This is safe, since we have a reference from open(). */
-	__module_get(THIS_MODULE);
-
 	/*
 	 * If we don't hold exclusive handle for the device, upgrade to it
 	 * here to avoid changing device under exclusive owner.
@@ -1006,15 +1003,6 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
 	    !file->f_op->write_iter)
 		lo->lo_flags |= LO_FLAGS_READ_ONLY;
 
-	lo->workqueue = alloc_workqueue("loop%d",
-					WQ_UNBOUND | WQ_FREEZABLE,
-					0,
-					lo->lo_number);
-	if (!lo->workqueue) {
-		error = -ENOMEM;
-		goto out_unlock;
-	}
-
 	disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
 	set_disk_ro(lo->lo_disk, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0);
 
@@ -1068,6 +1056,8 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
 		loop_reread_partitions(lo);
 	if (!(mode & FMODE_EXCL))
 		bd_abort_claiming(bdev, loop_configure);
+	/* Disallow module unloading while holding backing file. */
+	__module_get(THIS_MODULE);
 	return 0;
 
 out_unlock:
@@ -1077,8 +1067,6 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
 		bd_abort_claiming(bdev, loop_configure);
 out_putf:
 	fput(file);
-	/* This is safe: open() is still holding a reference. */
-	module_put(THIS_MODULE);
 	return error;
 }
 
@@ -1115,7 +1103,6 @@ static void __loop_clr_fd(struct loop_device *lo, bool release)
 	/* freeze request queue during the transition */
 	blk_mq_freeze_queue(lo->lo_queue);
 
-	destroy_workqueue(lo->workqueue);
 	spin_lock_irq(&lo->lo_work_lock);
 	list_for_each_entry_safe(worker, pos, &lo->idle_worker_list,
 				idle_list) {
@@ -1144,8 +1131,6 @@ static void __loop_clr_fd(struct loop_device *lo, bool release)
 	/* let user-space know about this change */
 	kobject_uevent(&disk_to_dev(lo->lo_disk)->kobj, KOBJ_CHANGE);
 	mapping_set_gfp_mask(filp->f_mapping, gfp);
-	/* This is safe: open() is still holding a reference. */
-	module_put(THIS_MODULE);
 	blk_mq_unfreeze_queue(lo->lo_queue);
 
 	disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
@@ -1185,12 +1170,9 @@ static void __loop_clr_fd(struct loop_device *lo, bool release)
 	lo->lo_state = Lo_unbound;
 	mutex_unlock(&lo->lo_mutex);
 
-	/*
-	 * Need not hold lo_mutex to fput backing file. Calling fput holding
-	 * lo_mutex triggers a circular lock dependency possibility warning as
-	 * fput can take open_mutex which is usually taken before lo_mutex.
-	 */
+	/* Release backing file and allow module unloading. */
 	fput(filp);
+	module_put(THIS_MODULE);
 }
 
 static int loop_clr_fd(struct loop_device *lo)
@@ -1972,6 +1954,23 @@ static int loop_add(int i)
 		goto out_free_dev;
 	i = err;
 
+	/*
+	 * Allocate a WQ for this loop device. We can't use a global WQ because
+	 * an I/O request will hung when number of active work hits concurrency
+	 * limit due to stacked loop devices. Also, specify WQ_MEM_RECLAIM in
+	 * order to guarantee that loop_process_work() can start processing an
+	 * I/O request even under memory pressure. As a result, this allocation
+	 * sounds a sort of resource wasting prepared for the worst condition.
+	 * We hope that people utilize ioctl(LOOP_CTL_GET_FREE) in order to
+	 * create only minimal number of loop devices.
+	 */
+	lo->workqueue = alloc_workqueue("loop%d", WQ_MEM_RECLAIM | WQ_UNBOUND |
+					WQ_FREEZABLE, 0, i);
+	if (!lo->workqueue) {
+		err = -ENOMEM;
+		goto out_free_idr;
+	}
+
 	lo->tag_set.ops = &loop_mq_ops;
 	lo->tag_set.nr_hw_queues = 1;
 	lo->tag_set.queue_depth = 128;
@@ -1983,7 +1982,7 @@ static int loop_add(int i)
 
 	err = blk_mq_alloc_tag_set(&lo->tag_set);
 	if (err)
-		goto out_free_idr;
+		goto out_free_workqueue;
 
 	disk = lo->lo_disk = blk_mq_alloc_disk(&lo->tag_set, lo);
 	if (IS_ERR(disk)) {
@@ -2052,6 +2051,8 @@ static int loop_add(int i)
 	blk_cleanup_disk(disk);
 out_cleanup_tags:
 	blk_mq_free_tag_set(&lo->tag_set);
+out_free_workqueue:
+	destroy_workqueue(lo->workqueue);
 out_free_idr:
 	mutex_lock(&loop_ctl_mutex);
 	idr_remove(&loop_index_idr, i);
@@ -2073,6 +2074,7 @@ static void loop_remove(struct loop_device *lo)
 	mutex_unlock(&loop_ctl_mutex);
 	/* There is no route which can find this loop device. */
 	mutex_destroy(&lo->lo_mutex);
+	destroy_workqueue(lo->workqueue);
 	kfree(lo);
 }
 
-- 
2.32.0


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] make autoclear operation synchronous again
  2021-12-30 10:52   ` Tetsuo Handa
@ 2022-01-03  8:33     ` Christoph Hellwig
  2022-01-05  6:02     ` Jan Stancek
  1 sibling, 0 replies; 7+ messages in thread
From: Christoph Hellwig @ 2022-01-03  8:33 UTC (permalink / raw)
  To: Tetsuo Handa
  Cc: Christoph Hellwig, Jens Axboe, Jan Kara, Dan Schatzberg,
	linux-block

On Thu, Dec 30, 2021 at 07:52:34PM +0900, Tetsuo Handa wrote:
> > Instead of having to deal with sometimes present workqueues, why
> > not move the workqueue allocation to loop_add?
> 
> A bit of worrisome thing is that destroy_workqueue() can be called with
> major_names_lock held, for loop_add() may be called as probe function from
> blk_request_module(). Some unexpected dependency might bite us in future.
> 
> We can avoid destroy_workqueue() from loop_add() if we call alloc_workqueue()
> after add_disk() succeeded. But in that case calling alloc_workqueue() from
> loop_configure() (which is called without global locks like major_names_lock)
> sounds safer.

Ok.

> OK. Two patches shown below. Are these look reasonable?

They do look reasonable to me based on a quick glance, but please post
them one patch per mail in a separate thread for proper review.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] make autoclear operation synchronous again
  2021-12-30 10:52   ` Tetsuo Handa
  2022-01-03  8:33     ` Christoph Hellwig
@ 2022-01-05  6:02     ` Jan Stancek
  2022-01-05  6:10       ` Tetsuo Handa
  1 sibling, 1 reply; 7+ messages in thread
From: Jan Stancek @ 2022-01-05  6:02 UTC (permalink / raw)
  To: Tetsuo Handa
  Cc: Christoph Hellwig, Jens Axboe, Jan Kara, Dan Schatzberg,
	linux-block, ltp

On Thu, Dec 30, 2021 at 07:52:34PM +0900, Tetsuo Handa wrote:
>OK. Two patches shown below. Are these look reasonable?
>
>
>
>>From 1409a49181efcc474fbae2cf4e60cbc37adf34aa Mon Sep 17 00:00:00 2001
>From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
>Date: Thu, 30 Dec 2021 18:41:05 +0900
>Subject: [PATCH 1/2] loop: Revert "loop: make autoclear operation asynchronous"
>

Thanks, the revert fixes failures we saw recently in LTP tests,
which do mount/umount in close succession:

# for i in `seq 1 2`;do mount -t iso9660 -o loop /root/isofs.iso /mnt/isofs; umount /mnt/isofs/; done
mount: /mnt/isofs: WARNING: source write-protected, mounted read-only.
mount: /mnt/isofs: wrong fs type, bad option, bad superblock on /dev/loop0, missing codepage or helper program, or other error.
umount: /mnt/isofs/: not mounted.

>The kernel test robot is reporting that xfstest can fail at
>
>  umount ext2 on xfs
>  umount xfs
>
>sequence, for commit 322c4293ecc58110 ("loop: make autoclear operation
>asynchronous") broke what commit ("loop: Make explicit loop device
>destruction lazy") wanted to achieve.
>
>Although we cannot guarantee that nobody is holding a reference when
>"umount xfs" is called, we should try to close a race window opened
>by asynchronous autoclear operation.
>
>Reported-by: kernel test robot <oliver.sang@intel.com>
>Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
>---
> drivers/block/loop.c | 65 ++++++++++++++++++++------------------------
> drivers/block/loop.h |  1 -
> 2 files changed, 29 insertions(+), 37 deletions(-)
>
>diff --git a/drivers/block/loop.c b/drivers/block/loop.c
>index b1b05c45c07c..e52a8a5e8cbc 100644
>--- a/drivers/block/loop.c
>+++ b/drivers/block/loop.c
>@@ -1082,7 +1082,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
> 	return error;
> }
>
>-static void __loop_clr_fd(struct loop_device *lo)
>+static void __loop_clr_fd(struct loop_device *lo, bool release)
> {
> 	struct file *filp;
> 	gfp_t gfp = lo->old_gfp_mask;
>@@ -1144,6 +1144,8 @@ static void __loop_clr_fd(struct loop_device *lo)
> 	/* let user-space know about this change */
> 	kobject_uevent(&disk_to_dev(lo->lo_disk)->kobj, KOBJ_CHANGE);
> 	mapping_set_gfp_mask(filp->f_mapping, gfp);
>+	/* This is safe: open() is still holding a reference. */
>+	module_put(THIS_MODULE);
> 	blk_mq_unfreeze_queue(lo->lo_queue);
>
> 	disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
>@@ -1151,52 +1153,44 @@ static void __loop_clr_fd(struct loop_device *lo)
> 	if (lo->lo_flags & LO_FLAGS_PARTSCAN) {
> 		int err;
>
>-		mutex_lock(&lo->lo_disk->open_mutex);
>+		/*
>+		 * open_mutex has been held already in release path, so don't
>+		 * acquire it if this function is called in such case.
>+		 *
>+		 * If the reread partition isn't from release path, lo_refcnt
>+		 * must be at least one and it can only become zero when the
>+		 * current holder is released.
>+		 */
>+		if (!release)
>+			mutex_lock(&lo->lo_disk->open_mutex);
> 		err = bdev_disk_changed(lo->lo_disk, false);
>-		mutex_unlock(&lo->lo_disk->open_mutex);
>+		if (!release)
>+			mutex_unlock(&lo->lo_disk->open_mutex);
> 		if (err)
> 			pr_warn("%s: partition scan of loop%d failed (rc=%d)\n",
> 				__func__, lo->lo_number, err);
> 		/* Device is gone, no point in returning error */
> 	}
>
>+	/*
>+	 * lo->lo_state is set to Lo_unbound here after above partscan has
>+	 * finished. There cannot be anybody else entering __loop_clr_fd() as
>+	 * Lo_rundown state protects us from all the other places trying to
>+	 * change the 'lo' device.
>+	 */
> 	lo->lo_flags = 0;
> 	if (!part_shift)
> 		lo->lo_disk->flags |= GENHD_FL_NO_PART;
>-
>-	fput(filp);
>-}
>-
>-static void loop_rundown_completed(struct loop_device *lo)
>-{
> 	mutex_lock(&lo->lo_mutex);
> 	lo->lo_state = Lo_unbound;
> 	mutex_unlock(&lo->lo_mutex);
>-	module_put(THIS_MODULE);
>-}
>-
>-static void loop_rundown_workfn(struct work_struct *work)
>-{
>-	struct loop_device *lo = container_of(work, struct loop_device,
>-					      rundown_work);
>-	struct block_device *bdev = lo->lo_device;
>-	struct gendisk *disk = lo->lo_disk;
>-
>-	__loop_clr_fd(lo);
>-	kobject_put(&bdev->bd_device.kobj);
>-	module_put(disk->fops->owner);
>-	loop_rundown_completed(lo);
>-}
>
>-static void loop_schedule_rundown(struct loop_device *lo)
>-{
>-	struct block_device *bdev = lo->lo_device;
>-	struct gendisk *disk = lo->lo_disk;
>-
>-	__module_get(disk->fops->owner);
>-	kobject_get(&bdev->bd_device.kobj);
>-	INIT_WORK(&lo->rundown_work, loop_rundown_workfn);
>-	queue_work(system_long_wq, &lo->rundown_work);
>+	/*
>+	 * Need not hold lo_mutex to fput backing file. Calling fput holding
>+	 * lo_mutex triggers a circular lock dependency possibility warning as
>+	 * fput can take open_mutex which is usually taken before lo_mutex.
>+	 */
>+	fput(filp);
> }
>
> static int loop_clr_fd(struct loop_device *lo)
>@@ -1228,8 +1222,7 @@ static int loop_clr_fd(struct loop_device *lo)
> 	lo->lo_state = Lo_rundown;
> 	mutex_unlock(&lo->lo_mutex);
>
>-	__loop_clr_fd(lo);
>-	loop_rundown_completed(lo);
>+	__loop_clr_fd(lo, false);
> 	return 0;
> }
>
>@@ -1754,7 +1747,7 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
> 		 * In autoclear mode, stop the loop thread
> 		 * and remove configuration after last close.
> 		 */
>-		loop_schedule_rundown(lo);
>+		__loop_clr_fd(lo, true);
> 		return;
> 	} else if (lo->lo_state == Lo_bound) {
> 		/*
>diff --git a/drivers/block/loop.h b/drivers/block/loop.h
>index 918a7a2dc025..082d4b6bfc6a 100644
>--- a/drivers/block/loop.h
>+++ b/drivers/block/loop.h
>@@ -56,7 +56,6 @@ struct loop_device {
> 	struct gendisk		*lo_disk;
> 	struct mutex		lo_mutex;
> 	bool			idr_visible;
>-	struct work_struct      rundown_work;
> };
>
> struct loop_cmd {
>-- 
>2.32.0
>


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] make autoclear operation synchronous again
  2022-01-05  6:02     ` Jan Stancek
@ 2022-01-05  6:10       ` Tetsuo Handa
  2022-01-20  8:17         ` Christoph Hellwig
  0 siblings, 1 reply; 7+ messages in thread
From: Tetsuo Handa @ 2022-01-05  6:10 UTC (permalink / raw)
  To: Jan Stancek
  Cc: Christoph Hellwig, Jens Axboe, Jan Kara, Dan Schatzberg,
	linux-block, ltp

On 2022/01/05 15:02, Jan Stancek wrote:
> On Thu, Dec 30, 2021 at 07:52:34PM +0900, Tetsuo Handa wrote:
>> OK. Two patches shown below. Are these look reasonable?
>>
>>
>>
>>> From 1409a49181efcc474fbae2cf4e60cbc37adf34aa Mon Sep 17 00:00:00 2001
>> From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
>> Date: Thu, 30 Dec 2021 18:41:05 +0900
>> Subject: [PATCH 1/2] loop: Revert "loop: make autoclear operation asynchronous"
>>
> 
> Thanks, the revert fixes failures we saw recently in LTP tests,
> which do mount/umount in close succession:
> 
> # for i in `seq 1 2`;do mount -t iso9660 -o loop /root/isofs.iso /mnt/isofs; umount /mnt/isofs/; done
> mount: /mnt/isofs: WARNING: source write-protected, mounted read-only.
> mount: /mnt/isofs: wrong fs type, bad option, bad superblock on /dev/loop0, missing codepage or helper program, or other error.
> umount: /mnt/isofs/: not mounted.
> 

I'm waiting for Jens to come back to
https://lkml.kernel.org/r/c205dcd2-db55-a35c-e2ef-20193b5ac0da@i-love.sakura.ne.jp .

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] make autoclear operation synchronous again
  2022-01-05  6:10       ` Tetsuo Handa
@ 2022-01-20  8:17         ` Christoph Hellwig
  0 siblings, 0 replies; 7+ messages in thread
From: Christoph Hellwig @ 2022-01-20  8:17 UTC (permalink / raw)
  To: Tetsuo Handa
  Cc: Jan Stancek, Christoph Hellwig, Jens Axboe, Jan Kara,
	Dan Schatzberg, linux-block, ltp

On Wed, Jan 05, 2022 at 03:10:23PM +0900, Tetsuo Handa wrote:
> On 2022/01/05 15:02, Jan Stancek wrote:
> > On Thu, Dec 30, 2021 at 07:52:34PM +0900, Tetsuo Handa wrote:
> >> OK. Two patches shown below. Are these look reasonable?
> >>
> >>
> >>
> >>> From 1409a49181efcc474fbae2cf4e60cbc37adf34aa Mon Sep 17 00:00:00 2001
> >> From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
> >> Date: Thu, 30 Dec 2021 18:41:05 +0900
> >> Subject: [PATCH 1/2] loop: Revert "loop: make autoclear operation asynchronous"
> >>
> > 
> > Thanks, the revert fixes failures we saw recently in LTP tests,
> > which do mount/umount in close succession:
> > 
> > # for i in `seq 1 2`;do mount -t iso9660 -o loop /root/isofs.iso /mnt/isofs; umount /mnt/isofs/; done
> > mount: /mnt/isofs: WARNING: source write-protected, mounted read-only.
> > mount: /mnt/isofs: wrong fs type, bad option, bad superblock on /dev/loop0, missing codepage or helper program, or other error.
> > umount: /mnt/isofs/: not mounted.
> > 
> 
> I'm waiting for Jens to come back to
> https://lkml.kernel.org/r/c205dcd2-db55-a35c-e2ef-20193b5ac0da@i-love.sakura.ne.jp .

So I think the version in this thread is what we should merge.

On top of that we should:

 - remove the probe mechanism (patch already sent)
 - stop taking open_mutex in del_gendisk and bdev_disk_changed (I have
   a series for that now)

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2022-01-20  8:17 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2021-12-26  7:06 [PATCH] make autoclear operation synchronous again Tetsuo Handa
2021-12-29 17:29 ` Christoph Hellwig
2021-12-30 10:52   ` Tetsuo Handa
2022-01-03  8:33     ` Christoph Hellwig
2022-01-05  6:02     ` Jan Stancek
2022-01-05  6:10       ` Tetsuo Handa
2022-01-20  8:17         ` Christoph Hellwig

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).