Re: possible deadlock in blkdev_reread

public inbox for linux-block@vger.kernel.org
 help / color / mirror / Atom feed

* Re: possible deadlock in blkdev_reread_part
       [not found] <001a11446e86e97ceb055cf07f4e@google.com>
@ 2017-11-01 19:02 ` Dmitry Vyukov
  2017-11-05 20:18   ` Eric Biggers
  0 siblings, 1 reply; 3+ messages in thread
From: Dmitry Vyukov @ 2017-11-01 19:02 UTC (permalink / raw)
  To: syzbot; +Cc: axboe, linux-block, LKML, syzkaller-bugs

On Wed, Nov 1, 2017 at 10:01 PM, syzbot
<bot+4684a000d5abdade83fac55b1e7d1f935ef1936e@syzkaller.appspotmail.com>
wrote:
> Hello,
>
> syzkaller hit the following crash on
> e19b205be43d11bff638cad4487008c48d21c103
> git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/master
> compiler: gcc (GCC) 7.1.1 20170620
> .config is attached
> Raw console output is attached.
> C reproducer is attached
> syzkaller reproducer is attached. See https://goo.gl/kgGztJ
> for information about syzkaller reproducers
>
>
> ======================================================
> WARNING: possible circular locking dependency detected
> 4.14.0-rc2+ #10 Not tainted
> ------------------------------------------------------
> syzkaller821047/2981 is trying to acquire lock:
>  (&bdev->bd_mutex){+.+.}, at: [<ffffffff8232c60e>]
> blkdev_reread_part+0x1e/0x40 block/ioctl.c:192
>
> but task is already holding lock:
>  (&lo->lo_ctl_mutex#2){+.+.}, at: [<ffffffff83541ef9>]
> lo_compat_ioctl+0x109/0x140 drivers/block/loop.c:1533
>
> which lock already depends on the new lock.
>
>
> the existing dependency chain (in reverse order) is:
>
> -> #1 (&lo->lo_ctl_mutex#2){+.+.}:
>        check_prevs_add kernel/locking/lockdep.c:2020 [inline]
>        validate_chain kernel/locking/lockdep.c:2469 [inline]
>        __lock_acquire+0x328f/0x4620 kernel/locking/lockdep.c:3498
>        lock_acquire+0x1d5/0x580 kernel/locking/lockdep.c:4002
>        __mutex_lock_common kernel/locking/mutex.c:756 [inline]
>        __mutex_lock+0x16f/0x19d0 kernel/locking/mutex.c:893
>        mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:908
>        lo_release+0x6b/0x180 drivers/block/loop.c:1587
>        __blkdev_put+0x602/0x7c0 fs/block_dev.c:1780
>        blkdev_put+0x85/0x4f0 fs/block_dev.c:1845
>        blkdev_close+0x91/0xc0 fs/block_dev.c:1852
>        __fput+0x333/0x7f0 fs/file_table.c:210
>        ____fput+0x15/0x20 fs/file_table.c:244
>        task_work_run+0x199/0x270 kernel/task_work.c:112
>        tracehook_notify_resume include/linux/tracehook.h:191 [inline]
>        exit_to_usermode_loop+0x296/0x310 arch/x86/entry/common.c:162
>        prepare_exit_to_usermode arch/x86/entry/common.c:197 [inline]
>        syscall_return_slowpath+0x42f/0x510 arch/x86/entry/common.c:266
>        entry_SYSCALL_64_fastpath+0xbc/0xbe
>
> -> #0 (&bdev->bd_mutex){+.+.}:
>        check_prev_add+0x865/0x1520 kernel/locking/lockdep.c:1894
>        check_prevs_add kernel/locking/lockdep.c:2020 [inline]
>        validate_chain kernel/locking/lockdep.c:2469 [inline]
>        __lock_acquire+0x328f/0x4620 kernel/locking/lockdep.c:3498
>        lock_acquire+0x1d5/0x580 kernel/locking/lockdep.c:4002
>        __mutex_lock_common kernel/locking/mutex.c:756 [inline]
>        __mutex_lock+0x16f/0x19d0 kernel/locking/mutex.c:893
>        mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:908
>        blkdev_reread_part+0x1e/0x40 block/ioctl.c:192
>        loop_reread_partitions+0x12f/0x1a0 drivers/block/loop.c:614
>        loop_set_status+0x9ba/0xf60 drivers/block/loop.c:1156
>        loop_set_status_compat+0x92/0xf0 drivers/block/loop.c:1506
>        lo_compat_ioctl+0x114/0x140 drivers/block/loop.c:1534
>        compat_blkdev_ioctl+0x3ba/0x1850 block/compat_ioctl.c:405
>        C_SYSC_ioctl fs/compat_ioctl.c:1593 [inline]
>        compat_SyS_ioctl+0x1d7/0x3290 fs/compat_ioctl.c:1540
>        do_syscall_32_irqs_on arch/x86/entry/common.c:329 [inline]
>        do_fast_syscall_32+0x3f2/0xf05 arch/x86/entry/common.c:391
>        entry_SYSENTER_compat+0x51/0x60 arch/x86/entry/entry_64_compat.S:124
>
> other info that might help us debug this:
>
>  Possible unsafe locking scenario:
>
>        CPU0                    CPU1
>        ----                    ----
>   lock(&lo->lo_ctl_mutex#2);
>                                lock(&bdev->bd_mutex);
>                                lock(&lo->lo_ctl_mutex#2);
>   lock(&bdev->bd_mutex);
>
>  *** DEADLOCK ***
>
> 1 lock held by syzkaller821047/2981:
>  #0:  (&lo->lo_ctl_mutex#2){+.+.}, at: [<ffffffff83541ef9>]
> lo_compat_ioctl+0x109/0x140 drivers/block/loop.c:1533
>
> stack backtrace:
> CPU: 0 PID: 2981 Comm: syzkaller821047 Not tainted 4.14.0-rc2+ #10
> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
> Google 01/01/2011
> Call Trace:
>  __dump_stack lib/dump_stack.c:16 [inline]
>  dump_stack+0x194/0x257 lib/dump_stack.c:52
>  print_circular_bug+0x503/0x710 kernel/locking/lockdep.c:1259
>  check_prev_add+0x865/0x1520 kernel/locking/lockdep.c:1894
>  check_prevs_add kernel/locking/lockdep.c:2020 [inline]
>  validate_chain kernel/locking/lockdep.c:2469 [inline]
>  __lock_acquire+0x328f/0x4620 kernel/locking/lockdep.c:3498
>  lock_acquire+0x1d5/0x580 kernel/locking/lockdep.c:4002
>  __mutex_lock_common kernel/locking/mutex.c:756 [inline]
>  __mutex_lock+0x16f/0x19d0 kernel/locking/mutex.c:893
>  mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:908
>  blkdev_reread_part+0x1e/0x40 block/ioctl.c:192
>  loop_reread_partitions+0x12f/0x1a0 drivers/block/loop.c:614
>  loop_set_status+0x9ba/0xf60 drivers/block/loop.c:1156
>  loop_set_status_compat+0x92/0xf0 drivers/block/loop.c:1506
>  lo_compat_ioctl+0x114/0x140 drivers/block/loop.c:1534
>  compat_blkdev_ioctl+0x3ba/0x1850 block/compat_ioctl.c:405
>  C_SYSC_ioctl fs/compat_ioctl.c:1593 [inline]
>  compat_SyS_ioctl+0x1d7/0x3290 fs/compat_ioctl.c:1540
>  do_syscall_32_irqs_on arch/x86/entry/common.c:329 [inline]
>  do_fast_syscall_32+0x3f2/0xf05 arch/x86/entry/common.c:391
>  entry_SYSENTER_compat+0x51/0x60 arch/x86/entry/entry_64_compat.S:124
> RIP: 0023:0xf7f4bc79
> RSP: 002b:00000000ff90868c EFLAGS: 00000286 ORIG_RAX: 0000000000000036
> RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 0000000000004c02
> RDX: 00000


Still happens on linux-next 36ef71cae353f88fd6e095e2aaa3e5953af1685d (Oct 20).
Note repro needs to be compiled with -m32

[  243.819514] ======================================================
[  243.820949] WARNING: possible circular locking dependency detected
[  243.822417] 4.14.0-rc5-next-20171018 #15 Not tainted
[  243.823592] ------------------------------------------------------
[  243.825012] a.out/11871 is trying to acquire lock:
[  243.826182]  (&bdev->bd_mutex){+.+.}, at: [<ffffffff8245f13e>]
blkdev_reread_part+0x1e/0x40
[  243.828317]
[  243.828317] but task is already holding lock:
[  243.829669]  (&lo->lo_ctl_mutex#2){+.+.}, at: [<ffffffff83867189>]
lo_compat_ioctl+0x119/0x150
[  243.831728]
[  243.831728] which lock already depends on the new lock.
[  243.831728]
[  243.833373]
[  243.833373] the existing dependency chain (in reverse order) is:
[  243.834991]
[  243.834991] -> #1 (&lo->lo_ctl_mutex#2){+.+.}:
[  243.836422]        __mutex_lock+0x16f/0x1990
[  243.837474]        mutex_lock_nested+0x16/0x20
[  243.838463]        lo_release+0x7a/0x1d0
[  243.839370]        __blkdev_put+0x66e/0x810
[  243.840300]        blkdev_put+0x98/0x540
[  243.841171]        blkdev_close+0x8b/0xb0
[  243.842101]        __fput+0x354/0x870
[  243.842932]        ____fput+0x15/0x20
[  243.843680]        task_work_run+0x1c6/0x270
[  243.844540]        exit_to_usermode_loop+0x2b9/0x300
[  243.845502]        syscall_return_slowpath+0x425/0x4d0
[  243.846469]        entry_SYSCALL_64_fastpath+0xbc/0xbe
[  243.847598]
[  243.847598] -> #0 (&bdev->bd_mutex){+.+.}:
[  243.848686]        lock_acquire+0x1d3/0x520
[  243.849495]        __mutex_lock+0x16f/0x1990
[  243.850332]        mutex_lock_nested+0x16/0x20
[  243.851204]        blkdev_reread_part+0x1e/0x40
[  243.852053]        loop_reread_partitions+0x14c/0x170
[  243.853049]        loop_set_status+0xac6/0xfd0
[  243.853892]        loop_set_status_compat+0x9c/0xd0
[  243.854841]        lo_compat_ioctl+0x124/0x150
[  243.855664]        compat_blkdev_ioctl+0x3c4/0x1ad0
[  243.856547]        compat_SyS_ioctl+0x1c6/0x3a00
[  243.857365]        do_fast_syscall_32+0x428/0xf67
[  243.858284]        entry_SYSENTER_compat+0x51/0x60
[  243.859189]
[  243.859189] other info that might help us debug this:
[  243.859189]
[  243.860555]  Possible unsafe locking scenario:
[  243.860555]
[  243.861597]        CPU0                    CPU1
[  243.862374]        ----                    ----
[  243.863175]   lock(&lo->lo_ctl_mutex#2);
[  243.863886]                                lock(&bdev->bd_mutex);
[  243.865020]                                lock(&lo->lo_ctl_mutex#2);
[  243.866152]   lock(&bdev->bd_mutex);
[  243.866822]
[  243.866822]  *** DEADLOCK ***



> ---
> This bug is generated by a dumb bot. It may contain errors.
> See https://goo.gl/tpsmEJ for details.
> Direct all questions to syzkaller@googlegroups.com.
> Please credit me with: Reported-by: syzbot <syzkaller@googlegroups.com>
>
> syzbot will keep track of this bug report.
> Once a fix for this bug is committed, please reply to this email with:
> #syz fix: exact-commit-title
> To mark this as a duplicate of another syzbot report, please reply with:
> #syz dup: exact-subject-of-another-report
> If it's a one-off invalid bug report, please reply with:
> #syz invalid
> Note: if the crash happens again, it will cause creation of a new bug
> report.
> Note: all commands must start from beginning of the line.
>
> --
> You received this message because you are subscribed to the Google Groups
> "syzkaller-bugs" group.
> To unsubscribe from this group and stop receiving emails from it, send an
> email to syzkaller-bugs+unsubscribe@googlegroups.com.
> To view this discussion on the web visit
> https://groups.google.com/d/msgid/syzkaller-bugs/001a11446e86e97ceb055cf07f4e%40google.com.
> For more options, visit https://groups.google.com/d/optout.

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: possible deadlock in blkdev_reread_part
  2017-11-01 19:02 ` possible deadlock in blkdev_reread_part Dmitry Vyukov
@ 2017-11-05 20:18   ` Eric Biggers
       [not found]     ` <201804162302.GBI43737.OLFSOFQHVJMFtO@I-love.SAKURA.ne.jp>
  0 siblings, 1 reply; 3+ messages in thread
From: Eric Biggers @ 2017-11-05 20:18 UTC (permalink / raw)
  To: Dmitry Vyukov; +Cc: syzbot, axboe, linux-block, LKML, syzkaller-bugs

On Wed, Nov 01, 2017 at 10:02:44PM +0300, 'Dmitry Vyukov' via syzkaller-bugs wrote:
> 
> Still happens on linux-next 36ef71cae353f88fd6e095e2aaa3e5953af1685d (Oct 20).
> Note repro needs to be compiled with -m32
> 
> [  243.819514] ======================================================
> [  243.820949] WARNING: possible circular locking dependency detected
> [  243.822417] 4.14.0-rc5-next-20171018 #15 Not tainted
> [  243.823592] ------------------------------------------------------
> [  243.825012] a.out/11871 is trying to acquire lock:
> [  243.826182]  (&bdev->bd_mutex){+.+.}, at: [<ffffffff8245f13e>]
> blkdev_reread_part+0x1e/0x40
> [  243.828317]
> [  243.828317] but task is already holding lock:
> [  243.829669]  (&lo->lo_ctl_mutex#2){+.+.}, at: [<ffffffff83867189>]
> lo_compat_ioctl+0x119/0x150
> [  243.831728]
> [  243.831728] which lock already depends on the new lock.
> [  243.831728]
> [  243.833373]

Here's a simplified reproducer:

	#include <fcntl.h>
	#include <linux/loop.h>
	#include <sys/ioctl.h>
	#include <unistd.h>

	int main()
	{
		int loopfd, fd;
		struct loop_info info = { .lo_flags = LO_FLAGS_PARTSCAN };

		loopfd = open("/dev/loop0", O_RDWR);

		fd = open("/bin/ls", O_RDONLY);

		ioctl(loopfd, LOOP_SET_FD, fd);

		ioctl(loopfd, LOOP_SET_STATUS, &info);
	}

It still needs to be compiled with -m32.  The reason is that lo_ioctl() has:

	mutex_lock_nested(&lo->lo_ctl_mutex, 1);

but lo_compat_ioctl() has:

	mutex_lock(&lo->lo_ctl_mutex);

But ->lo_ctl_mutex isn't actually being nested under itself, so I don't think
the "nested" annotation is actually appropriate.

It seems that ->bd_mutex is held while opening and closing block devices, which
should rank it above both ->lo_ctl_mutex and loop_index_mutex (see lo_open() and
lo_release()).

But blkdev_reread_part(), which takes ->bd_mutex, is called from some of the
ioctls while ->lo_ctl_mutex is held.

Perhaps we should call blkdev_reread_part() at the end of the ioctls, after
->lo_ctl_mutex has been dropped?  But it looks like that can do I/O to the
device, which probably could race with loop_clr_fd()...

Or perhaps we should just take both locks for the ioctls, in the order
->bd_mutex, then ->lo_ctl_mutex -- and then use __blkdev_reread_part()?

Eric

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH v2] block/loop: Serialize ioctl operations.
       [not found]       ` <201804202306.FGC05220.OOJFMFOSFHtLQV@I-love.SAKURA.ne.jp>
@ 2018-05-09 10:54         ` Tetsuo Handa
  0 siblings, 0 replies; 3+ messages in thread
From: Tetsuo Handa @ 2018-05-09 10:54 UTC (permalink / raw)
  To: ebiggers3, dvyukov, jack, tytso, gmazyland
  Cc: syzbot+4684a000d5abdade83fac55b1e7d1f935ef1936e, axboe,
	linux-block, syzkaller-bugs, akpm

>>From 86acfa7288c5e6ddab26f430e2bd2f42ad1407f0 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Wed, 9 May 2018 13:01:31 +0900
Subject: [PATCH v2] block/loop: Serialize ioctl operations.

syzbot is reporting NULL pointer dereference [1] which is caused by
race condition between ioctl(loop_fd, LOOP_CLR_FD, 0) versus
ioctl(other_loop_fd, LOOP_SET_FD, loop_fd) due to traversing other
loop devices without holding corresponding locks.

syzbot is also reporting circular locking dependency between bdev->bd_mutex
and lo->lo_ctl_mutex [2] which is caused by calling blkdev_reread_part()
with lock held.

Since ioctl() request on loop devices is not frequent operation, we don't
need fine grained locking. Let's use global lock and simplify the locking
order.

Strategy is that the global lock is held upon entry of ioctl() request,
and release it before either starting operations which might deadlock or
leaving ioctl() request. After the global lock is released, current thread
no longer uses "struct loop_device" memory because it might be modified
by next ioctl() request which was waiting for current ioctl() request.

In order to enforce this strategy, this patch inversed
loop_reread_partitions() and loop_unprepare_queue() in loop_clr_fd().
I don't know whether it breaks something, but I don't have testcases.

Since this patch serializes using global lock, race bugs should no longer
exist. Thus, it will be easy to test whether this patch broke something.

[1] https://syzkaller.appspot.com/bug?id=f3cfe26e785d85f9ee259f385515291d21bd80a3
[2] https://syzkaller.appspot.com/bug?id=bf154052f0eea4bc7712499e4569505907d15889

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Reported-by: syzbot <syzbot+bf89c128e05dd6c62523@syzkaller.appspotmail.com>
Reported-by: syzbot <syzbot+4684a000d5abdade83fac55b1e7d1f935ef1936e@syzkaller.appspotmail.com>
Cc: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/loop.c | 231 ++++++++++++++++++++++++++++-----------------------
 drivers/block/loop.h |   1 -
 2 files changed, 128 insertions(+), 104 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 24f6682..b17fd05 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -81,11 +81,50 @@
 #include <linux/uaccess.h>
 
 static DEFINE_IDR(loop_index_idr);
-static DEFINE_MUTEX(loop_index_mutex);
+static DEFINE_MUTEX(loop_mutex);
+static void *loop_mutex_owner; /* == __mutex_owner(&loop_mutex) */
 
 static int max_part;
 static int part_shift;
 
+/*
+ * lock_loop - Lock loop_mutex.
+ */
+static void lock_loop(void)
+{
+	mutex_lock(&loop_mutex);
+	loop_mutex_owner = current;
+}
+
+/*
+ * lock_loop_killable - Lock loop_mutex unless killed.
+ */
+static int lock_loop_killable(void)
+{
+	int err = mutex_lock_killable(&loop_mutex);
+
+	if (err)
+		return err;
+	loop_mutex_owner = current;
+	return 0;
+}
+
+/*
+ * unlock_loop - Unlock loop_mutex as needed.
+ *
+ * Explicitly call this function before calling fput() or blkdev_reread_part()
+ * in order to avoid circular lock dependency. After this function is called,
+ * current thread is no longer allowed to access "struct loop_device" memory,
+ * for another thread would access that memory as soon as loop_mutex is held.
+ */
+static void unlock_loop(void)
+{
+	if (loop_mutex_owner == current) {
+		loop_mutex_owner = NULL;
+		mutex_unlock(&loop_mutex);
+	}
+}
+
 static int transfer_xor(struct loop_device *lo, int cmd,
 			struct page *raw_page, unsigned raw_off,
 			struct page *loop_page, unsigned loop_off,
@@ -626,7 +665,12 @@ static void loop_reread_partitions(struct loop_device *lo,
 				   struct block_device *bdev)
 {
 	int rc;
+	/* Save variables which might change after unlock_loop() is called. */
+	char filename[sizeof(lo->lo_file_name)];
+	const int num = lo->lo_number;
 
+	memmove(filename, lo->lo_file_name, sizeof(filename));
+	unlock_loop();
 	/*
 	 * bd_mutex has been held already in release path, so don't
 	 * acquire it if this function is called in such case.
@@ -641,7 +685,7 @@ static void loop_reread_partitions(struct loop_device *lo,
 		rc = blkdev_reread_part(bdev);
 	if (rc)
 		pr_warn("%s: partition scan of loop%d (%s) failed (rc=%d)\n",
-			__func__, lo->lo_number, lo->lo_file_name, rc);
+			__func__, num, filename, rc);
 }
 
 static inline int is_loop_device(struct file *file)
@@ -689,6 +733,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
 	struct inode	*inode;
 	int		error;
 
+	lockdep_assert_held(&loop_mutex);
 	error = -ENXIO;
 	if (lo->lo_state != Lo_bound)
 		goto out;
@@ -726,12 +771,14 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
 	loop_update_dio(lo);
 	blk_mq_unfreeze_queue(lo->lo_queue);
 
-	fput(old_file);
 	if (lo->lo_flags & LO_FLAGS_PARTSCAN)
-		loop_reread_partitions(lo, bdev);
+		loop_reread_partitions(lo, bdev); /* calls unlock_loop() */
+	unlock_loop();
+	fput(old_file);
 	return 0;
 
  out_putf:
+	unlock_loop();
 	fput(file);
  out:
 	return error;
@@ -909,6 +956,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
 	int		error;
 	loff_t		size;
 
+	lockdep_assert_held(&loop_mutex);
 	/* This is safe, since we have a reference from open(). */
 	__module_get(THIS_MODULE);
 
@@ -967,19 +1015,21 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
 	set_blocksize(bdev, S_ISBLK(inode->i_mode) ?
 		      block_size(inode->i_bdev) : PAGE_SIZE);
 
+	/*
+	 * Grab the block_device to prevent its destruction after we
+	 * put /dev/loopXX inode. Later in loop_clr_fd() we bdput(bdev).
+	 */
+	bdgrab(bdev);
+
 	lo->lo_state = Lo_bound;
 	if (part_shift)
 		lo->lo_flags |= LO_FLAGS_PARTSCAN;
 	if (lo->lo_flags & LO_FLAGS_PARTSCAN)
-		loop_reread_partitions(lo, bdev);
-
-	/* Grab the block_device to prevent its destruction after we
-	 * put /dev/loopXX inode. Later in loop_clr_fd() we bdput(bdev).
-	 */
-	bdgrab(bdev);
+		loop_reread_partitions(lo, bdev); /* calls unlock_loop() */
 	return 0;
 
  out_putf:
+	unlock_loop();
 	fput(file);
  out:
 	/* This is safe: open() is still holding a reference. */
@@ -1029,7 +1079,9 @@ static int loop_clr_fd(struct loop_device *lo)
 	struct file *filp = lo->lo_backing_file;
 	gfp_t gfp = lo->old_gfp_mask;
 	struct block_device *bdev = lo->lo_device;
+	bool reread;
 
+	lockdep_assert_held(&loop_mutex);
 	if (lo->lo_state != Lo_bound)
 		return -ENXIO;
 
@@ -1045,7 +1097,6 @@ static int loop_clr_fd(struct loop_device *lo)
 	 */
 	if (atomic_read(&lo->lo_refcnt) > 1) {
 		lo->lo_flags |= LO_FLAGS_AUTOCLEAR;
-		mutex_unlock(&lo->lo_ctl_mutex);
 		return 0;
 	}
 
@@ -1090,20 +1141,14 @@ static int loop_clr_fd(struct loop_device *lo)
 	/* This is safe: open() is still holding a reference. */
 	module_put(THIS_MODULE);
 	blk_mq_unfreeze_queue(lo->lo_queue);
-
-	if (lo->lo_flags & LO_FLAGS_PARTSCAN && bdev)
-		loop_reread_partitions(lo, bdev);
+	reread = (lo->lo_flags & LO_FLAGS_PARTSCAN) && bdev;
 	lo->lo_flags = 0;
 	if (!part_shift)
 		lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN;
 	loop_unprepare_queue(lo);
-	mutex_unlock(&lo->lo_ctl_mutex);
-	/*
-	 * Need not hold lo_ctl_mutex to fput backing file.
-	 * Calling fput holding lo_ctl_mutex triggers a circular
-	 * lock dependency possibility warning as fput can take
-	 * bd_mutex which is usually taken before lo_ctl_mutex.
-	 */
+	if (reread)
+		loop_reread_partitions(lo, bdev); /* calls unlock_loop() */
+	unlock_loop();
 	fput(filp);
 	return 0;
 }
@@ -1193,7 +1238,7 @@ static int loop_clr_fd(struct loop_device *lo)
 	     !(lo->lo_flags & LO_FLAGS_PARTSCAN)) {
 		lo->lo_flags |= LO_FLAGS_PARTSCAN;
 		lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN;
-		loop_reread_partitions(lo, lo->lo_device);
+		loop_reread_partitions(lo, lo->lo_device); /* calls unlock_loop() */
 	}
 
 	return err;
@@ -1202,14 +1247,13 @@ static int loop_clr_fd(struct loop_device *lo)
 static int
 loop_get_status(struct loop_device *lo, struct loop_info64 *info)
 {
-	struct file *file;
+	struct path path;
 	struct kstat stat;
 	int ret;
 
-	if (lo->lo_state != Lo_bound) {
-		mutex_unlock(&lo->lo_ctl_mutex);
+	lockdep_assert_held(&loop_mutex);
+	if (lo->lo_state != Lo_bound)
 		return -ENXIO;
-	}
 
 	memset(info, 0, sizeof(*info));
 	info->lo_number = lo->lo_number;
@@ -1226,17 +1270,17 @@ static int loop_clr_fd(struct loop_device *lo)
 		       lo->lo_encrypt_key_size);
 	}
 
-	/* Drop lo_ctl_mutex while we call into the filesystem. */
-	file = get_file(lo->lo_backing_file);
-	mutex_unlock(&lo->lo_ctl_mutex);
-	ret = vfs_getattr(&file->f_path, &stat, STATX_INO,
-			  AT_STATX_SYNC_AS_STAT);
+	/* Drop loop_mutex while we call into the filesystem. */
+	path = lo->lo_backing_file->f_path;
+	path_get(&path);
+	unlock_loop();
+	ret = vfs_getattr(&path, &stat, STATX_INO, AT_STATX_SYNC_AS_STAT);
 	if (!ret) {
 		info->lo_device = huge_encode_dev(stat.dev);
 		info->lo_inode = stat.ino;
 		info->lo_rdevice = huge_encode_dev(stat.rdev);
 	}
-	fput(file);
+	path_put(&path);
 	return ret;
 }
 
@@ -1298,6 +1342,7 @@ static int loop_clr_fd(struct loop_device *lo)
 	struct loop_info info;
 	struct loop_info64 info64;
 
+	lockdep_assert_held(&loop_mutex);
 	if (copy_from_user(&info, arg, sizeof (struct loop_info)))
 		return -EFAULT;
 	loop_info64_from_old(&info, &info64);
@@ -1309,6 +1354,7 @@ static int loop_clr_fd(struct loop_device *lo)
 {
 	struct loop_info64 info64;
 
+	lockdep_assert_held(&loop_mutex);
 	if (copy_from_user(&info64, arg, sizeof (struct loop_info64)))
 		return -EFAULT;
 	return loop_set_status(lo, &info64);
@@ -1320,10 +1366,8 @@ static int loop_clr_fd(struct loop_device *lo)
 	struct loop_info64 info64;
 	int err;
 
-	if (!arg) {
-		mutex_unlock(&lo->lo_ctl_mutex);
+	if (!arg)
 		return -EINVAL;
-	}
 	err = loop_get_status(lo, &info64);
 	if (!err)
 		err = loop_info64_to_old(&info64, &info);
@@ -1338,10 +1382,8 @@ static int loop_clr_fd(struct loop_device *lo)
 	struct loop_info64 info64;
 	int err;
 
-	if (!arg) {
-		mutex_unlock(&lo->lo_ctl_mutex);
+	if (!arg)
 		return -EINVAL;
-	}
 	err = loop_get_status(lo, &info64);
 	if (!err && copy_to_user(arg, &info64, sizeof(info64)))
 		err = -EFAULT;
@@ -1351,6 +1393,7 @@ static int loop_clr_fd(struct loop_device *lo)
 
 static int loop_set_capacity(struct loop_device *lo)
 {
+	lockdep_assert_held(&loop_mutex);
 	if (unlikely(lo->lo_state != Lo_bound))
 		return -ENXIO;
 
@@ -1360,6 +1403,8 @@ static int loop_set_capacity(struct loop_device *lo)
 static int loop_set_dio(struct loop_device *lo, unsigned long arg)
 {
 	int error = -ENXIO;
+
+	lockdep_assert_held(&loop_mutex);
 	if (lo->lo_state != Lo_bound)
 		goto out;
 
@@ -1373,6 +1418,7 @@ static int loop_set_dio(struct loop_device *lo, unsigned long arg)
 
 static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
 {
+	lockdep_assert_held(&loop_mutex);
 	if (lo->lo_state != Lo_bound)
 		return -ENXIO;
 
@@ -1395,12 +1441,10 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
 	unsigned int cmd, unsigned long arg)
 {
 	struct loop_device *lo = bdev->bd_disk->private_data;
-	int err;
+	int err = lock_loop_killable();
 
-	err = mutex_lock_killable_nested(&lo->lo_ctl_mutex, 1);
 	if (err)
-		goto out_unlocked;
-
+		return err;
 	switch (cmd) {
 	case LOOP_SET_FD:
 		err = loop_set_fd(lo, mode, bdev, arg);
@@ -1409,10 +1453,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
 		err = loop_change_fd(lo, bdev, arg);
 		break;
 	case LOOP_CLR_FD:
-		/* loop_clr_fd would have unlocked lo_ctl_mutex on success */
 		err = loop_clr_fd(lo);
-		if (!err)
-			goto out_unlocked;
 		break;
 	case LOOP_SET_STATUS:
 		err = -EPERM;
@@ -1422,8 +1463,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
 		break;
 	case LOOP_GET_STATUS:
 		err = loop_get_status_old(lo, (struct loop_info __user *) arg);
-		/* loop_get_status() unlocks lo_ctl_mutex */
-		goto out_unlocked;
+		break;
 	case LOOP_SET_STATUS64:
 		err = -EPERM;
 		if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
@@ -1432,8 +1472,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
 		break;
 	case LOOP_GET_STATUS64:
 		err = loop_get_status64(lo, (struct loop_info64 __user *) arg);
-		/* loop_get_status() unlocks lo_ctl_mutex */
-		goto out_unlocked;
+		break;
 	case LOOP_SET_CAPACITY:
 		err = -EPERM;
 		if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
@@ -1452,9 +1491,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
 	default:
 		err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL;
 	}
-	mutex_unlock(&lo->lo_ctl_mutex);
-
-out_unlocked:
+	unlock_loop();
 	return err;
 }
 
@@ -1555,6 +1592,7 @@ struct compat_loop_info {
 	struct loop_info64 info64;
 	int ret;
 
+	lockdep_assert_held(&loop_mutex);
 	ret = loop_info64_from_compat(arg, &info64);
 	if (ret < 0)
 		return ret;
@@ -1568,10 +1606,9 @@ struct compat_loop_info {
 	struct loop_info64 info64;
 	int err;
 
-	if (!arg) {
-		mutex_unlock(&lo->lo_ctl_mutex);
+	lockdep_assert_held(&loop_mutex);
+	if (!arg)
 		return -EINVAL;
-	}
 	err = loop_get_status(lo, &info64);
 	if (!err)
 		err = loop_info64_to_compat(&info64, arg);
@@ -1586,20 +1623,16 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode,
 
 	switch(cmd) {
 	case LOOP_SET_STATUS:
-		err = mutex_lock_killable(&lo->lo_ctl_mutex);
-		if (!err) {
+		err = lock_loop_killable();
+		if (!err)
 			err = loop_set_status_compat(lo,
 						     (const struct compat_loop_info __user *)arg);
-			mutex_unlock(&lo->lo_ctl_mutex);
-		}
 		break;
 	case LOOP_GET_STATUS:
-		err = mutex_lock_killable(&lo->lo_ctl_mutex);
-		if (!err) {
+		err = lock_loop_killable();
+		if (!err)
 			err = loop_get_status_compat(lo,
 						     (struct compat_loop_info __user *)arg);
-			/* loop_get_status() unlocks lo_ctl_mutex */
-		}
 		break;
 	case LOOP_SET_CAPACITY:
 	case LOOP_CLR_FD:
@@ -1614,6 +1647,7 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode,
 		err = -ENOIOCTLCMD;
 		break;
 	}
+	unlock_loop();
 	return err;
 }
 #endif
@@ -1621,37 +1655,30 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode,
 static int lo_open(struct block_device *bdev, fmode_t mode)
 {
 	struct loop_device *lo;
-	int err = 0;
+	int err = lock_loop_killable();
 
-	mutex_lock(&loop_index_mutex);
+	if (err)
+		return err;
 	lo = bdev->bd_disk->private_data;
-	if (!lo) {
+	if (!lo)
 		err = -ENXIO;
-		goto out;
-	}
-
-	atomic_inc(&lo->lo_refcnt);
-out:
-	mutex_unlock(&loop_index_mutex);
+	else
+		atomic_inc(&lo->lo_refcnt);
+	unlock_loop();
 	return err;
 }
 
 static void __lo_release(struct loop_device *lo)
 {
-	int err;
-
 	if (atomic_dec_return(&lo->lo_refcnt))
 		return;
-
-	mutex_lock(&lo->lo_ctl_mutex);
+	lockdep_assert_held(&loop_mutex);
 	if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) {
 		/*
 		 * In autoclear mode, stop the loop thread
 		 * and remove configuration after last close.
 		 */
-		err = loop_clr_fd(lo);
-		if (!err)
-			return;
+		loop_clr_fd(lo);
 	} else if (lo->lo_state == Lo_bound) {
 		/*
 		 * Otherwise keep thread (if running) and config,
@@ -1660,15 +1687,13 @@ static void __lo_release(struct loop_device *lo)
 		blk_mq_freeze_queue(lo->lo_queue);
 		blk_mq_unfreeze_queue(lo->lo_queue);
 	}
-
-	mutex_unlock(&lo->lo_ctl_mutex);
 }
 
 static void lo_release(struct gendisk *disk, fmode_t mode)
 {
-	mutex_lock(&loop_index_mutex);
+	lock_loop();
 	__lo_release(disk->private_data);
-	mutex_unlock(&loop_index_mutex);
+	unlock_loop();
 }
 
 static const struct block_device_operations lo_fops = {
@@ -1707,10 +1732,8 @@ static int unregister_transfer_cb(int id, void *ptr, void *data)
 	struct loop_device *lo = ptr;
 	struct loop_func_table *xfer = data;
 
-	mutex_lock(&lo->lo_ctl_mutex);
 	if (lo->lo_encryption == xfer)
 		loop_release_xfer(lo);
-	mutex_unlock(&lo->lo_ctl_mutex);
 	return 0;
 }
 
@@ -1722,8 +1745,14 @@ int loop_unregister_transfer(int number)
 	if (n == 0 || n >= MAX_LO_CRYPT || (xfer = xfer_funcs[n]) == NULL)
 		return -EINVAL;
 
+	/*
+	 * cleanup_cryptoloop() cannot handle errors because it is called
+	 * from module_exit(). Thus, don't give up upon SIGKILL here.
+	 */
+	lock_loop();
 	xfer_funcs[n] = NULL;
 	idr_for_each(&loop_index_idr, &unregister_transfer_cb, xfer);
+	unlock_loop();
 	return 0;
 }
 
@@ -1891,7 +1920,6 @@ static int loop_add(struct loop_device **l, int i)
 	if (!part_shift)
 		disk->flags |= GENHD_FL_NO_PART_SCAN;
 	disk->flags |= GENHD_FL_EXT_DEVT;
-	mutex_init(&lo->lo_ctl_mutex);
 	atomic_set(&lo->lo_refcnt, 0);
 	lo->lo_number		= i;
 	spin_lock_init(&lo->lo_lock);
@@ -1967,20 +1995,19 @@ static int loop_lookup(struct loop_device **l, int i)
 static struct kobject *loop_probe(dev_t dev, int *part, void *data)
 {
 	struct loop_device *lo;
-	struct kobject *kobj;
-	int err;
+	struct kobject *kobj = NULL;
+	int err = lock_loop_killable();
 
-	mutex_lock(&loop_index_mutex);
+	*part = 0;
+	if (err)
+		return NULL;
 	err = loop_lookup(&lo, MINOR(dev) >> part_shift);
 	if (err < 0)
 		err = loop_add(&lo, MINOR(dev) >> part_shift);
-	if (err < 0)
-		kobj = NULL;
-	else
+	if (err >= 0)
 		kobj = get_disk_and_module(lo->lo_disk);
-	mutex_unlock(&loop_index_mutex);
+	unlock_loop();
 
-	*part = 0;
 	return kobj;
 }
 
@@ -1988,9 +2015,11 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd,
 			       unsigned long parm)
 {
 	struct loop_device *lo;
-	int ret = -ENOSYS;
+	int ret = lock_loop_killable();
 
-	mutex_lock(&loop_index_mutex);
+	if (ret)
+		return ret;
+	ret = -ENOSYS;
 	switch (cmd) {
 	case LOOP_CTL_ADD:
 		ret = loop_lookup(&lo, parm);
@@ -2004,21 +2033,17 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd,
 		ret = loop_lookup(&lo, parm);
 		if (ret < 0)
 			break;
-		ret = mutex_lock_killable(&lo->lo_ctl_mutex);
 		if (ret)
 			break;
 		if (lo->lo_state != Lo_unbound) {
 			ret = -EBUSY;
-			mutex_unlock(&lo->lo_ctl_mutex);
 			break;
 		}
 		if (atomic_read(&lo->lo_refcnt) > 0) {
 			ret = -EBUSY;
-			mutex_unlock(&lo->lo_ctl_mutex);
 			break;
 		}
 		lo->lo_disk->private_data = NULL;
-		mutex_unlock(&lo->lo_ctl_mutex);
 		idr_remove(&loop_index_idr, lo->lo_number);
 		loop_remove(lo);
 		break;
@@ -2028,7 +2053,7 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd,
 			break;
 		ret = loop_add(&lo, -1);
 	}
-	mutex_unlock(&loop_index_mutex);
+	unlock_loop();
 
 	return ret;
 }
@@ -2112,10 +2137,10 @@ static int __init loop_init(void)
 				  THIS_MODULE, loop_probe, NULL, NULL);
 
 	/* pre-create number of devices given by config or max_loop */
-	mutex_lock(&loop_index_mutex);
+	lock_loop();
 	for (i = 0; i < nr; i++)
 		loop_add(&lo, i);
-	mutex_unlock(&loop_index_mutex);
+	unlock_loop();
 
 	printk(KERN_INFO "loop: module loaded\n");
 	return 0;
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index 4d42c7a..af75a5e 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -54,7 +54,6 @@ struct loop_device {
 
 	spinlock_t		lo_lock;
 	int			lo_state;
-	struct mutex		lo_ctl_mutex;
 	struct kthread_worker	worker;
 	struct task_struct	*worker_task;
 	bool			use_dio;
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2018-05-09 10:54 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <001a11446e86e97ceb055cf07f4e@google.com>
2017-11-01 19:02 ` possible deadlock in blkdev_reread_part Dmitry Vyukov
2017-11-05 20:18   ` Eric Biggers
     [not found]     ` <201804162302.GBI43737.OLFSOFQHVJMFtO@I-love.SAKURA.ne.jp>
     [not found]       ` <201804202306.FGC05220.OOJFMFOSFHtLQV@I-love.SAKURA.ne.jp>
2018-05-09 10:54         ` [PATCH v2] block/loop: Serialize ioctl operations Tetsuo Handa

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox