* [PATCH] block/genhd: use atomic_t for disk_event->block
@ 2021-06-01 11:01 Hannes Reinecke
From: Hannes Reinecke @ 2021-06-01 11:01 UTC (permalink / raw)
To: Jens Axboe
Cc: Christoph Hellwig, linux-block, Linux Kernel Mailinglist,
Hannes Reinecke
__disk_unblock_events() calls queue_delayed_work() with a '0' delay argument
while holding a spin lock. This can cause the queued work item to be executed
immediately and to deadlock in disk_check_events(), which waits for
the same lock to be released.
This patch converts the 'block' counter into an atomic variable, so we no
longer need to hold a spinlock when scheduling the workqueue function.
Signed-off-by: Hannes Reinecke <hare@suse.de>
---
block/genhd.c | 36 +++++++++++++-----------------------
1 file changed, 13 insertions(+), 23 deletions(-)
diff --git a/block/genhd.c b/block/genhd.c
index 9f8cb7beaad1..07e70f0c9c25 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1379,7 +1379,7 @@ struct disk_events {
spinlock_t lock;
struct mutex block_mutex; /* protects blocking */
- int block; /* event blocking depth */
+ atomic_t block; /* event blocking depth */
unsigned int pending; /* events already sent out */
unsigned int clearing; /* events being cleared */
@@ -1439,8 +1439,6 @@ static unsigned long disk_events_poll_jiffies(struct gendisk *disk)
void disk_block_events(struct gendisk *disk)
{
struct disk_events *ev = disk->ev;
- unsigned long flags;
- bool cancel;
if (!ev)
return;
@@ -1451,11 +1449,7 @@ void disk_block_events(struct gendisk *disk)
*/
mutex_lock(&ev->block_mutex);
- spin_lock_irqsave(&ev->lock, flags);
- cancel = !ev->block++;
- spin_unlock_irqrestore(&ev->lock, flags);
-
- if (cancel)
+ if (atomic_inc_return(&ev->block) == 1)
cancel_delayed_work_sync(&disk->ev->dwork);
mutex_unlock(&ev->block_mutex);
@@ -1467,23 +1461,19 @@ static void __disk_unblock_events(struct gendisk *disk, bool check_now)
unsigned long intv;
unsigned long flags;
+ if (atomic_dec_return(&ev->block) <= 0) {
+ mutex_unlock(&ev->block_mutex);
+ return;
+ }
spin_lock_irqsave(&ev->lock, flags);
-
- if (WARN_ON_ONCE(ev->block <= 0))
- goto out_unlock;
-
- if (--ev->block)
- goto out_unlock;
-
intv = disk_events_poll_jiffies(disk);
+ spin_unlock_irqrestore(&ev->lock, flags);
if (check_now)
queue_delayed_work(system_freezable_power_efficient_wq,
&ev->dwork, 0);
else if (intv)
queue_delayed_work(system_freezable_power_efficient_wq,
&ev->dwork, intv);
-out_unlock:
- spin_unlock_irqrestore(&ev->lock, flags);
}
/**
@@ -1523,10 +1513,10 @@ void disk_flush_events(struct gendisk *disk, unsigned int mask)
spin_lock_irq(&ev->lock);
ev->clearing |= mask;
- if (!ev->block)
+ spin_unlock_irq(&ev->lock);
+ if (!atomic_read(&ev->block))
mod_delayed_work(system_freezable_power_efficient_wq,
&ev->dwork, 0);
- spin_unlock_irq(&ev->lock);
}
/**
@@ -1638,11 +1628,11 @@ static void disk_check_events(struct disk_events *ev,
*clearing_ptr &= ~clearing;
intv = disk_events_poll_jiffies(disk);
- if (!ev->block && intv)
+ spin_unlock_irq(&ev->lock);
+ if (!atomic_read(&ev->block) && intv)
queue_delayed_work(system_freezable_power_efficient_wq,
&ev->dwork, intv);
- spin_unlock_irq(&ev->lock);
/*
* Tell userland about new events. Only the events listed in
@@ -1807,7 +1797,7 @@ static void disk_alloc_events(struct gendisk *disk)
ev->disk = disk;
spin_lock_init(&ev->lock);
mutex_init(&ev->block_mutex);
- ev->block = 1;
+ atomic_set(&ev->block, 1);
ev->poll_msecs = -1;
INIT_DELAYED_WORK(&ev->dwork, disk_events_workfn);
@@ -1851,6 +1841,6 @@ static void disk_del_events(struct gendisk *disk)
static void disk_release_events(struct gendisk *disk)
{
/* the block count should be 1 from disk_del_events() */
- WARN_ON_ONCE(disk->ev && disk->ev->block != 1);
+ WARN_ON_ONCE(disk->ev && atomic_read(&disk->ev->block) != 1);
kfree(disk->ev);
}
--
2.29.2
* Re: [PATCH] block/genhd: use atomic_t for disk_event->block
From: Ming Lei @ 2021-06-01 13:25 UTC (permalink / raw)
To: Hannes Reinecke
Cc: Jens Axboe, Christoph Hellwig, linux-block,
Linux Kernel Mailinglist
On Tue, Jun 01, 2021 at 01:01:45PM +0200, Hannes Reinecke wrote:
> __disk_unblock_events() will call queue_delayed_work() with a '0' argument
> under a spin lock. This might cause the queue_work item to be executed
> immediately, and run into a deadlock in disk_check_events() waiting for
> the lock to be released.
Do you have a lockdep warning for this 'deadlock'?

'Executed immediately' doesn't mean the work fn runs in the current
task context; it actually runs in a wq worker (task) context.
__queue_work() usually wakes up a wq worker to run the work fn,
so there shouldn't be the 'deadlock' you mention.
Thanks,
Ming
* Re: [PATCH] block/genhd: use atomic_t for disk_event->block
From: Hannes Reinecke @ 2021-06-01 13:41 UTC (permalink / raw)
To: Ming Lei
Cc: Jens Axboe, Christoph Hellwig, linux-block,
Linux Kernel Mailinglist
On 6/1/21 3:25 PM, Ming Lei wrote:
> On Tue, Jun 01, 2021 at 01:01:45PM +0200, Hannes Reinecke wrote:
>> __disk_unblock_events() calls queue_delayed_work() with a '0' delay argument
>> while holding a spin lock. This can cause the queued work item to be executed
>> immediately and to deadlock in disk_check_events(), which waits for
>> the same lock to be released.
>
> Do you have a lockdep warning for this 'deadlock'?
>
> 'Executed immediately' doesn't mean the work fn runs in the current
> task context; it actually runs in a wq worker (task) context.
> __queue_work() usually wakes up a wq worker to run the work fn,
> so there shouldn't be the 'deadlock' you mention.
>
That's what I thought, too, but then we received a customer report about a
stuck installation, with this kernel message:
> [ 990.305908] INFO: task init:1 blocked for more than 491 seconds.
> [ 990.311904] Not tainted 5.3.18-22-default #1
> [ 990.316682] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> [ 990.324483] init D 0 1 0 0x00000000
> [ 990.329950] Call Trace:
> [ 990.332396] __schedule+0x28a/0x6d0
> [ 990.335876] ? work_busy+0x80/0x80
> [ 990.339267] schedule+0x2f/0xa0
> [ 990.342399] schedule_timeout+0x1dd/0x300
> [ 990.346399] ? check_preempt_curr+0x29/0x80
> [ 990.350569] ? ttwu_do_wakeup+0x19/0x150
> [ 990.354480] ? work_busy+0x80/0x80
> [ 990.357869] wait_for_completion+0xba/0x140
> [ 990.362040] ? wake_up_q+0xa0/0xa0
> [ 990.365430] __flush_work+0x177/0x1d0
> [ 990.369080] ? worker_detach_from_pool+0xa0/0xa0
> [ 990.373682] __cancel_work_timer+0x12b/0x1b0
> [ 990.377940] ? exact_lock+0xd/0x20
> [ 990.381329] ? kobj_lookup+0x113/0x160
> [ 990.385067] disk_block_events+0x78/0x90
> [ 990.388979] __blkdev_get+0x6d/0x7e0
> [ 990.392542] ? blkdev_get_by_dev+0x40/0x40
> [ 990.396627] do_dentry_open+0x1ea/0x380
> [ 990.400450] path_openat+0x2fc/0x1520
> [ 990.404103] do_filp_open+0x9b/0x110
> [ 990.407667] ? kmem_cache_alloc+0x3d/0x250
> [ 990.411749] ? do_sys_open+0x1bd/0x250
> [ 990.415486] do_sys_open+0x1bd/0x250
> [ 990.419052] do_syscall_64+0x5b/0x1e0
> [ 990.422701] entry_SYSCALL_64_after_hwframe+0x44/0xa9
The hang does vanish with this patch.
Cheers,
Hannes
--
Dr. Hannes Reinecke Kernel Storage Architect
hare@suse.de +49 911 74053 688
SUSE Software Solutions Germany GmbH, 90409 Nürnberg
GF: F. Imendörffer, HRB 36809 (AG Nürnberg)