public inbox for linux-block@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] block: fix race between wbt_enable_default and IO submission
@ 2025-12-10  9:10 Ming Lei
  2025-12-12  5:56 ` Yu Kuai
  0 siblings, 1 reply; 4+ messages in thread
From: Ming Lei @ 2025-12-10  9:10 UTC (permalink / raw)
  To: Jens Axboe, linux-block; +Cc: Ming Lei, Nilay Shroff, Yu Kuai, Guangwu Zhang

When wbt_enable_default() is moved out of queue freezing in elevator_change(),
it can cause the wbt inflight counter to become negative (-1), leading to hung
tasks in the writeback path. Tasks get stuck in wbt_wait() because the counter
is in an inconsistent state.

The issue occurs because wbt_enable_default() could race with IO submission,
allowing the counter to be decremented before proper initialization. This manifests
as:

  rq_wait[0]:
    inflight:             -1
    has_waiters:        True

And results in hung task warnings like:
  task:kworker/u24:39 state:D stack:0 pid:14767
  Call Trace:
    rq_qos_wait+0xb4/0x150
    wbt_wait+0xa9/0x100
    __rq_qos_throttle+0x24/0x40
    blk_mq_submit_bio+0x672/0x7b0
    ...

Fix this by:

1. Splitting wbt_enable_default() into:
   - __wbt_enable_default(): Returns true if wbt_init() should be called
   - wbt_enable_default(): Wrapper for existing callers (no init)
   - wbt_init_enable_default(): New function that checks and inits WBT

2. Using wbt_init_enable_default() in blk_register_queue() to ensure
   proper initialization during queue registration

3. Move wbt_init() out of wbt_enable_default() which is only for enabling
   disabled wbt from bfq and iocost, and wbt_init() isn't needed. Then the
   original lock warning can be avoided.

4. Removing the ELEVATOR_FLAG_ENABLE_WBT_ON_EXIT flag and its handling
   code since it's no longer needed

This ensures WBT is properly initialized before any IO can be submitted,
preventing the counter from going negative.

Cc: Nilay Shroff <nilay@linux.ibm.com>
Cc: Yu Kuai <yukuai3@huawei.com>
Cc: Guangwu Zhang <guazhang@redhat.com>
Fixes: 78c271344b6f ("block: move wbt_enable_default() out of queue freezing from sched ->exit()")
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 block/bfq-iosched.c |  2 +-
 block/blk-sysfs.c   |  2 +-
 block/blk-wbt.c     | 20 ++++++++++++++++----
 block/blk-wbt.h     |  5 +++++
 block/elevator.c    |  4 ----
 block/elevator.h    |  1 -
 6 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 4a8d3d96bfe4..6e54b1d3d8bc 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -7181,7 +7181,7 @@ static void bfq_exit_queue(struct elevator_queue *e)
 
 	blk_stat_disable_accounting(bfqd->queue);
 	blk_queue_flag_clear(QUEUE_FLAG_DISABLE_WBT_DEF, bfqd->queue);
-	set_bit(ELEVATOR_FLAG_ENABLE_WBT_ON_EXIT, &e->flags);
+	wbt_enable_default(bfqd->queue->disk);
 
 	kfree(bfqd);
 }
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 8684c57498cc..e0a70d26972b 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -932,7 +932,7 @@ int blk_register_queue(struct gendisk *disk)
 		elevator_set_default(q);
 
 	blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q);
-	wbt_enable_default(disk);
+	wbt_init_enable_default(disk);
 
 	/* Now everything is ready and send out KOBJ_ADD uevent */
 	kobject_uevent(&disk->queue_kobj, KOBJ_ADD);
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index eb8037bae0bd..0974875f77bd 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -699,7 +699,7 @@ static void wbt_requeue(struct rq_qos *rqos, struct request *rq)
 /*
  * Enable wbt if defaults are configured that way
  */
-void wbt_enable_default(struct gendisk *disk)
+static bool __wbt_enable_default(struct gendisk *disk)
 {
 	struct request_queue *q = disk->queue;
 	struct rq_qos *rqos;
@@ -716,19 +716,31 @@ void wbt_enable_default(struct gendisk *disk)
 		if (enable && RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT)
 			RQWB(rqos)->enable_state = WBT_STATE_ON_DEFAULT;
 		mutex_unlock(&disk->rqos_state_mutex);
-		return;
+		return false;
 	}
 	mutex_unlock(&disk->rqos_state_mutex);
 
 	/* Queue not registered? Maybe shutting down... */
 	if (!blk_queue_registered(q))
-		return;
+		return false;
 
 	if (queue_is_mq(q) && enable)
-		wbt_init(disk);
+		return true;
+	return false;
+}
+
+void wbt_enable_default(struct gendisk *disk)
+{
+	__wbt_enable_default(disk);
 }
 EXPORT_SYMBOL_GPL(wbt_enable_default);
 
+void wbt_init_enable_default(struct gendisk *disk)
+{
+	if (__wbt_enable_default(disk))
+		WARN_ON_ONCE(wbt_init(disk));
+}
+
 u64 wbt_default_latency_nsec(struct request_queue *q)
 {
 	/*
diff --git a/block/blk-wbt.h b/block/blk-wbt.h
index e5fc653b9b76..925f22475738 100644
--- a/block/blk-wbt.h
+++ b/block/blk-wbt.h
@@ -5,6 +5,7 @@
 #ifdef CONFIG_BLK_WBT
 
 int wbt_init(struct gendisk *disk);
+void wbt_init_enable_default(struct gendisk *disk);
 void wbt_disable_default(struct gendisk *disk);
 void wbt_enable_default(struct gendisk *disk);
 
@@ -16,6 +17,10 @@ u64 wbt_default_latency_nsec(struct request_queue *);
 
 #else
 
+static inline void wbt_init_enable_default(struct gendisk *disk)
+{
+}
+
 static inline void wbt_disable_default(struct gendisk *disk)
 {
 }
diff --git a/block/elevator.c b/block/elevator.c
index 5b37ef44f52d..a2f8b2251dc6 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -633,14 +633,10 @@ static int elevator_change_done(struct request_queue *q,
 			.et = ctx->old->et,
 			.data = ctx->old->elevator_data
 		};
-		bool enable_wbt = test_bit(ELEVATOR_FLAG_ENABLE_WBT_ON_EXIT,
-				&ctx->old->flags);
 
 		elv_unregister_queue(q, ctx->old);
 		blk_mq_free_sched_res(&res, ctx->old->type, q->tag_set);
 		kobject_put(&ctx->old->kobj);
-		if (enable_wbt)
-			wbt_enable_default(q->disk);
 	}
 	if (ctx->new) {
 		ret = elv_register_queue(q, ctx->new, !ctx->no_uevent);
diff --git a/block/elevator.h b/block/elevator.h
index a9d092c5a9e8..3eb32516be0b 100644
--- a/block/elevator.h
+++ b/block/elevator.h
@@ -156,7 +156,6 @@ struct elevator_queue
 
 #define ELEVATOR_FLAG_REGISTERED	0
 #define ELEVATOR_FLAG_DYING		1
-#define ELEVATOR_FLAG_ENABLE_WBT_ON_EXIT	2
 
 /*
  * block elevator interface
-- 
2.50.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] block: fix race between wbt_enable_default and IO submission
  2025-12-10  9:10 [PATCH] block: fix race between wbt_enable_default and IO submission Ming Lei
@ 2025-12-12  5:56 ` Yu Kuai
  2025-12-12  7:47   ` Ming Lei
  0 siblings, 1 reply; 4+ messages in thread
From: Yu Kuai @ 2025-12-12  5:56 UTC (permalink / raw)
  To: Ming Lei, Jens Axboe, linux-block
  Cc: Nilay Shroff, Yu Kuai, Guangwu Zhang, yukuai

Hi,

在 2025/12/10 17:10, Ming Lei 写道:
> When wbt_enable_default() is moved out of queue freezing in elevator_change(),
> it can cause the wbt inflight counter to become negative (-1), leading to hung
> tasks in the writeback path. Tasks get stuck in wbt_wait() because the counter
> is in an inconsistent state.
>
> The issue occurs because wbt_enable_default() could race with IO submission,
> allowing the counter to be decremented before proper initialization. This manifests
> as:
>
>    rq_wait[0]:
>      inflight:             -1
>      has_waiters:        True
>
> And results in hung task warnings like:
>    task:kworker/u24:39 state:D stack:0 pid:14767
>    Call Trace:
>      rq_qos_wait+0xb4/0x150
>      wbt_wait+0xa9/0x100
>      __rq_qos_throttle+0x24/0x40
>      blk_mq_submit_bio+0x672/0x7b0
>      ...
>
> Fix this by:
>
> 1. Splitting wbt_enable_default() into:
>     - __wbt_enable_default(): Returns true if wbt_init() should be called
>     - wbt_enable_default(): Wrapper for existing callers (no init)
>     - wbt_init_enable_default(): New function that checks and inits WBT
>
> 2. Using wbt_init_enable_default() in blk_register_queue() to ensure
>     proper initialization during queue registration
>
> 3. Move wbt_init() out of wbt_enable_default() which is only for enabling
>     disabled wbt from bfq and iocost, and wbt_init() isn't needed. Then the
>     original lock warning can be avoided.
>
> 4. Removing the ELEVATOR_FLAG_ENABLE_WBT_ON_EXIT flag and its handling
>     code since it's no longer needed
>
> This ensures WBT is properly initialized before any IO can be submitted,
> preventing the counter from going negative.
>
> Cc: Nilay Shroff <nilay@linux.ibm.com>
> Cc: Yu Kuai <yukuai3@huawei.com>
> Cc: Guangwu Zhang <guazhang@redhat.com>
> Fixes: 78c271344b6f ("block: move wbt_enable_default() out of queue freezing from sched ->exit()")
> Signed-off-by: Ming Lei <ming.lei@redhat.com>
> ---
>   block/bfq-iosched.c |  2 +-
>   block/blk-sysfs.c   |  2 +-
>   block/blk-wbt.c     | 20 ++++++++++++++++----
>   block/blk-wbt.h     |  5 +++++
>   block/elevator.c    |  4 ----
>   block/elevator.h    |  1 -
>   6 files changed, 23 insertions(+), 11 deletions(-)
>
> diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
> index 4a8d3d96bfe4..6e54b1d3d8bc 100644
> --- a/block/bfq-iosched.c
> +++ b/block/bfq-iosched.c
> @@ -7181,7 +7181,7 @@ static void bfq_exit_queue(struct elevator_queue *e)
>   
>   	blk_stat_disable_accounting(bfqd->queue);
>   	blk_queue_flag_clear(QUEUE_FLAG_DISABLE_WBT_DEF, bfqd->queue);
> -	set_bit(ELEVATOR_FLAG_ENABLE_WBT_ON_EXIT, &e->flags);
> +	wbt_enable_default(bfqd->queue->disk);
>   
>   	kfree(bfqd);
>   }
> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
> index 8684c57498cc..e0a70d26972b 100644
> --- a/block/blk-sysfs.c
> +++ b/block/blk-sysfs.c
> @@ -932,7 +932,7 @@ int blk_register_queue(struct gendisk *disk)
>   		elevator_set_default(q);
>   
>   	blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q);
> -	wbt_enable_default(disk);
> +	wbt_init_enable_default(disk);
>   
>   	/* Now everything is ready and send out KOBJ_ADD uevent */
>   	kobject_uevent(&disk->queue_kobj, KOBJ_ADD);
> diff --git a/block/blk-wbt.c b/block/blk-wbt.c
> index eb8037bae0bd..0974875f77bd 100644
> --- a/block/blk-wbt.c
> +++ b/block/blk-wbt.c
> @@ -699,7 +699,7 @@ static void wbt_requeue(struct rq_qos *rqos, struct request *rq)
>   /*
>    * Enable wbt if defaults are configured that way
>    */
> -void wbt_enable_default(struct gendisk *disk)
> +static bool __wbt_enable_default(struct gendisk *disk)
>   {
>   	struct request_queue *q = disk->queue;
>   	struct rq_qos *rqos;
> @@ -716,19 +716,31 @@ void wbt_enable_default(struct gendisk *disk)
>   		if (enable && RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT)
>   			RQWB(rqos)->enable_state = WBT_STATE_ON_DEFAULT;

Is this problem due to above state? Changing the state is not under queue frozen.
The commit message is not quite clear to me. If so, the changes looks good to me,
by setting the state with queue frozen.

Thanks,
Kuai

>   		mutex_unlock(&disk->rqos_state_mutex);
> -		return;
> +		return false;
>   	}
>   	mutex_unlock(&disk->rqos_state_mutex);
>   
>   	/* Queue not registered? Maybe shutting down... */
>   	if (!blk_queue_registered(q))
> -		return;
> +		return false;
>   
>   	if (queue_is_mq(q) && enable)
> -		wbt_init(disk);
> +		return true;
> +	return false;
> +}
> +
> +void wbt_enable_default(struct gendisk *disk)
> +{
> +	__wbt_enable_default(disk);
>   }
>   EXPORT_SYMBOL_GPL(wbt_enable_default);
>   
> +void wbt_init_enable_default(struct gendisk *disk)
> +{
> +	if (__wbt_enable_default(disk))
> +		WARN_ON_ONCE(wbt_init(disk));
> +}
> +
>   u64 wbt_default_latency_nsec(struct request_queue *q)
>   {
>   	/*
> diff --git a/block/blk-wbt.h b/block/blk-wbt.h
> index e5fc653b9b76..925f22475738 100644
> --- a/block/blk-wbt.h
> +++ b/block/blk-wbt.h
> @@ -5,6 +5,7 @@
>   #ifdef CONFIG_BLK_WBT
>   
>   int wbt_init(struct gendisk *disk);
> +void wbt_init_enable_default(struct gendisk *disk);
>   void wbt_disable_default(struct gendisk *disk);
>   void wbt_enable_default(struct gendisk *disk);
>   
> @@ -16,6 +17,10 @@ u64 wbt_default_latency_nsec(struct request_queue *);
>   
>   #else
>   
> +static inline void wbt_init_enable_default(struct gendisk *disk)
> +{
> +}
> +
>   static inline void wbt_disable_default(struct gendisk *disk)
>   {
>   }
> diff --git a/block/elevator.c b/block/elevator.c
> index 5b37ef44f52d..a2f8b2251dc6 100644
> --- a/block/elevator.c
> +++ b/block/elevator.c
> @@ -633,14 +633,10 @@ static int elevator_change_done(struct request_queue *q,
>   			.et = ctx->old->et,
>   			.data = ctx->old->elevator_data
>   		};
> -		bool enable_wbt = test_bit(ELEVATOR_FLAG_ENABLE_WBT_ON_EXIT,
> -				&ctx->old->flags);
>   
>   		elv_unregister_queue(q, ctx->old);
>   		blk_mq_free_sched_res(&res, ctx->old->type, q->tag_set);
>   		kobject_put(&ctx->old->kobj);
> -		if (enable_wbt)
> -			wbt_enable_default(q->disk);
>   	}
>   	if (ctx->new) {
>   		ret = elv_register_queue(q, ctx->new, !ctx->no_uevent);
> diff --git a/block/elevator.h b/block/elevator.h
> index a9d092c5a9e8..3eb32516be0b 100644
> --- a/block/elevator.h
> +++ b/block/elevator.h
> @@ -156,7 +156,6 @@ struct elevator_queue
>   
>   #define ELEVATOR_FLAG_REGISTERED	0
>   #define ELEVATOR_FLAG_DYING		1
> -#define ELEVATOR_FLAG_ENABLE_WBT_ON_EXIT	2
>   
>   /*
>    * block elevator interface

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] block: fix race between wbt_enable_default and IO submission
  2025-12-12  5:56 ` Yu Kuai
@ 2025-12-12  7:47   ` Ming Lei
  2025-12-12 13:40     ` Nilay Shroff
  0 siblings, 1 reply; 4+ messages in thread
From: Ming Lei @ 2025-12-12  7:47 UTC (permalink / raw)
  To: Yu Kuai; +Cc: Jens Axboe, linux-block, Nilay Shroff, Yu Kuai, Guangwu Zhang

On Fri, Dec 12, 2025 at 01:56:20PM +0800, Yu Kuai wrote:
> Hi,
> 
> 在 2025/12/10 17:10, Ming Lei 写道:
> > When wbt_enable_default() is moved out of queue freezing in elevator_change(),
> > it can cause the wbt inflight counter to become negative (-1), leading to hung
> > tasks in the writeback path. Tasks get stuck in wbt_wait() because the counter
> > is in an inconsistent state.
> >
> > The issue occurs because wbt_enable_default() could race with IO submission,
> > allowing the counter to be decremented before proper initialization. This manifests
> > as:
> >
> >    rq_wait[0]:
> >      inflight:             -1
> >      has_waiters:        True
> >
> > And results in hung task warnings like:
> >    task:kworker/u24:39 state:D stack:0 pid:14767
> >    Call Trace:
> >      rq_qos_wait+0xb4/0x150
> >      wbt_wait+0xa9/0x100
> >      __rq_qos_throttle+0x24/0x40
> >      blk_mq_submit_bio+0x672/0x7b0
> >      ...
> >
> > Fix this by:
> >
> > 1. Splitting wbt_enable_default() into:
> >     - __wbt_enable_default(): Returns true if wbt_init() should be called
> >     - wbt_enable_default(): Wrapper for existing callers (no init)
> >     - wbt_init_enable_default(): New function that checks and inits WBT
> >
> > 2. Using wbt_init_enable_default() in blk_register_queue() to ensure
> >     proper initialization during queue registration
> >
> > 3. Move wbt_init() out of wbt_enable_default() which is only for enabling
> >     disabled wbt from bfq and iocost, and wbt_init() isn't needed. Then the
> >     original lock warning can be avoided.
> >
> > 4. Removing the ELEVATOR_FLAG_ENABLE_WBT_ON_EXIT flag and its handling
> >     code since it's no longer needed
> >
> > This ensures WBT is properly initialized before any IO can be submitted,
> > preventing the counter from going negative.
> >
> > Cc: Nilay Shroff <nilay@linux.ibm.com>
> > Cc: Yu Kuai <yukuai3@huawei.com>
> > Cc: Guangwu Zhang <guazhang@redhat.com>
> > Fixes: 78c271344b6f ("block: move wbt_enable_default() out of queue freezing from sched ->exit()")
> > Signed-off-by: Ming Lei <ming.lei@redhat.com>
> > ---
> >   block/bfq-iosched.c |  2 +-
> >   block/blk-sysfs.c   |  2 +-
> >   block/blk-wbt.c     | 20 ++++++++++++++++----
> >   block/blk-wbt.h     |  5 +++++
> >   block/elevator.c    |  4 ----
> >   block/elevator.h    |  1 -
> >   6 files changed, 23 insertions(+), 11 deletions(-)
> >
> > diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
> > index 4a8d3d96bfe4..6e54b1d3d8bc 100644
> > --- a/block/bfq-iosched.c
> > +++ b/block/bfq-iosched.c
> > @@ -7181,7 +7181,7 @@ static void bfq_exit_queue(struct elevator_queue *e)
> >   
> >   	blk_stat_disable_accounting(bfqd->queue);
> >   	blk_queue_flag_clear(QUEUE_FLAG_DISABLE_WBT_DEF, bfqd->queue);
> > -	set_bit(ELEVATOR_FLAG_ENABLE_WBT_ON_EXIT, &e->flags);
> > +	wbt_enable_default(bfqd->queue->disk);
> >   
> >   	kfree(bfqd);
> >   }
> > diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
> > index 8684c57498cc..e0a70d26972b 100644
> > --- a/block/blk-sysfs.c
> > +++ b/block/blk-sysfs.c
> > @@ -932,7 +932,7 @@ int blk_register_queue(struct gendisk *disk)
> >   		elevator_set_default(q);
> >   
> >   	blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q);
> > -	wbt_enable_default(disk);
> > +	wbt_init_enable_default(disk);
> >   
> >   	/* Now everything is ready and send out KOBJ_ADD uevent */
> >   	kobject_uevent(&disk->queue_kobj, KOBJ_ADD);
> > diff --git a/block/blk-wbt.c b/block/blk-wbt.c
> > index eb8037bae0bd..0974875f77bd 100644
> > --- a/block/blk-wbt.c
> > +++ b/block/blk-wbt.c
> > @@ -699,7 +699,7 @@ static void wbt_requeue(struct rq_qos *rqos, struct request *rq)
> >   /*
> >    * Enable wbt if defaults are configured that way
> >    */
> > -void wbt_enable_default(struct gendisk *disk)
> > +static bool __wbt_enable_default(struct gendisk *disk)
> >   {
> >   	struct request_queue *q = disk->queue;
> >   	struct rq_qos *rqos;
> > @@ -716,19 +716,31 @@ void wbt_enable_default(struct gendisk *disk)
> >   		if (enable && RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT)
> >   			RQWB(rqos)->enable_state = WBT_STATE_ON_DEFAULT;
> 
> Is this problem due to above state? Changing the state is not under queue frozen.
> The commit message is not quite clear to me. If so, the changes looks good to me,
> by setting the state with queue frozen.

Yes, rwb_enabled() checks the state, which can be update exactly between wbt_wait()
(rq_qos_throttle()) and wbt_track()(rq_qos_track), then the inflight counter will
become negative.


Thanks,
Ming


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] block: fix race between wbt_enable_default and IO submission
  2025-12-12  7:47   ` Ming Lei
@ 2025-12-12 13:40     ` Nilay Shroff
  0 siblings, 0 replies; 4+ messages in thread
From: Nilay Shroff @ 2025-12-12 13:40 UTC (permalink / raw)
  To: Ming Lei, Yu Kuai; +Cc: Jens Axboe, linux-block, Yu Kuai, Guangwu Zhang



On 12/12/25 1:17 PM, Ming Lei wrote:
> On Fri, Dec 12, 2025 at 01:56:20PM +0800, Yu Kuai wrote:
>> Hi,
>>
>> 在 2025/12/10 17:10, Ming Lei 写道:
>>> When wbt_enable_default() is moved out of queue freezing in elevator_change(),
>>> it can cause the wbt inflight counter to become negative (-1), leading to hung
>>> tasks in the writeback path. Tasks get stuck in wbt_wait() because the counter
>>> is in an inconsistent state.
>>>
>>> The issue occurs because wbt_enable_default() could race with IO submission,
>>> allowing the counter to be decremented before proper initialization. This manifests
>>> as:
>>>
>>>    rq_wait[0]:
>>>      inflight:             -1
>>>      has_waiters:        True
>>>
>>> And results in hung task warnings like:
>>>    task:kworker/u24:39 state:D stack:0 pid:14767
>>>    Call Trace:
>>>      rq_qos_wait+0xb4/0x150
>>>      wbt_wait+0xa9/0x100
>>>      __rq_qos_throttle+0x24/0x40
>>>      blk_mq_submit_bio+0x672/0x7b0
>>>      ...
>>>
>>> Fix this by:
>>>
>>> 1. Splitting wbt_enable_default() into:
>>>     - __wbt_enable_default(): Returns true if wbt_init() should be called
>>>     - wbt_enable_default(): Wrapper for existing callers (no init)
>>>     - wbt_init_enable_default(): New function that checks and inits WBT
>>>
>>> 2. Using wbt_init_enable_default() in blk_register_queue() to ensure
>>>     proper initialization during queue registration
>>>
>>> 3. Move wbt_init() out of wbt_enable_default() which is only for enabling
>>>     disabled wbt from bfq and iocost, and wbt_init() isn't needed. Then the
>>>     original lock warning can be avoided.
>>>
>>> 4. Removing the ELEVATOR_FLAG_ENABLE_WBT_ON_EXIT flag and its handling
>>>     code since it's no longer needed
>>>
>>> This ensures WBT is properly initialized before any IO can be submitted,
>>> preventing the counter from going negative.
>>>
>>> Cc: Nilay Shroff <nilay@linux.ibm.com>
>>> Cc: Yu Kuai <yukuai3@huawei.com>
>>> Cc: Guangwu Zhang <guazhang@redhat.com>
>>> Fixes: 78c271344b6f ("block: move wbt_enable_default() out of queue freezing from sched ->exit()")
>>> Signed-off-by: Ming Lei <ming.lei@redhat.com>
>>> ---
>>>   block/bfq-iosched.c |  2 +-
>>>   block/blk-sysfs.c   |  2 +-
>>>   block/blk-wbt.c     | 20 ++++++++++++++++----
>>>   block/blk-wbt.h     |  5 +++++
>>>   block/elevator.c    |  4 ----
>>>   block/elevator.h    |  1 -
>>>   6 files changed, 23 insertions(+), 11 deletions(-)
>>>
>>> diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
>>> index 4a8d3d96bfe4..6e54b1d3d8bc 100644
>>> --- a/block/bfq-iosched.c
>>> +++ b/block/bfq-iosched.c
>>> @@ -7181,7 +7181,7 @@ static void bfq_exit_queue(struct elevator_queue *e)
>>>   
>>>   	blk_stat_disable_accounting(bfqd->queue);
>>>   	blk_queue_flag_clear(QUEUE_FLAG_DISABLE_WBT_DEF, bfqd->queue);
>>> -	set_bit(ELEVATOR_FLAG_ENABLE_WBT_ON_EXIT, &e->flags);
>>> +	wbt_enable_default(bfqd->queue->disk);
>>>   
>>>   	kfree(bfqd);
>>>   }
>>> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
>>> index 8684c57498cc..e0a70d26972b 100644
>>> --- a/block/blk-sysfs.c
>>> +++ b/block/blk-sysfs.c
>>> @@ -932,7 +932,7 @@ int blk_register_queue(struct gendisk *disk)
>>>   		elevator_set_default(q);
>>>   
>>>   	blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q);
>>> -	wbt_enable_default(disk);
>>> +	wbt_init_enable_default(disk);
>>>   
>>>   	/* Now everything is ready and send out KOBJ_ADD uevent */
>>>   	kobject_uevent(&disk->queue_kobj, KOBJ_ADD);
>>> diff --git a/block/blk-wbt.c b/block/blk-wbt.c
>>> index eb8037bae0bd..0974875f77bd 100644
>>> --- a/block/blk-wbt.c
>>> +++ b/block/blk-wbt.c
>>> @@ -699,7 +699,7 @@ static void wbt_requeue(struct rq_qos *rqos, struct request *rq)
>>>   /*
>>>    * Enable wbt if defaults are configured that way
>>>    */
>>> -void wbt_enable_default(struct gendisk *disk)
>>> +static bool __wbt_enable_default(struct gendisk *disk)
>>>   {
>>>   	struct request_queue *q = disk->queue;
>>>   	struct rq_qos *rqos;
>>> @@ -716,19 +716,31 @@ void wbt_enable_default(struct gendisk *disk)
>>>   		if (enable && RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT)
>>>   			RQWB(rqos)->enable_state = WBT_STATE_ON_DEFAULT;
>>
>> Is this problem due to above state? Changing the state is not under queue frozen.
>> The commit message is not quite clear to me. If so, the changes looks good to me,
>> by setting the state with queue frozen.
> 
> Yes, rwb_enabled() checks the state, which can be update exactly between wbt_wait()
> (rq_qos_throttle()) and wbt_track()(rq_qos_track), then the inflight counter will
> become negative.
> 
I think above reasoning should be added in the commit message so
that makes it easy to understand the proposed change, otherwise
changes looks good to me.

Thanks,
--Nilay
 


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2025-12-12 13:40 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-12-10  9:10 [PATCH] block: fix race between wbt_enable_default and IO submission Ming Lei
2025-12-12  5:56 ` Yu Kuai
2025-12-12  7:47   ` Ming Lei
2025-12-12 13:40     ` Nilay Shroff

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox