* [PATCH V3] blk-cgroup: defer blkcg css_put until blkg is unlinked from queue
@ 2026-06-16 1:17 Zizhi Wo
2026-06-16 16:50 ` Tang Yizhou
0 siblings, 1 reply; 2+ messages in thread
From: Zizhi Wo @ 2026-06-16 1:17 UTC (permalink / raw)
To: axboe, tj, josef, linux-block
Cc: cgroups, yangerkun, chengzhihao1, houtao1, yukuai, wozizhi
From: Zizhi Wo <wozizhi@huawei.com>
[BUG]
Our fuzz testing triggered a blkcg use-after-free issue:
BUG: KASAN: slab-use-after-free in _raw_spin_lock+0x75/0xe0
Call Trace:
...
blkcg_deactivate_policy+0x244/0x4d0
ioc_rqos_exit+0x44/0xe0
rq_qos_exit+0xba/0x120
__del_gendisk+0x50b/0x800
del_gendisk+0xff/0x190
...
[CAUSE]
process1                                process2
cgroup_rmdir
...
css_killed_work_fn
 offline_css
 ...
  blkcg_destroy_blkgs
  ...
   __blkg_release
    css_put(&blkg->blkcg->css)
    blkg_free
     INIT_WORK(xxx, blkg_free_workfn)
     schedule_work
 css_put
 ...
  blkcg_css_free
   kfree(blkcg)--------blkcg has been freed!!!
====================================schedule_work
blkg_free_workfn
                                        __del_gendisk
                                         rq_qos_exit
                                          ioc_rqos_exit
                                           blkcg_deactivate_policy
                                            mutex_lock(&q->blkcg_mutex)
                                            spin_lock_irq(&q->queue_lock)
                                            list_for_each_entry(blkg, xxx)
                                             blkcg = blkg->blkcg
                                             spin_lock(&blkcg->lock)-------UAF!!!
 mutex_lock(&q->blkcg_mutex)
 spin_lock_irq(&q->queue_lock)
 /* Only then is the blkg removed from the list */
 list_del_init(&blkg->q_node)
As a result, a blkg can still be reachable through q->blkg_list while
its ->blkcg has already been freed.
[FIX]
Fix this by deferring the blkcg css_put() until after the blkg has been
unlinked from q->blkg_list in blkg_free_workfn(). This ensures that the
blkcg outlives every blkg still reachable through q->blkg_list, so any
iterator holding q->queue_lock is guaranteed to observe a valid
blkg->blkcg.
While at it, move css_tryget_online() from blkg_create() into blkg_alloc()
so that the css reference is owned by the alloc/free pair rather than
straddling layers:
blkg_alloc() <-> blkg_free()
blkg_create() <-> blkg_destroy()
Fixes: f1c006f1c685 ("blk-cgroup: synchronize pd_free_fn() from blkg_free_workfn() and blkcg_deactivate_policy()")
Suggested-by: Hou Tao <houtao1@huawei.com>
Signed-off-by: Zizhi Wo <wozizhi@huawei.com>
Reviewed-by: Yu Kuai <yukuai@fygo.io>
---
v3:
- move css_put() after mutex_unlock() in blkg_free_workfn().
v2:
- Move css_tryget_online() from blkg_create() into blkg_alloc() so the
css reference follows the blkg's own lifetime, making the put in
blkg_free_workfn() symmetric with the get in blkg_alloc().
v1: https://lore.kernel.org/all/20260518010932.633707-1-wozizhi@huaweicloud.com/
block/blk-cgroup.c | 24 ++++++++++++------------
1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index bc63bd220865..3ac41f766caf 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -136,6 +136,11 @@ static void blkg_free_workfn(struct work_struct *work)
spin_unlock_irq(&q->queue_lock);
mutex_unlock(&q->blkcg_mutex);
+ /*
+ * Release blkcg css ref only after blkg is removed from q->blkg_list,
+ * so concurrent iterators won't see a blkg with a freed blkcg.
+ */
+ css_put(&blkg->blkcg->css);
blk_put_queue(q);
free_percpu(blkg->iostat_cpu);
percpu_ref_exit(&blkg->refcnt);
@@ -179,8 +184,6 @@ static void __blkg_release(struct rcu_head *rcu)
for_each_possible_cpu(cpu)
__blkcg_rstat_flush(blkcg, cpu);
- /* release the blkcg and parent blkg refs this blkg has been holding */
- css_put(&blkg->blkcg->css);
blkg_free(blkg);
}
@@ -313,6 +316,9 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct gendisk *disk,
goto out_exit_refcnt;
if (!blk_get_queue(disk->queue))
goto out_free_iostat;
+ /* blkg holds a reference to blkcg */
+ if (!css_tryget_online(&blkcg->css))
+ goto out_put_queue;
blkg->q = disk->queue;
INIT_LIST_HEAD(&blkg->q_node);
@@ -353,6 +359,8 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct gendisk *disk,
while (--i >= 0)
if (blkg->pd[i])
blkcg_policy[i]->pd_free_fn(blkg->pd[i]);
+ css_put(&blkcg->css);
+out_put_queue:
blk_put_queue(disk->queue);
out_free_iostat:
free_percpu(blkg->iostat_cpu);
@@ -381,18 +389,12 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, struct gendisk *disk,
goto err_free_blkg;
}
- /* blkg holds a reference to blkcg */
- if (!css_tryget_online(&blkcg->css)) {
- ret = -ENODEV;
- goto err_free_blkg;
- }
-
/* allocate */
if (!new_blkg) {
new_blkg = blkg_alloc(blkcg, disk, GFP_NOWAIT);
if (unlikely(!new_blkg)) {
ret = -ENOMEM;
- goto err_put_css;
+ goto err_free_blkg;
}
}
blkg = new_blkg;
@@ -402,7 +404,7 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, struct gendisk *disk,
blkg->parent = blkg_lookup(blkcg_parent(blkcg), disk->queue);
if (WARN_ON_ONCE(!blkg->parent)) {
ret = -ENODEV;
- goto err_put_css;
+ goto err_free_blkg;
}
blkg_get(blkg->parent);
}
@@ -442,8 +444,6 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, struct gendisk *disk,
blkg_put(blkg);
return ERR_PTR(ret);
-err_put_css:
- css_put(&blkcg->css);
err_free_blkg:
if (new_blkg)
blkg_free(new_blkg);
--
2.52.0
^ permalink raw reply related [flat|nested] 2+ messages in thread
* Re: [PATCH V3] blk-cgroup: defer blkcg css_put until blkg is unlinked from queue
2026-06-16 1:17 [PATCH V3] blk-cgroup: defer blkcg css_put until blkg is unlinked from queue Zizhi Wo
@ 2026-06-16 16:50 ` Tang Yizhou
0 siblings, 0 replies; 2+ messages in thread
From: Tang Yizhou @ 2026-06-16 16:50 UTC (permalink / raw)
To: Zizhi Wo, axboe, tj, josef, linux-block
Cc: cgroups, yangerkun, chengzhihao1, houtao1, yukuai
On 16/6/26 9:17 am, Zizhi Wo wrote:
> From: Zizhi Wo <wozizhi@huawei.com>
>
> [BUG]
> Our fuzz testing triggered a blkcg use-after-free issue:
>
> BUG: KASAN: slab-use-after-free in _raw_spin_lock+0x75/0xe0
> Call Trace:
> ...
> blkcg_deactivate_policy+0x244/0x4d0
> ioc_rqos_exit+0x44/0xe0
> rq_qos_exit+0xba/0x120
> __del_gendisk+0x50b/0x800
> del_gendisk+0xff/0x190
> ...
>
> [CAUSE]
> process1                                process2
> cgroup_rmdir
> ...
> css_killed_work_fn
>  offline_css
>  ...
>   blkcg_destroy_blkgs
>   ...
>    __blkg_release
>     css_put(&blkg->blkcg->css)
>     blkg_free
>      INIT_WORK(xxx, blkg_free_workfn)
>      schedule_work
>  css_put
>  ...
>   blkcg_css_free
>    kfree(blkcg)--------blkcg has been freed!!!
> ====================================schedule_work
> blkg_free_workfn
>                                         __del_gendisk
>                                          rq_qos_exit
>                                           ioc_rqos_exit
>                                            blkcg_deactivate_policy
>                                             mutex_lock(&q->blkcg_mutex)
>                                             spin_lock_irq(&q->queue_lock)
>                                             list_for_each_entry(blkg, xxx)
>                                              blkcg = blkg->blkcg
>                                              spin_lock(&blkcg->lock)-------UAF!!!
>  mutex_lock(&q->blkcg_mutex)
>  spin_lock_irq(&q->queue_lock)
>  /* Only then is the blkg removed from the list */
>  list_del_init(&blkg->q_node)
>
> As a result, a blkg can still be reachable through q->blkg_list while
> its ->blkcg has already been freed.
>
> [Fix]
> Fix this by deferring the blkcg css_put() until after the blkg has been
> unlinked from q->blkg_list in blkg_free_workfn(). This ensures that the
> blkcg outlives every blkg still reachable through q->blkg_list, so any
> iterator holding q->queue_lock is guaranteed to observe a valid
> blkg->blkcg.
>
> While at it, move css_tryget_online() from blkg_create() into blkg_alloc()
> so that the css reference is owned by the alloc/free pair rather than
> straddling layers:
> blkg_alloc() <-> blkg_free()
> blkg_create() <-> blkg_destroy()
>
> Fixes: f1c006f1c685 ("blk-cgroup: synchronize pd_free_fn() from blkg_free_workfn() and blkcg_deactivate_policy()")
> Suggested-by: Hou Tao <houtao1@huawei.com>
> Signed-off-by: Zizhi Wo <wozizhi@huawei.com>
> Reviewed-by: Yu Kuai <yukuai@fygo.io>
> ---
> v3:
> - move css_put() after mutex_unlock() in blkg_free_workfn().
>
> v2:
> - Move css_tryget_online() from blkg_create() into blkg_alloc() so the
> css reference follows the blkg's own lifetime, making the put in
> blkg_free_workfn() symmetric with the get in blkg_alloc().
>
> v1: https://lore.kernel.org/all/20260518010932.633707-1-wozizhi@huaweicloud.com/
> block/blk-cgroup.c | 24 ++++++++++++------------
> 1 file changed, 12 insertions(+), 12 deletions(-)
>
> diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
> index bc63bd220865..3ac41f766caf 100644
> --- a/block/blk-cgroup.c
> +++ b/block/blk-cgroup.c
> @@ -136,6 +136,11 @@ static void blkg_free_workfn(struct work_struct *work)
> spin_unlock_irq(&q->queue_lock);
> mutex_unlock(&q->blkcg_mutex);
>
> + /*
> + * Release blkcg css ref only after blkg is removed from q->blkg_list,
> + * so concurrent iterators won't see a blkg with a freed blkcg.
> + */
> + css_put(&blkg->blkcg->css);
> blk_put_queue(q);
> free_percpu(blkg->iostat_cpu);
> percpu_ref_exit(&blkg->refcnt);
> @@ -179,8 +184,6 @@ static void __blkg_release(struct rcu_head *rcu)
> for_each_possible_cpu(cpu)
> __blkcg_rstat_flush(blkcg, cpu);
>
> - /* release the blkcg and parent blkg refs this blkg has been holding */
> - css_put(&blkg->blkcg->css);
> blkg_free(blkg);
> }
>
> @@ -313,6 +316,9 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct gendisk *disk,
> goto out_exit_refcnt;
> if (!blk_get_queue(disk->queue))
> goto out_free_iostat;
> + /* blkg holds a reference to blkcg */
> + if (!css_tryget_online(&blkcg->css))
> + goto out_put_queue;
>
> blkg->q = disk->queue;
> INIT_LIST_HEAD(&blkg->q_node);
> @@ -353,6 +359,8 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct gendisk *disk,
> while (--i >= 0)
> if (blkg->pd[i])
> blkcg_policy[i]->pd_free_fn(blkg->pd[i]);
> + css_put(&blkcg->css);
> +out_put_queue:
> blk_put_queue(disk->queue);
> out_free_iostat:
> free_percpu(blkg->iostat_cpu);
> @@ -381,18 +389,12 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, struct gendisk *disk,
> goto err_free_blkg;
> }
>
> - /* blkg holds a reference to blkcg */
> - if (!css_tryget_online(&blkcg->css)) {
> - ret = -ENODEV;
> - goto err_free_blkg;
> - }
> -
> /* allocate */
> if (!new_blkg) {
> new_blkg = blkg_alloc(blkcg, disk, GFP_NOWAIT);
> if (unlikely(!new_blkg)) {
> ret = -ENOMEM;
> - goto err_put_css;
> + goto err_free_blkg;
> }
> }
> blkg = new_blkg;
> @@ -402,7 +404,7 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, struct gendisk *disk,
> blkg->parent = blkg_lookup(blkcg_parent(blkcg), disk->queue);
> if (WARN_ON_ONCE(!blkg->parent)) {
> ret = -ENODEV;
> - goto err_put_css;
> + goto err_free_blkg;
> }
> blkg_get(blkg->parent);
> }
> @@ -442,8 +444,6 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, struct gendisk *disk,
> blkg_put(blkg);
> return ERR_PTR(ret);
>
> -err_put_css:
> - css_put(&blkcg->css);
> err_free_blkg:
> if (new_blkg)
> blkg_free(new_blkg);
LGTM.
Reviewed-by: Tang Yizhou <yizhou.tang@shopee.com>
--
Best Regards,
Yi
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2026-06-16 16:50 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-06-16 1:17 [PATCH V3] blk-cgroup: defer blkcg css_put until blkg is unlinked from queue Zizhi Wo
2026-06-16 16:50 ` Tang Yizhou
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.