From: Yu Kuai <yukuai@kernel.org>
To: Jens Axboe <axboe@kernel.dk>, Tejun Heo <tj@kernel.org>
Cc: Christoph Hellwig <hch@lst.de>, Keith Busch <kbusch@kernel.org>,
Sagi Grimberg <sagi@grimberg.me>,
Alasdair Kergon <agk@redhat.com>,
Benjamin Marzinski <bmarzins@redhat.com>,
Mike Snitzer <snitzer@kernel.org>,
Mikulas Patocka <mpatocka@redhat.com>,
Dongsheng Yang <dongsheng.yang@linux.dev>,
Zheng Gu <cengku@gmail.com>, Coly Li <colyli@fygo.io>,
Kent Overstreet <kent.overstreet@linux.dev>,
Josef Bacik <josef@toxicpanda.com>, Yu Kuai <yukuai@fygo.io>,
Nilay Shroff <nilay@linux.ibm.com>,
linux-block@vger.kernel.org, cgroups@vger.kernel.org,
linux-nvme@lists.infradead.org, dm-devel@lists.linux.dev,
linux-bcache@vger.kernel.org
Subject: [RFC PATCH v1 16/17] blk-cgroup: allocate blkgs in blkg_create
Date: Sun, 5 Jul 2026 03:51:23 +0800 [thread overview]
Message-ID: <20260704195124.1375075-17-yukuai@kernel.org> (raw)
In-Reply-To: <20260704195124.1375075-1-yukuai@kernel.org>
From: Yu Kuai <yukuai@fygo.io>
Now that radix tree preloading is gone, callers no longer need to allocate
a blkg before entering blkg_create(). Move the allocation into
blkg_create() and have callers pass the desired GFP mask instead of a
preallocated blkg.
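Use GFP_NOIO for runtime and config blkg creation so that slow paths can
sleep without recursing into IO reclaim, keep GFP_KERNEL for root blkg
setup, and use GFP_ATOMIC when nowait bio association creates a missing
blkg after a successful q->blkcg_mutex trylock.
blkcg_activate_policy() likewise allocates policy data with GFP_NOIO while
holding q->blkcg_mutex, which removes the GFP_NOWAIT attempt and its
preallocate-and-retry fallback.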
Signed-off-by: Yu Kuai <yukuai@fygo.io>
---
block/blk-cgroup.c | 89 ++++++++++------------------------------------
1 file changed, 18 insertions(+), 71 deletions(-)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index b99ab8d67798..ddc9073d7ab9 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -371,14 +371,10 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct gendisk *disk,
return NULL;
}
-/*
- * If @new_blkg is %NULL, this function tries to allocate a new one as
- * necessary using %GFP_NOWAIT. @new_blkg is always consumed on return.
- */
static struct blkcg_gq *blkg_create(struct blkcg *blkcg, struct gendisk *disk,
- struct blkcg_gq *new_blkg)
+ gfp_t gfp_mask)
{
- struct blkcg_gq *blkg;
+ struct blkcg_gq *blkg = NULL;
int i, ret;
lockdep_assert_held(&disk->queue->blkcg_mutex);
@@ -389,15 +385,11 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, struct gendisk *disk,
goto err_free_blkg;
}
- /* allocate */
- if (!new_blkg) {
- new_blkg = blkg_alloc(blkcg, disk, GFP_NOWAIT);
- if (unlikely(!new_blkg)) {
- ret = -ENOMEM;
- goto err_free_blkg;
- }
+ blkg = blkg_alloc(blkcg, disk, gfp_mask);
+ if (unlikely(!blkg)) {
+ ret = -ENOMEM;
+ goto err_free_blkg;
}
- blkg = new_blkg;
/* link parent */
if (blkcg_parent(blkcg)) {
@@ -447,8 +439,8 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, struct gendisk *disk,
return ERR_PTR(ret);
err_free_blkg:
- if (new_blkg)
- blkg_free(new_blkg);
+ if (blkg)
+ blkg_free(blkg);
return ERR_PTR(ret);
}
@@ -505,7 +497,7 @@ static struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
}
rcu_read_unlock();
- blkg = blkg_create(pos, disk, NULL);
+ blkg = blkg_create(pos, disk, GFP_NOIO);
if (IS_ERR(blkg)) {
blkg = ret_blkg;
break;
@@ -858,7 +850,6 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
while (true) {
struct blkcg *pos = blkcg;
struct blkcg *parent;
- struct blkcg_gq *new_blkg;
parent = blkcg_parent(blkcg);
rcu_read_lock();
@@ -868,14 +859,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
}
rcu_read_unlock();
- new_blkg = blkg_alloc(pos, disk, GFP_NOIO);
- if (unlikely(!new_blkg)) {
- ret = -ENOMEM;
- goto fail_unlock;
- }
-
if (!blkcg_policy_enabled(q, pol)) {
- blkg_free(new_blkg);
ret = -EOPNOTSUPP;
goto fail_unlock;
}
@@ -883,10 +867,8 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
rcu_read_lock();
blkg = blkg_lookup(pos, q);
rcu_read_unlock();
- if (blkg) {
- blkg_free(new_blkg);
- } else {
- blkg = blkg_create(pos, disk, new_blkg);
+ if (!blkg) {
+ blkg = blkg_create(pos, disk, GFP_NOIO);
if (IS_ERR(blkg)) {
ret = PTR_ERR(blkg);
goto fail_unlock;
@@ -1436,7 +1418,7 @@ void blkg_init_queue(struct request_queue *q)
int blkcg_init_disk(struct gendisk *disk)
{
struct request_queue *q = disk->queue;
- struct blkcg_gq *new_blkg, *blkg;
+ struct blkcg_gq *blkg;
/*
* If the queue is shared across disk rebind (e.g., SCSI), the
@@ -1450,13 +1432,9 @@ int blkcg_init_disk(struct gendisk *disk)
*/
wait_var_event(&q->root_blkg, !READ_ONCE(q->root_blkg));
- new_blkg = blkg_alloc(&blkcg_root, disk, GFP_KERNEL);
- if (!new_blkg)
- return -ENOMEM;
-
/* Make sure the root blkg exists. */
mutex_lock(&q->blkcg_mutex);
- blkg = blkg_create(&blkcg_root, disk, new_blkg);
+ blkg = blkg_create(&blkcg_root, disk, GFP_KERNEL);
if (IS_ERR(blkg))
goto err_unlock;
q->root_blkg = blkg;
@@ -1559,8 +1537,7 @@ static void blkg_free_policy_data(struct blkcg_gq *blkg,
int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol)
{
struct request_queue *q = disk->queue;
- struct blkg_policy_data *pd_prealloc = NULL;
- struct blkcg_gq *blkg, *pinned_blkg = NULL;
+ struct blkcg_gq *blkg;
unsigned int memflags;
int ret;
@@ -1578,7 +1555,6 @@ int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol)
if (queue_is_mq(q))
memflags = blk_mq_freeze_queue(q);
-retry:
mutex_lock(&q->blkcg_mutex);
/* blkg_list is pushed at the head, reverse walk to initialize parents first */
@@ -1590,34 +1566,9 @@ int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol)
if (hlist_unhashed(&blkg->blkcg_node))
continue;
- /* If prealloc matches, use it; otherwise try GFP_NOWAIT. */
- if (blkg == pinned_blkg) {
- pd = pd_prealloc;
- pd_prealloc = NULL;
- } else {
- pd = pol->pd_alloc_fn(disk, blkg->blkcg, GFP_NOWAIT);
- }
-
- if (!pd) {
- /*
- * GFP_NOWAIT failed. Free the existing one and
- * prealloc for @blkg w/ GFP_KERNEL.
- */
- if (pinned_blkg)
- blkg_put(pinned_blkg);
- blkg_get(blkg);
- pinned_blkg = blkg;
-
- mutex_unlock(&q->blkcg_mutex);
-
- if (pd_prealloc)
- pol->pd_free_fn(pd_prealloc);
- pd_prealloc = pol->pd_alloc_fn(disk, blkg->blkcg,
- GFP_KERNEL);
- if (pd_prealloc)
- goto retry;
+ pd = pol->pd_alloc_fn(disk, blkg->blkcg, GFP_NOIO);
+ if (!pd)
goto enomem;
- }
spin_lock_irq(&blkg->blkcg->lock);
@@ -1642,15 +1593,10 @@ int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol)
mutex_unlock(&q->blkcg_mutex);
if (queue_is_mq(q))
blk_mq_unfreeze_queue(q, memflags);
- if (pinned_blkg)
- blkg_put(pinned_blkg);
- if (pd_prealloc)
- pol->pd_free_fn(pd_prealloc);
return ret;
enomem:
/* alloc failed, take down everything */
- mutex_lock(&q->blkcg_mutex);
list_for_each_entry(blkg, &q->blkg_list, q_node)
blkg_free_policy_data(blkg, pol);
ret = -ENOMEM;
@@ -2080,7 +2026,8 @@ static inline struct blkcg_gq *blkg_tryget_closest(struct bio *bio,
if (!preemptible() || !mutex_trylock(&q->blkcg_mutex))
return NULL;
- blkg = blkg_lookup_create(blkcg, bio->bi_bdev->bd_disk);
+ blkg = blkg_lookup_create(blkcg, bio->bi_bdev->bd_disk,
+ GFP_ATOMIC);
if (blkg)
blkg = blkg_lookup_tryget(blkg);
mutex_unlock(&q->blkcg_mutex);
--
2.51.0
next prev parent reply other threads:[~2026-07-04 19:54 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-07-04 19:51 [RFC PATCH v1 00/17] blk-cgroup: protect blkgs with blkcg_mutex Yu Kuai
2026-07-04 19:51 ` [RFC PATCH v1 01/17] nvme-multipath: retarget failedover bios from requeue work Yu Kuai
2026-07-04 19:51 ` [RFC PATCH v1 02/17] dm thin: avoid bio_set_dev under pool lock Yu Kuai
2026-07-04 19:51 ` [RFC PATCH v1 03/17] dm snapshot: avoid bio_set_dev in locked map paths Yu Kuai
2026-07-04 19:51 ` [RFC PATCH v1 04/17] blk-throttle: protect throttle state with td lock Yu Kuai
2026-07-04 19:51 ` [RFC PATCH v1 05/17] block: add bio_alloc_atomic() for atomic bio users Yu Kuai
2026-07-04 19:51 ` [RFC PATCH v1 06/17] blk-cgroup: support non-blocking bio association Yu Kuai
2026-07-04 19:51 ` [RFC PATCH v1 07/17] block: support non-blocking bio allocation with a bdev Yu Kuai
2026-07-04 19:51 ` [RFC PATCH v1 08/17] bcache: avoid sleeping blkg association from locked paths Yu Kuai
2026-07-04 19:51 ` [RFC PATCH v1 09/17] dm bufio: avoid blkg association from GFP_NOWAIT bio init Yu Kuai
2026-07-04 19:51 ` [RFC PATCH v1 10/17] dm pcache: handle non-blocking bio clone init failure Yu Kuai
2026-07-04 19:51 ` [RFC PATCH v1 11/17] block: avoid scheduling from non-blocking helper allocations Yu Kuai
2026-07-04 19:51 ` [RFC PATCH v1 12/17] dm: avoid sleeping blkg association from NOWAIT remaps Yu Kuai
2026-07-04 19:51 ` [RFC PATCH v1 13/17] bfq: avoid blkg lookup from locked cgroup update Yu Kuai
2026-07-04 19:51 ` [RFC PATCH v1 14/17] blk-cgroup: protect blkgs with blkcg_mutex Yu Kuai
2026-07-04 19:51 ` [RFC PATCH v1 15/17] blk-cgroup: remove blkg radix tree preloading Yu Kuai
2026-07-04 19:51 ` Yu Kuai [this message]
2026-07-04 19:51 ` [RFC PATCH v1 17/17] blk-cgroup: share blkg creation between lookup and config prep Yu Kuai
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260704195124.1375075-17-yukuai@kernel.org \
--to=yukuai@kernel.org \
--cc=agk@redhat.com \
--cc=axboe@kernel.dk \
--cc=bmarzins@redhat.com \
--cc=cengku@gmail.com \
--cc=cgroups@vger.kernel.org \
--cc=colyli@fygo.io \
--cc=dm-devel@lists.linux.dev \
--cc=dongsheng.yang@linux.dev \
--cc=hch@lst.de \
--cc=josef@toxicpanda.com \
--cc=kbusch@kernel.org \
--cc=kent.overstreet@linux.dev \
--cc=linux-bcache@vger.kernel.org \
--cc=linux-block@vger.kernel.org \
--cc=linux-nvme@lists.infradead.org \
--cc=mpatocka@redhat.com \
--cc=nilay@linux.ibm.com \
--cc=sagi@grimberg.me \
--cc=snitzer@kernel.org \
--cc=tj@kernel.org \
--cc=yukuai@fygo.io \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox