Linux block layer
 help / color / mirror / Atom feed
From: Yu Kuai <yukuai@kernel.org>
To: Jens Axboe <axboe@kernel.dk>, Tejun Heo <tj@kernel.org>
Cc: Christoph Hellwig <hch@lst.de>, Keith Busch <kbusch@kernel.org>,
	Sagi Grimberg <sagi@grimberg.me>,
	Alasdair Kergon <agk@redhat.com>,
	Benjamin Marzinski <bmarzins@redhat.com>,
	Mike Snitzer <snitzer@kernel.org>,
	Mikulas Patocka <mpatocka@redhat.com>,
	Dongsheng Yang <dongsheng.yang@linux.dev>,
	Zheng Gu <cengku@gmail.com>, Coly Li <colyli@fygo.io>,
	Kent Overstreet <kent.overstreet@linux.dev>,
	Josef Bacik <josef@toxicpanda.com>, Yu Kuai <yukuai@fygo.io>,
	Nilay Shroff <nilay@linux.ibm.com>,
	linux-block@vger.kernel.org, cgroups@vger.kernel.org,
	linux-nvme@lists.infradead.org, dm-devel@lists.linux.dev,
	linux-bcache@vger.kernel.org
Subject: [RFC PATCH v1 16/17] blk-cgroup: allocate blkgs in blkg_create
Date: Sun,  5 Jul 2026 03:51:23 +0800	[thread overview]
Message-ID: <20260704195124.1375075-17-yukuai@kernel.org> (raw)
In-Reply-To: <20260704195124.1375075-1-yukuai@kernel.org>

From: Yu Kuai <yukuai@fygo.io>

Now that radix tree preloading is gone, callers no longer need to allocate
a blkg before calling blkg_create(). Move the allocation into blkg_create()
and have callers pass the desired GFP mask instead.

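Use GFP_NOIO for runtime and config blkg creation so slow paths can sleep
without recursing into IO reclaim, keep GFP_KERNEL for root blkg setup, and
use GFP_ATOMIC when nowait bio association creates a missing blkg after a
successful q->blkcg_mutex trylock.

While at it, allocate policy data in blkcg_activate_policy() with GFP_NOIO
while q->blkcg_mutex is held, which lets the GFP_NOWAIT attempt and the
preallocate-and-retry fallback be dropped.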

Signed-off-by: Yu Kuai <yukuai@fygo.io>
---
 block/blk-cgroup.c | 89 ++++++++++------------------------------------
 1 file changed, 18 insertions(+), 71 deletions(-)

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index b99ab8d67798..ddc9073d7ab9 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -371,14 +371,10 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct gendisk *disk,
 	return NULL;
 }
 
-/*
- * If @new_blkg is %NULL, this function tries to allocate a new one as
- * necessary using %GFP_NOWAIT.  @new_blkg is always consumed on return.
- */
 static struct blkcg_gq *blkg_create(struct blkcg *blkcg, struct gendisk *disk,
-				    struct blkcg_gq *new_blkg)
+				    gfp_t gfp_mask)
 {
-	struct blkcg_gq *blkg;
+	struct blkcg_gq *blkg = NULL;
 	int i, ret;
 
 	lockdep_assert_held(&disk->queue->blkcg_mutex);
@@ -389,15 +385,11 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, struct gendisk *disk,
 		goto err_free_blkg;
 	}
 
-	/* allocate */
-	if (!new_blkg) {
-		new_blkg = blkg_alloc(blkcg, disk, GFP_NOWAIT);
-		if (unlikely(!new_blkg)) {
-			ret = -ENOMEM;
-			goto err_free_blkg;
-		}
+	blkg = blkg_alloc(blkcg, disk, gfp_mask);
+	if (unlikely(!blkg)) {
+		ret = -ENOMEM;
+		goto err_free_blkg;
 	}
-	blkg = new_blkg;
 
 	/* link parent */
 	if (blkcg_parent(blkcg)) {
@@ -447,8 +439,8 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, struct gendisk *disk,
 	return ERR_PTR(ret);
 
 err_free_blkg:
-	if (new_blkg)
-		blkg_free(new_blkg);
+	if (blkg)
+		blkg_free(blkg);
 	return ERR_PTR(ret);
 }
 
@@ -505,7 +497,7 @@ static struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
 		}
 		rcu_read_unlock();
 
-		blkg = blkg_create(pos, disk, NULL);
+		blkg = blkg_create(pos, disk, GFP_NOIO);
 		if (IS_ERR(blkg)) {
 			blkg = ret_blkg;
 			break;
@@ -858,7 +850,6 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
 	while (true) {
 		struct blkcg *pos = blkcg;
 		struct blkcg *parent;
-		struct blkcg_gq *new_blkg;
 
 		parent = blkcg_parent(blkcg);
 		rcu_read_lock();
@@ -868,14 +859,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
 		}
 		rcu_read_unlock();
 
-		new_blkg = blkg_alloc(pos, disk, GFP_NOIO);
-		if (unlikely(!new_blkg)) {
-			ret = -ENOMEM;
-			goto fail_unlock;
-		}
-
 		if (!blkcg_policy_enabled(q, pol)) {
-			blkg_free(new_blkg);
 			ret = -EOPNOTSUPP;
 			goto fail_unlock;
 		}
@@ -883,10 +867,8 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
 		rcu_read_lock();
 		blkg = blkg_lookup(pos, q);
 		rcu_read_unlock();
-		if (blkg) {
-			blkg_free(new_blkg);
-		} else {
-			blkg = blkg_create(pos, disk, new_blkg);
+		if (!blkg) {
+			blkg = blkg_create(pos, disk, GFP_NOIO);
 			if (IS_ERR(blkg)) {
 				ret = PTR_ERR(blkg);
 				goto fail_unlock;
@@ -1436,7 +1418,7 @@ void blkg_init_queue(struct request_queue *q)
 int blkcg_init_disk(struct gendisk *disk)
 {
 	struct request_queue *q = disk->queue;
-	struct blkcg_gq *new_blkg, *blkg;
+	struct blkcg_gq *blkg;
 
 	/*
 	 * If the queue is shared across disk rebind (e.g., SCSI), the
@@ -1450,13 +1432,9 @@ int blkcg_init_disk(struct gendisk *disk)
 	 */
 	wait_var_event(&q->root_blkg, !READ_ONCE(q->root_blkg));
 
-	new_blkg = blkg_alloc(&blkcg_root, disk, GFP_KERNEL);
-	if (!new_blkg)
-		return -ENOMEM;
-
 	/* Make sure the root blkg exists. */
 	mutex_lock(&q->blkcg_mutex);
-	blkg = blkg_create(&blkcg_root, disk, new_blkg);
+	blkg = blkg_create(&blkcg_root, disk, GFP_KERNEL);
 	if (IS_ERR(blkg))
 		goto err_unlock;
 	q->root_blkg = blkg;
@@ -1559,8 +1537,7 @@ static void blkg_free_policy_data(struct blkcg_gq *blkg,
 int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol)
 {
 	struct request_queue *q = disk->queue;
-	struct blkg_policy_data *pd_prealloc = NULL;
-	struct blkcg_gq *blkg, *pinned_blkg = NULL;
+	struct blkcg_gq *blkg;
 	unsigned int memflags;
 	int ret;
 
@@ -1578,7 +1555,6 @@ int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol)
 	if (queue_is_mq(q))
 		memflags = blk_mq_freeze_queue(q);
 
-retry:
 	mutex_lock(&q->blkcg_mutex);
 
 	/* blkg_list is pushed at the head, reverse walk to initialize parents first */
@@ -1590,34 +1566,9 @@ int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol)
 		if (hlist_unhashed(&blkg->blkcg_node))
 			continue;
 
-		/* If prealloc matches, use it; otherwise try GFP_NOWAIT. */
-		if (blkg == pinned_blkg) {
-			pd = pd_prealloc;
-			pd_prealloc = NULL;
-		} else {
-			pd = pol->pd_alloc_fn(disk, blkg->blkcg, GFP_NOWAIT);
-		}
-
-		if (!pd) {
-			/*
-			 * GFP_NOWAIT failed.  Free the existing one and
-			 * prealloc for @blkg w/ GFP_KERNEL.
-			 */
-			if (pinned_blkg)
-				blkg_put(pinned_blkg);
-			blkg_get(blkg);
-			pinned_blkg = blkg;
-
-			mutex_unlock(&q->blkcg_mutex);
-
-			if (pd_prealloc)
-				pol->pd_free_fn(pd_prealloc);
-			pd_prealloc = pol->pd_alloc_fn(disk, blkg->blkcg,
-						       GFP_KERNEL);
-			if (pd_prealloc)
-				goto retry;
+		pd = pol->pd_alloc_fn(disk, blkg->blkcg, GFP_NOIO);
+		if (!pd)
 			goto enomem;
-		}
 
 		spin_lock_irq(&blkg->blkcg->lock);
 
@@ -1642,15 +1593,10 @@ int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol)
 	mutex_unlock(&q->blkcg_mutex);
 	if (queue_is_mq(q))
 		blk_mq_unfreeze_queue(q, memflags);
-	if (pinned_blkg)
-		blkg_put(pinned_blkg);
-	if (pd_prealloc)
-		pol->pd_free_fn(pd_prealloc);
 	return ret;
 
 enomem:
 	/* alloc failed, take down everything */
-	mutex_lock(&q->blkcg_mutex);
 	list_for_each_entry(blkg, &q->blkg_list, q_node)
 		blkg_free_policy_data(blkg, pol);
 	ret = -ENOMEM;
@@ -2080,7 +2026,8 @@ static inline struct blkcg_gq *blkg_tryget_closest(struct bio *bio,
 		if (!preemptible() || !mutex_trylock(&q->blkcg_mutex))
 			return NULL;
 
-		blkg = blkg_lookup_create(blkcg, bio->bi_bdev->bd_disk);
+		blkg = blkg_lookup_create(blkcg, bio->bi_bdev->bd_disk,
+					  GFP_ATOMIC);
 		if (blkg)
 			blkg = blkg_lookup_tryget(blkg);
 		mutex_unlock(&q->blkcg_mutex);
-- 
2.51.0


  parent reply	other threads:[~2026-07-04 19:54 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-07-04 19:51 [RFC PATCH v1 00/17] blk-cgroup: protect blkgs with blkcg_mutex Yu Kuai
2026-07-04 19:51 ` [RFC PATCH v1 01/17] nvme-multipath: retarget failedover bios from requeue work Yu Kuai
2026-07-04 19:51 ` [RFC PATCH v1 02/17] dm thin: avoid bio_set_dev under pool lock Yu Kuai
2026-07-04 19:51 ` [RFC PATCH v1 03/17] dm snapshot: avoid bio_set_dev in locked map paths Yu Kuai
2026-07-04 19:51 ` [RFC PATCH v1 04/17] blk-throttle: protect throttle state with td lock Yu Kuai
2026-07-04 19:51 ` [RFC PATCH v1 05/17] block: add bio_alloc_atomic() for atomic bio users Yu Kuai
2026-07-04 19:51 ` [RFC PATCH v1 06/17] blk-cgroup: support non-blocking bio association Yu Kuai
2026-07-04 19:51 ` [RFC PATCH v1 07/17] block: support non-blocking bio allocation with a bdev Yu Kuai
2026-07-04 19:51 ` [RFC PATCH v1 08/17] bcache: avoid sleeping blkg association from locked paths Yu Kuai
2026-07-04 19:51 ` [RFC PATCH v1 09/17] dm bufio: avoid blkg association from GFP_NOWAIT bio init Yu Kuai
2026-07-04 19:51 ` [RFC PATCH v1 10/17] dm pcache: handle non-blocking bio clone init failure Yu Kuai
2026-07-04 19:51 ` [RFC PATCH v1 11/17] block: avoid scheduling from non-blocking helper allocations Yu Kuai
2026-07-04 19:51 ` [RFC PATCH v1 12/17] dm: avoid sleeping blkg association from NOWAIT remaps Yu Kuai
2026-07-04 19:51 ` [RFC PATCH v1 13/17] bfq: avoid blkg lookup from locked cgroup update Yu Kuai
2026-07-04 19:51 ` [RFC PATCH v1 14/17] blk-cgroup: protect blkgs with blkcg_mutex Yu Kuai
2026-07-04 19:51 ` [RFC PATCH v1 15/17] blk-cgroup: remove blkg radix tree preloading Yu Kuai
2026-07-04 19:51 ` Yu Kuai [this message]
2026-07-04 19:51 ` [RFC PATCH v1 17/17] blk-cgroup: share blkg creation between lookup and config prep Yu Kuai

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260704195124.1375075-17-yukuai@kernel.org \
    --to=yukuai@kernel.org \
    --cc=agk@redhat.com \
    --cc=axboe@kernel.dk \
    --cc=bmarzins@redhat.com \
    --cc=cengku@gmail.com \
    --cc=cgroups@vger.kernel.org \
    --cc=colyli@fygo.io \
    --cc=dm-devel@lists.linux.dev \
    --cc=dongsheng.yang@linux.dev \
    --cc=hch@lst.de \
    --cc=josef@toxicpanda.com \
    --cc=kbusch@kernel.org \
    --cc=kent.overstreet@linux.dev \
    --cc=linux-bcache@vger.kernel.org \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-nvme@lists.infradead.org \
    --cc=mpatocka@redhat.com \
    --cc=nilay@linux.ibm.com \
    --cc=sagi@grimberg.me \
    --cc=snitzer@kernel.org \
    --cc=tj@kernel.org \
    --cc=yukuai@fygo.io \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox