All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Darrick J. Wong" <djwong@kernel.org>
To: Christoph Hellwig <hch@lst.de>
Cc: Carlos Maiolino <cem@kernel.org>,
	Dave Chinner <dchinner@redhat.com>,
	linux-xfs@vger.kernel.org,
	syzbot+0391d34e801643e2809b@syzkaller.appspotmail.com
Subject: Re: [PATCH 3/3] xfs: switch (back) to a per-buftarg buffer hash
Date: Mon, 19 Jan 2026 18:39:18 -0800	[thread overview]
Message-ID: <20260120023918.GG15551@frogsfrogsfrogs> (raw)
In-Reply-To: <20260119153156.4088290-4-hch@lst.de>

On Mon, Jan 19, 2026 at 04:31:37PM +0100, Christoph Hellwig wrote:
> The per-AG buffer hashes were added when all buffer lookups took a
> per-hash look.  Since then we've made lookups entirely lockless and
> removed the need for a hash-wide lock for inserts and removals as
> well.  With this there is no need to sharding the hash, so reduce the
> used resources by using a per-buftarg hash for all buftargs.

Hey, not having all the per-ag buffer cache sounds neat!

> Long after writing this initially, syzbot found a problem in the
> buffer cache teardown order, which this happens to fix as well.

What did we get wrong, specifically?

Also: Is there a simpler fix for this bug that we can stuff into old lts
kernels?  Or is this fix independent of the b_hold and lockref changes
in the previous patches?

--D

> Reported-by: syzbot+0391d34e801643e2809b@syzkaller.appspotmail.com
> Tested-by: syzbot+0391d34e801643e2809b@syzkaller.appspotmail.com
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/xfs/libxfs/xfs_ag.c | 13 ++---------
>  fs/xfs/libxfs/xfs_ag.h |  2 --
>  fs/xfs/xfs_buf.c       | 51 +++++++++++-------------------------------
>  fs/xfs/xfs_buf.h       | 10 +--------
>  fs/xfs/xfs_buf_mem.c   | 11 ++-------
>  5 files changed, 18 insertions(+), 69 deletions(-)
> 
> diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
> index 586918ed1cbf..a41d782e8e8c 100644
> --- a/fs/xfs/libxfs/xfs_ag.c
> +++ b/fs/xfs/libxfs/xfs_ag.c
> @@ -110,10 +110,7 @@ xfs_perag_uninit(
>  	struct xfs_group	*xg)
>  {
>  #ifdef __KERNEL__
> -	struct xfs_perag	*pag = to_perag(xg);
> -
> -	cancel_delayed_work_sync(&pag->pag_blockgc_work);
> -	xfs_buf_cache_destroy(&pag->pag_bcache);
> +	cancel_delayed_work_sync(&to_perag(xg)->pag_blockgc_work);
>  #endif
>  }
>  
> @@ -235,10 +232,6 @@ xfs_perag_alloc(
>  	INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
>  #endif /* __KERNEL__ */
>  
> -	error = xfs_buf_cache_init(&pag->pag_bcache);
> -	if (error)
> -		goto out_free_perag;
> -
>  	/*
>  	 * Pre-calculated geometry
>  	 */
> @@ -250,12 +243,10 @@ xfs_perag_alloc(
>  
>  	error = xfs_group_insert(mp, pag_group(pag), index, XG_TYPE_AG);
>  	if (error)
> -		goto out_buf_cache_destroy;
> +		goto out_free_perag;
>  
>  	return 0;
>  
> -out_buf_cache_destroy:
> -	xfs_buf_cache_destroy(&pag->pag_bcache);
>  out_free_perag:
>  	kfree(pag);
>  	return error;
> diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h
> index 1f24cfa27321..f02323416973 100644
> --- a/fs/xfs/libxfs/xfs_ag.h
> +++ b/fs/xfs/libxfs/xfs_ag.h
> @@ -85,8 +85,6 @@ struct xfs_perag {
>  	int		pag_ici_reclaimable;	/* reclaimable inodes */
>  	unsigned long	pag_ici_reclaim_cursor;	/* reclaim restart point */
>  
> -	struct xfs_buf_cache	pag_bcache;
> -
>  	/* background prealloc block trimming */
>  	struct delayed_work	pag_blockgc_work;
>  #endif /* __KERNEL__ */
> diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
> index 348c91335163..76eb7c5a73f1 100644
> --- a/fs/xfs/xfs_buf.c
> +++ b/fs/xfs/xfs_buf.c
> @@ -363,20 +363,6 @@ static const struct rhashtable_params xfs_buf_hash_params = {
>  	.obj_cmpfn		= _xfs_buf_obj_cmp,
>  };
>  
> -int
> -xfs_buf_cache_init(
> -	struct xfs_buf_cache	*bch)
> -{
> -	return rhashtable_init(&bch->bc_hash, &xfs_buf_hash_params);
> -}
> -
> -void
> -xfs_buf_cache_destroy(
> -	struct xfs_buf_cache	*bch)
> -{
> -	rhashtable_destroy(&bch->bc_hash);
> -}
> -
>  static int
>  xfs_buf_map_verify(
>  	struct xfs_buftarg	*btp,
> @@ -434,7 +420,7 @@ xfs_buf_find_lock(
>  
>  static inline int
>  xfs_buf_lookup(
> -	struct xfs_buf_cache	*bch,
> +	struct xfs_buftarg	*btp,
>  	struct xfs_buf_map	*map,
>  	xfs_buf_flags_t		flags,
>  	struct xfs_buf		**bpp)
> @@ -443,7 +429,7 @@ xfs_buf_lookup(
>  	int			error;
>  
>  	rcu_read_lock();
> -	bp = rhashtable_lookup(&bch->bc_hash, map, xfs_buf_hash_params);
> +	bp = rhashtable_lookup(&btp->bt_hash, map, xfs_buf_hash_params);
>  	if (!bp || !lockref_get_not_dead(&bp->b_lockref)) {
>  		rcu_read_unlock();
>  		return -ENOENT;
> @@ -468,7 +454,6 @@ xfs_buf_lookup(
>  static int
>  xfs_buf_find_insert(
>  	struct xfs_buftarg	*btp,
> -	struct xfs_buf_cache	*bch,
>  	struct xfs_perag	*pag,
>  	struct xfs_buf_map	*cmap,
>  	struct xfs_buf_map	*map,
> @@ -488,7 +473,7 @@ xfs_buf_find_insert(
>  	new_bp->b_pag = pag;
>  
>  	rcu_read_lock();
> -	bp = rhashtable_lookup_get_insert_fast(&bch->bc_hash,
> +	bp = rhashtable_lookup_get_insert_fast(&btp->bt_hash,
>  			&new_bp->b_rhash_head, xfs_buf_hash_params);
>  	if (IS_ERR(bp)) {
>  		rcu_read_unlock();
> @@ -530,16 +515,6 @@ xfs_buftarg_get_pag(
>  	return xfs_perag_get(mp, xfs_daddr_to_agno(mp, map->bm_bn));
>  }
>  
> -static inline struct xfs_buf_cache *
> -xfs_buftarg_buf_cache(
> -	struct xfs_buftarg		*btp,
> -	struct xfs_perag		*pag)
> -{
> -	if (pag)
> -		return &pag->pag_bcache;
> -	return btp->bt_cache;
> -}
> -
>  /*
>   * Assembles a buffer covering the specified range. The code is optimised for
>   * cache hits, as metadata intensive workloads will see 3 orders of magnitude
> @@ -553,7 +528,6 @@ xfs_buf_get_map(
>  	xfs_buf_flags_t		flags,
>  	struct xfs_buf		**bpp)
>  {
> -	struct xfs_buf_cache	*bch;
>  	struct xfs_perag	*pag;
>  	struct xfs_buf		*bp = NULL;
>  	struct xfs_buf_map	cmap = { .bm_bn = map[0].bm_bn };
> @@ -570,9 +544,8 @@ xfs_buf_get_map(
>  		return error;
>  
>  	pag = xfs_buftarg_get_pag(btp, &cmap);
> -	bch = xfs_buftarg_buf_cache(btp, pag);
>  
> -	error = xfs_buf_lookup(bch, &cmap, flags, &bp);
> +	error = xfs_buf_lookup(btp, &cmap, flags, &bp);
>  	if (error && error != -ENOENT)
>  		goto out_put_perag;
>  
> @@ -584,7 +557,7 @@ xfs_buf_get_map(
>  			goto out_put_perag;
>  
>  		/* xfs_buf_find_insert() consumes the perag reference. */
> -		error = xfs_buf_find_insert(btp, bch, pag, &cmap, map, nmaps,
> +		error = xfs_buf_find_insert(btp, pag, &cmap, map, nmaps,
>  				flags, &bp);
>  		if (error)
>  			return error;
> @@ -848,11 +821,8 @@ xfs_buf_destroy(
>  	ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
>  
>  	if (!xfs_buf_is_uncached(bp)) {
> -		struct xfs_buf_cache	*bch =
> -			xfs_buftarg_buf_cache(bp->b_target, bp->b_pag);
> -
> -		rhashtable_remove_fast(&bch->bc_hash, &bp->b_rhash_head,
> -				xfs_buf_hash_params);
> +		rhashtable_remove_fast(&bp->b_target->bt_hash,
> +				&bp->b_rhash_head, xfs_buf_hash_params);
>  
>  		if (bp->b_pag)
>  			xfs_perag_put(bp->b_pag);
> @@ -1619,6 +1589,7 @@ xfs_destroy_buftarg(
>  	ASSERT(percpu_counter_sum(&btp->bt_readahead_count) == 0);
>  	percpu_counter_destroy(&btp->bt_readahead_count);
>  	list_lru_destroy(&btp->bt_lru);
> +	rhashtable_destroy(&btp->bt_hash);
>  }
>  
>  void
> @@ -1713,8 +1684,10 @@ xfs_init_buftarg(
>  	ratelimit_state_init(&btp->bt_ioerror_rl, 30 * HZ,
>  			     DEFAULT_RATELIMIT_BURST);
>  
> -	if (list_lru_init(&btp->bt_lru))
> +	if (rhashtable_init(&btp->bt_hash, &xfs_buf_hash_params))
>  		return -ENOMEM;
> +	if (list_lru_init(&btp->bt_lru))
> +		goto out_destroy_hash;
>  	if (percpu_counter_init(&btp->bt_readahead_count, 0, GFP_KERNEL))
>  		goto out_destroy_lru;
>  
> @@ -1732,6 +1705,8 @@ xfs_init_buftarg(
>  	percpu_counter_destroy(&btp->bt_readahead_count);
>  out_destroy_lru:
>  	list_lru_destroy(&btp->bt_lru);
> +out_destroy_hash:
> +	rhashtable_destroy(&btp->bt_hash);
>  	return -ENOMEM;
>  }
>  
> diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
> index 3a1d066e1c13..bf39d89f0f6d 100644
> --- a/fs/xfs/xfs_buf.h
> +++ b/fs/xfs/xfs_buf.h
> @@ -69,13 +69,6 @@ typedef unsigned int xfs_buf_flags_t;
>  	{ XBF_INCORE,		"INCORE" }, \
>  	{ XBF_TRYLOCK,		"TRYLOCK" }
>  
> -struct xfs_buf_cache {
> -	struct rhashtable	bc_hash;
> -};
> -
> -int xfs_buf_cache_init(struct xfs_buf_cache *bch);
> -void xfs_buf_cache_destroy(struct xfs_buf_cache *bch);
> -
>  /*
>   * The xfs_buftarg contains 2 notions of "sector size" -
>   *
> @@ -113,8 +106,7 @@ struct xfs_buftarg {
>  	unsigned int		bt_awu_min;
>  	unsigned int		bt_awu_max;
>  
> -	/* built-in cache, if we're not using the perag one */
> -	struct xfs_buf_cache	bt_cache[];
> +	struct rhashtable	bt_hash;
>  };
>  
>  struct xfs_buf_map {
> diff --git a/fs/xfs/xfs_buf_mem.c b/fs/xfs/xfs_buf_mem.c
> index 0106da0a9f44..86dbec5ee203 100644
> --- a/fs/xfs/xfs_buf_mem.c
> +++ b/fs/xfs/xfs_buf_mem.c
> @@ -58,7 +58,7 @@ xmbuf_alloc(
>  	struct xfs_buftarg	*btp;
>  	int			error;
>  
> -	btp = kzalloc(struct_size(btp, bt_cache, 1), GFP_KERNEL);
> +	btp = kzalloc(sizeof(*btp), GFP_KERNEL);
>  	if (!btp)
>  		return -ENOMEM;
>  
> @@ -81,10 +81,6 @@ xmbuf_alloc(
>  	/* ensure all writes are below EOF to avoid pagecache zeroing */
>  	i_size_write(inode, inode->i_sb->s_maxbytes);
>  
> -	error = xfs_buf_cache_init(btp->bt_cache);
> -	if (error)
> -		goto out_file;
> -
>  	/* Initialize buffer target */
>  	btp->bt_mount = mp;
>  	btp->bt_dev = (dev_t)-1U;
> @@ -95,15 +91,13 @@ xmbuf_alloc(
>  
>  	error = xfs_init_buftarg(btp, XMBUF_BLOCKSIZE, descr);
>  	if (error)
> -		goto out_bcache;
> +		goto out_file;
>  
>  	trace_xmbuf_create(btp);
>  
>  	*btpp = btp;
>  	return 0;
>  
> -out_bcache:
> -	xfs_buf_cache_destroy(btp->bt_cache);
>  out_file:
>  	fput(file);
>  out_free_btp:
> @@ -122,7 +116,6 @@ xmbuf_free(
>  	trace_xmbuf_free(btp);
>  
>  	xfs_destroy_buftarg(btp);
> -	xfs_buf_cache_destroy(btp->bt_cache);
>  	fput(btp->bt_file);
>  	kfree(btp);
>  }
> -- 
> 2.47.3
> 
> 

  reply	other threads:[~2026-01-20  2:39 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-01-19 15:31 buffer cache simplification Christoph Hellwig
2026-01-19 15:31 ` [PATCH 1/3] xfs: don't keep a reference for buffers on the LRU Christoph Hellwig
2026-01-20  2:53   ` Darrick J. Wong
2026-01-20  6:55     ` Christoph Hellwig
2026-01-19 15:31 ` [PATCH 2/3] xfs: use a lockref for the buffer reference count Christoph Hellwig
2026-01-20  2:53   ` Darrick J. Wong
2026-01-19 15:31 ` [PATCH 3/3] xfs: switch (back) to a per-buftarg buffer hash Christoph Hellwig
2026-01-20  2:39   ` Darrick J. Wong [this message]
2026-01-20  7:06     ` Christoph Hellwig
2026-01-20 15:50       ` Darrick J. Wong
  -- strict thread matches above, loose matches on Subject: below --
2026-01-22  5:26 buffer cache simplification v2 Christoph Hellwig
2026-01-22  5:26 ` [PATCH 3/3] xfs: switch (back) to a per-buftarg buffer hash Christoph Hellwig
2026-01-22 18:10   ` Darrick J. Wong
2026-01-23  5:36     ` Christoph Hellwig
2026-01-23  7:01       ` Darrick J. Wong
2026-01-26  5:37 buffer cache simplification v3 Christoph Hellwig
2026-01-26  5:38 ` [PATCH 3/3] xfs: switch (back) to a per-buftarg buffer hash Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260120023918.GG15551@frogsfrogsfrogs \
    --to=djwong@kernel.org \
    --cc=cem@kernel.org \
    --cc=dchinner@redhat.com \
    --cc=hch@lst.de \
    --cc=linux-xfs@vger.kernel.org \
    --cc=syzbot+0391d34e801643e2809b@syzkaller.appspotmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.