From: "Darrick J. Wong" <djwong@kernel.org>
To: Christoph Hellwig <hch@lst.de>
Cc: Carlos Maiolino <cem@kernel.org>,
Dave Chinner <dchinner@redhat.com>,
linux-xfs@vger.kernel.org
Subject: Re: [PATCH 2/3] xfs: use a lockref for the buffer reference count
Date: Mon, 19 Jan 2026 18:53:55 -0800 [thread overview]
Message-ID: <20260120025355.GI15551@frogsfrogsfrogs> (raw)
In-Reply-To: <20260119153156.4088290-3-hch@lst.de>
On Mon, Jan 19, 2026 at 04:31:36PM +0100, Christoph Hellwig wrote:
> The lockref structure allows incrementing/decrementing counters like
> an atomic_t for the fast path, while still allowing complex slow path
> operations as if the counter was protected by a lock. The only slow
> path operations that actually need to take the lock are the final
> put, LRU evictions and marking a buffer stale.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
This looks like a pretty straightforward conversion, so
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
--D
> ---
> fs/xfs/xfs_buf.c | 79 ++++++++++++++++++----------------------------
> fs/xfs/xfs_buf.h | 4 +--
> fs/xfs/xfs_trace.h | 10 +++---
> 3 files changed, 38 insertions(+), 55 deletions(-)
>
> diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
> index aacdf080e400..348c91335163 100644
> --- a/fs/xfs/xfs_buf.c
> +++ b/fs/xfs/xfs_buf.c
> @@ -31,20 +31,20 @@ struct kmem_cache *xfs_buf_cache;
> *
> * xfs_buf_stale:
> * b_sema (caller holds)
> - * b_lock
> + * b_lockref.lock
> * lru_lock
> *
> * xfs_buf_rele:
> - * b_lock
> + * b_lockref.lock
> * lru_lock
> *
> * xfs_buftarg_drain_rele
> * lru_lock
> - * b_lock (trylock due to inversion)
> + * b_lockref.lock (trylock due to inversion)
> *
> * xfs_buftarg_isolate
> * lru_lock
> - * b_lock (trylock due to inversion)
> + * b_lockref.lock (trylock due to inversion)
> */
>
> static void xfs_buf_submit(struct xfs_buf *bp);
> @@ -78,11 +78,11 @@ xfs_buf_stale(
> */
> bp->b_flags &= ~_XBF_DELWRI_Q;
>
> - spin_lock(&bp->b_lock);
> + spin_lock(&bp->b_lockref.lock);
> atomic_set(&bp->b_lru_ref, 0);
> - if (bp->b_hold >= 0)
> + if (!__lockref_is_dead(&bp->b_lockref))
> list_lru_del_obj(&bp->b_target->bt_lru, &bp->b_lru);
> - spin_unlock(&bp->b_lock);
> + spin_unlock(&bp->b_lockref.lock);
> }
>
> static void
> @@ -274,10 +274,8 @@ xfs_buf_alloc(
> * inserting into the hash table are safe (and will have to wait for
> * the unlock to do anything non-trivial).
> */
> - bp->b_hold = 1;
> + lockref_init(&bp->b_lockref);
> sema_init(&bp->b_sema, 0); /* held, no waiters */
> -
> - spin_lock_init(&bp->b_lock);
> atomic_set(&bp->b_lru_ref, 1);
> init_completion(&bp->b_iowait);
> INIT_LIST_HEAD(&bp->b_lru);
> @@ -434,20 +432,6 @@ xfs_buf_find_lock(
> return 0;
> }
>
> -static bool
> -xfs_buf_try_hold(
> - struct xfs_buf *bp)
> -{
> - spin_lock(&bp->b_lock);
> - if (bp->b_hold == -1) {
> - spin_unlock(&bp->b_lock);
> - return false;
> - }
> - bp->b_hold++;
> - spin_unlock(&bp->b_lock);
> - return true;
> -}
> -
> static inline int
> xfs_buf_lookup(
> struct xfs_buf_cache *bch,
> @@ -460,7 +444,7 @@ xfs_buf_lookup(
>
> rcu_read_lock();
> bp = rhashtable_lookup(&bch->bc_hash, map, xfs_buf_hash_params);
> - if (!bp || !xfs_buf_try_hold(bp)) {
> + if (!bp || !lockref_get_not_dead(&bp->b_lockref)) {
> rcu_read_unlock();
> return -ENOENT;
> }
> @@ -511,7 +495,7 @@ xfs_buf_find_insert(
> error = PTR_ERR(bp);
> goto out_free_buf;
> }
> - if (bp && xfs_buf_try_hold(bp)) {
> + if (bp && lockref_get_not_dead(&bp->b_lockref)) {
> /* found an existing buffer */
> rcu_read_unlock();
> error = xfs_buf_find_lock(bp, flags);
> @@ -853,16 +837,14 @@ xfs_buf_hold(
> {
> trace_xfs_buf_hold(bp, _RET_IP_);
>
> - spin_lock(&bp->b_lock);
> - bp->b_hold++;
> - spin_unlock(&bp->b_lock);
> + lockref_get(&bp->b_lockref);
> }
>
> static void
> xfs_buf_destroy(
> struct xfs_buf *bp)
> {
> - ASSERT(bp->b_hold < 0);
> + ASSERT(__lockref_is_dead(&bp->b_lockref));
> ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
>
> if (!xfs_buf_is_uncached(bp)) {
> @@ -888,19 +870,20 @@ xfs_buf_rele(
> {
> trace_xfs_buf_rele(bp, _RET_IP_);
>
> - spin_lock(&bp->b_lock);
> - if (!--bp->b_hold) {
> + if (lockref_put_or_lock(&bp->b_lockref))
> + return;
> + if (!--bp->b_lockref.count) {
> if (xfs_buf_is_uncached(bp) || !atomic_read(&bp->b_lru_ref))
> goto kill;
> list_lru_add_obj(&bp->b_target->bt_lru, &bp->b_lru);
> }
> - spin_unlock(&bp->b_lock);
> + spin_unlock(&bp->b_lockref.lock);
> return;
>
> kill:
> - bp->b_hold = -1;
> + lockref_mark_dead(&bp->b_lockref);
> list_lru_del_obj(&bp->b_target->bt_lru, &bp->b_lru);
> - spin_unlock(&bp->b_lock);
> + spin_unlock(&bp->b_lockref.lock);
>
> xfs_buf_destroy(bp);
> }
> @@ -1471,18 +1454,18 @@ xfs_buftarg_drain_rele(
> struct xfs_buf *bp = container_of(item, struct xfs_buf, b_lru);
> struct list_head *dispose = arg;
>
> - if (!spin_trylock(&bp->b_lock))
> + if (!spin_trylock(&bp->b_lockref.lock))
> return LRU_SKIP;
> - if (bp->b_hold > 0) {
> + if (bp->b_lockref.count > 0) {
> /* need to wait, so skip it this pass */
> - spin_unlock(&bp->b_lock);
> + spin_unlock(&bp->b_lockref.lock);
> trace_xfs_buf_drain_buftarg(bp, _RET_IP_);
> return LRU_SKIP;
> }
>
> - bp->b_hold = -1;
> + lockref_mark_dead(&bp->b_lockref);
> list_lru_isolate_move(lru, item, dispose);
> - spin_unlock(&bp->b_lock);
> + spin_unlock(&bp->b_lockref.lock);
> return LRU_REMOVED;
> }
>
> @@ -1564,10 +1547,10 @@ xfs_buftarg_isolate(
> struct list_head *dispose = arg;
>
> /*
> - * We are inverting the lru lock vs bp->b_lock order here, so use a
> - * trylock. If we fail to get the lock, just skip the buffer.
> + * We are inverting the lru lock vs bp->b_lockref.lock order here, so
> + * use a trylock. If we fail to get the lock, just skip the buffer.
> */
> - if (!spin_trylock(&bp->b_lock))
> + if (!spin_trylock(&bp->b_lockref.lock))
> return LRU_SKIP;
>
> /*
> @@ -1575,9 +1558,9 @@ xfs_buftarg_isolate(
> * free it. It will be added to the LRU again when the reference count
> * hits zero.
> */
> - if (bp->b_hold > 0) {
> + if (bp->b_lockref.count > 0) {
> list_lru_isolate(lru, &bp->b_lru);
> - spin_unlock(&bp->b_lock);
> + spin_unlock(&bp->b_lockref.lock);
> return LRU_REMOVED;
> }
>
> @@ -1587,13 +1570,13 @@ xfs_buftarg_isolate(
> * buffer, otherwise it gets another trip through the LRU.
> */
> if (atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
> - spin_unlock(&bp->b_lock);
> + spin_unlock(&bp->b_lockref.lock);
> return LRU_ROTATE;
> }
>
> - bp->b_hold = -1;
> + lockref_mark_dead(&bp->b_lockref);
> list_lru_isolate_move(lru, item, dispose);
> - spin_unlock(&bp->b_lock);
> + spin_unlock(&bp->b_lockref.lock);
> return LRU_REMOVED;
> }
>
> diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
> index 1117cd9cbfb9..3a1d066e1c13 100644
> --- a/fs/xfs/xfs_buf.h
> +++ b/fs/xfs/xfs_buf.h
> @@ -14,6 +14,7 @@
> #include <linux/dax.h>
> #include <linux/uio.h>
> #include <linux/list_lru.h>
> +#include <linux/lockref.h>
>
> extern struct kmem_cache *xfs_buf_cache;
>
> @@ -154,8 +155,7 @@ struct xfs_buf {
>
> xfs_daddr_t b_rhash_key; /* buffer cache index */
> int b_length; /* size of buffer in BBs */
> - spinlock_t b_lock; /* internal state lock */
> - unsigned int b_hold; /* reference count */
> + struct lockref b_lockref; /* refcount + lock */
> atomic_t b_lru_ref; /* lru reclaim ref count */
> xfs_buf_flags_t b_flags; /* status flags */
> struct semaphore b_sema; /* semaphore for lockables */
> diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
> index f70afbf3cb19..abf022d5ff42 100644
> --- a/fs/xfs/xfs_trace.h
> +++ b/fs/xfs/xfs_trace.h
> @@ -736,7 +736,7 @@ DECLARE_EVENT_CLASS(xfs_buf_class,
> __entry->dev = bp->b_target->bt_dev;
> __entry->bno = xfs_buf_daddr(bp);
> __entry->nblks = bp->b_length;
> - __entry->hold = bp->b_hold;
> + __entry->hold = bp->b_lockref.count;
> __entry->pincount = atomic_read(&bp->b_pin_count);
> __entry->lockval = bp->b_sema.count;
> __entry->flags = bp->b_flags;
> @@ -810,7 +810,7 @@ DECLARE_EVENT_CLASS(xfs_buf_flags_class,
> __entry->bno = xfs_buf_daddr(bp);
> __entry->length = bp->b_length;
> __entry->flags = flags;
> - __entry->hold = bp->b_hold;
> + __entry->hold = bp->b_lockref.count;
> __entry->pincount = atomic_read(&bp->b_pin_count);
> __entry->lockval = bp->b_sema.count;
> __entry->caller_ip = caller_ip;
> @@ -854,7 +854,7 @@ TRACE_EVENT(xfs_buf_ioerror,
> __entry->dev = bp->b_target->bt_dev;
> __entry->bno = xfs_buf_daddr(bp);
> __entry->length = bp->b_length;
> - __entry->hold = bp->b_hold;
> + __entry->hold = bp->b_lockref.count;
> __entry->pincount = atomic_read(&bp->b_pin_count);
> __entry->lockval = bp->b_sema.count;
> __entry->error = error;
> @@ -898,7 +898,7 @@ DECLARE_EVENT_CLASS(xfs_buf_item_class,
> __entry->buf_bno = xfs_buf_daddr(bip->bli_buf);
> __entry->buf_len = bip->bli_buf->b_length;
> __entry->buf_flags = bip->bli_buf->b_flags;
> - __entry->buf_hold = bip->bli_buf->b_hold;
> + __entry->buf_hold = bip->bli_buf->b_lockref.count;
> __entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count);
> __entry->buf_lockval = bip->bli_buf->b_sema.count;
> __entry->li_flags = bip->bli_item.li_flags;
> @@ -5181,7 +5181,7 @@ DECLARE_EVENT_CLASS(xfbtree_buf_class,
> __entry->xfino = file_inode(xfbt->target->bt_file)->i_ino;
> __entry->bno = xfs_buf_daddr(bp);
> __entry->nblks = bp->b_length;
> - __entry->hold = bp->b_hold;
> + __entry->hold = bp->b_lockref.count;
> __entry->pincount = atomic_read(&bp->b_pin_count);
> __entry->lockval = bp->b_sema.count;
> __entry->flags = bp->b_flags;
> --
> 2.47.3
>
>
next prev parent reply other threads:[~2026-01-20 2:53 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-01-19 15:31 buffer cache simplification Christoph Hellwig
2026-01-19 15:31 ` [PATCH 1/3] xfs: don't keep a reference for buffers on the LRU Christoph Hellwig
2026-01-20 2:53 ` Darrick J. Wong
2026-01-20 6:55 ` Christoph Hellwig
2026-01-19 15:31 ` [PATCH 2/3] xfs: use a lockref for the buffer reference count Christoph Hellwig
2026-01-20 2:53 ` Darrick J. Wong [this message]
2026-01-19 15:31 ` [PATCH 3/3] xfs: switch (back) to a per-buftarg buffer hash Christoph Hellwig
2026-01-20 2:39 ` Darrick J. Wong
2026-01-20 7:06 ` Christoph Hellwig
2026-01-20 15:50 ` Darrick J. Wong
-- strict thread matches above, loose matches on Subject: below --
2026-01-22 5:26 buffer cache simplification v2 Christoph Hellwig
2026-01-22 5:26 ` [PATCH 2/3] xfs: use a lockref for the buffer reference count Christoph Hellwig
2026-01-23 12:04 ` Carlos Maiolino
2026-01-26 5:37 buffer cache simplification v3 Christoph Hellwig
2026-01-26 5:38 ` [PATCH 2/3] xfs: use a lockref for the buffer reference count Christoph Hellwig
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260120025355.GI15551@frogsfrogsfrogs \
--to=djwong@kernel.org \
--cc=cem@kernel.org \
--cc=dchinner@redhat.com \
--cc=hch@lst.de \
--cc=linux-xfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox