* buffer cache simplification v4
@ 2026-03-16 15:41 Christoph Hellwig
2026-03-16 15:41 ` [PATCH 1/3] xfs: use a lockref for the buffer reference count Christoph Hellwig
` (3 more replies)
0 siblings, 4 replies; 10+ messages in thread
From: Christoph Hellwig @ 2026-03-16 15:41 UTC (permalink / raw)
To: Carlos Maiolino; +Cc: Dave Chinner, Brian Foster, linux-xfs
Hi all,
this series has a few old patches that simplify the LRU handling, and
moves back to only having a per-buftarg hash now that the buffer hash
is using the scalable rhashtable. While some of this looks like
performance work, performance and scalability is unchanged even on the
80 core dual socket system I test this on. Besides cleaning up the
code nicely, it also happens to fix a syzkaller reported use after free
during buffer shutdown, which happened incidentally because of how the
tear down of the buftarg vs the perag structures is handled.
Changes since v3:
- split the change to handling how referenced buffers on the LRU
are handled into a separate patch
Changes since v2:
- mark b_hold as signed in patch 1 before removing it in patch 2
- document the changed locking in xfs_buf_rele_cached.
Changes since v1:
- add more details and a link to a commit message
Diffstat:
libxfs/xfs_ag.c | 13 ----
libxfs/xfs_ag.h | 2
xfs_buf.c | 148 ++++++++++++++++++++------------------------------------
xfs_buf.h | 14 +----
xfs_buf_mem.c | 11 ----
xfs_trace.h | 10 +--
6 files changed, 66 insertions(+), 132 deletions(-)
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH 1/3] xfs: use a lockref for the buffer reference count
2026-03-16 15:41 buffer cache simplification v4 Christoph Hellwig
@ 2026-03-16 15:41 ` Christoph Hellwig
2026-03-16 15:41 ` [PATCH 2/3] xfs: switch (back) to a per-buftarg buffer hash Christoph Hellwig
` (2 subsequent siblings)
3 siblings, 0 replies; 10+ messages in thread
From: Christoph Hellwig @ 2026-03-16 15:41 UTC (permalink / raw)
To: Carlos Maiolino; +Cc: Dave Chinner, Brian Foster, linux-xfs, Darrick J. Wong
The lockref structure allows incrementing/decrementing counters like
an atomic_t for the fast path, while still allowing complex slow path
operations as if the counter was protected by a lock. The only slow
path operations that actually need to take the lock are the final
put, LRU evictions and marking a buffer stale.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
---
fs/xfs/xfs_buf.c | 79 ++++++++++++++++++----------------------------
fs/xfs/xfs_buf.h | 4 +--
fs/xfs/xfs_trace.h | 10 +++---
3 files changed, 38 insertions(+), 55 deletions(-)
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 3cd37f082a69..d585848a6be1 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -31,20 +31,20 @@ struct kmem_cache *xfs_buf_cache;
*
* xfs_buf_stale:
* b_sema (caller holds)
- * b_lock
+ * b_lockref.lock
* lru_lock
*
* xfs_buf_rele:
- * b_lock
+ * b_lockref.lock
* lru_lock
*
* xfs_buftarg_drain_rele
* lru_lock
- * b_lock (trylock due to inversion)
+ * b_lockref.lock (trylock due to inversion)
*
* xfs_buftarg_isolate
* lru_lock
- * b_lock (trylock due to inversion)
+ * b_lockref.lock (trylock due to inversion)
*/
static void xfs_buf_submit(struct xfs_buf *bp);
@@ -78,11 +78,11 @@ xfs_buf_stale(
*/
bp->b_flags &= ~_XBF_DELWRI_Q;
- spin_lock(&bp->b_lock);
+ spin_lock(&bp->b_lockref.lock);
atomic_set(&bp->b_lru_ref, 0);
- if (bp->b_hold >= 0)
+ if (!__lockref_is_dead(&bp->b_lockref))
list_lru_del_obj(&bp->b_target->bt_lru, &bp->b_lru);
- spin_unlock(&bp->b_lock);
+ spin_unlock(&bp->b_lockref.lock);
}
static void
@@ -274,10 +274,8 @@ xfs_buf_alloc(
* inserting into the hash table are safe (and will have to wait for
* the unlock to do anything non-trivial).
*/
- bp->b_hold = 1;
+ lockref_init(&bp->b_lockref);
sema_init(&bp->b_sema, 0); /* held, no waiters */
-
- spin_lock_init(&bp->b_lock);
atomic_set(&bp->b_lru_ref, 1);
init_completion(&bp->b_iowait);
INIT_LIST_HEAD(&bp->b_lru);
@@ -434,20 +432,6 @@ xfs_buf_find_lock(
return 0;
}
-static bool
-xfs_buf_try_hold(
- struct xfs_buf *bp)
-{
- spin_lock(&bp->b_lock);
- if (bp->b_hold == -1) {
- spin_unlock(&bp->b_lock);
- return false;
- }
- bp->b_hold++;
- spin_unlock(&bp->b_lock);
- return true;
-}
-
static inline int
xfs_buf_lookup(
struct xfs_buf_cache *bch,
@@ -460,7 +444,7 @@ xfs_buf_lookup(
rcu_read_lock();
bp = rhashtable_lookup(&bch->bc_hash, map, xfs_buf_hash_params);
- if (!bp || !xfs_buf_try_hold(bp)) {
+ if (!bp || !lockref_get_not_dead(&bp->b_lockref)) {
rcu_read_unlock();
return -ENOENT;
}
@@ -511,7 +495,7 @@ xfs_buf_find_insert(
error = PTR_ERR(bp);
goto out_free_buf;
}
- if (bp && xfs_buf_try_hold(bp)) {
+ if (bp && lockref_get_not_dead(&bp->b_lockref)) {
/* found an existing buffer */
rcu_read_unlock();
error = xfs_buf_find_lock(bp, flags);
@@ -853,16 +837,14 @@ xfs_buf_hold(
{
trace_xfs_buf_hold(bp, _RET_IP_);
- spin_lock(&bp->b_lock);
- bp->b_hold++;
- spin_unlock(&bp->b_lock);
+ lockref_get(&bp->b_lockref);
}
static void
xfs_buf_destroy(
struct xfs_buf *bp)
{
- ASSERT(bp->b_hold < 0);
+ ASSERT(__lockref_is_dead(&bp->b_lockref));
ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
if (!xfs_buf_is_uncached(bp)) {
@@ -888,19 +870,20 @@ xfs_buf_rele(
{
trace_xfs_buf_rele(bp, _RET_IP_);
- spin_lock(&bp->b_lock);
- if (!--bp->b_hold) {
+ if (lockref_put_or_lock(&bp->b_lockref))
+ return;
+ if (!--bp->b_lockref.count) {
if (xfs_buf_is_uncached(bp) || !atomic_read(&bp->b_lru_ref))
goto kill;
list_lru_add_obj(&bp->b_target->bt_lru, &bp->b_lru);
}
- spin_unlock(&bp->b_lock);
+ spin_unlock(&bp->b_lockref.lock);
return;
kill:
- bp->b_hold = -1;
+ lockref_mark_dead(&bp->b_lockref);
list_lru_del_obj(&bp->b_target->bt_lru, &bp->b_lru);
- spin_unlock(&bp->b_lock);
+ spin_unlock(&bp->b_lockref.lock);
xfs_buf_destroy(bp);
}
@@ -1471,18 +1454,18 @@ xfs_buftarg_drain_rele(
struct xfs_buf *bp = container_of(item, struct xfs_buf, b_lru);
struct list_head *dispose = arg;
- if (!spin_trylock(&bp->b_lock))
+ if (!spin_trylock(&bp->b_lockref.lock))
return LRU_SKIP;
- if (bp->b_hold > 0) {
+ if (bp->b_lockref.count > 0) {
/* need to wait, so skip it this pass */
- spin_unlock(&bp->b_lock);
+ spin_unlock(&bp->b_lockref.lock);
trace_xfs_buf_drain_buftarg(bp, _RET_IP_);
return LRU_SKIP;
}
- bp->b_hold = -1;
+ lockref_mark_dead(&bp->b_lockref);
list_lru_isolate_move(lru, item, dispose);
- spin_unlock(&bp->b_lock);
+ spin_unlock(&bp->b_lockref.lock);
return LRU_REMOVED;
}
@@ -1564,10 +1547,10 @@ xfs_buftarg_isolate(
struct list_head *dispose = arg;
/*
- * We are inverting the lru lock vs bp->b_lock order here, so use a
- * trylock. If we fail to get the lock, just skip the buffer.
+ * We are inverting the lru lock vs bp->b_lockref.lock order here, so
+ * use a trylock. If we fail to get the lock, just skip the buffer.
*/
- if (!spin_trylock(&bp->b_lock))
+ if (!spin_trylock(&bp->b_lockref.lock))
return LRU_SKIP;
/*
@@ -1576,7 +1559,7 @@ xfs_buftarg_isolate(
* buffer, otherwise it gets another trip through the LRU.
*/
if (atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
- spin_unlock(&bp->b_lock);
+ spin_unlock(&bp->b_lockref.lock);
return LRU_ROTATE;
}
@@ -1584,15 +1567,15 @@ xfs_buftarg_isolate(
* If the buffer is in use, remove it from the LRU for now as we can't
* free it. It will be freed when the last reference drops.
*/
- if (bp->b_hold > 0) {
+ if (bp->b_lockref.count > 0) {
list_lru_isolate(lru, &bp->b_lru);
- spin_unlock(&bp->b_lock);
+ spin_unlock(&bp->b_lockref.lock);
return LRU_REMOVED;
}
- bp->b_hold = -1;
+ lockref_mark_dead(&bp->b_lockref);
list_lru_isolate_move(lru, item, dispose);
- spin_unlock(&bp->b_lock);
+ spin_unlock(&bp->b_lockref.lock);
return LRU_REMOVED;
}
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index e7324d58bd96..3a1d066e1c13 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -14,6 +14,7 @@
#include <linux/dax.h>
#include <linux/uio.h>
#include <linux/list_lru.h>
+#include <linux/lockref.h>
extern struct kmem_cache *xfs_buf_cache;
@@ -154,7 +155,7 @@ struct xfs_buf {
xfs_daddr_t b_rhash_key; /* buffer cache index */
int b_length; /* size of buffer in BBs */
- int b_hold; /* reference count */
+ struct lockref b_lockref; /* refcount + lock */
atomic_t b_lru_ref; /* lru reclaim ref count */
xfs_buf_flags_t b_flags; /* status flags */
struct semaphore b_sema; /* semaphore for lockables */
@@ -164,7 +165,6 @@ struct xfs_buf {
* bt_lru_lock and not by b_sema
*/
struct list_head b_lru; /* lru list */
- spinlock_t b_lock; /* internal state lock */
wait_queue_head_t b_waiters; /* unpin waiters */
struct list_head b_list;
struct xfs_perag *b_pag;
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 813e5a9f57eb..c191f4b163a8 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -739,7 +739,7 @@ DECLARE_EVENT_CLASS(xfs_buf_class,
__entry->dev = bp->b_target->bt_dev;
__entry->bno = xfs_buf_daddr(bp);
__entry->nblks = bp->b_length;
- __entry->hold = bp->b_hold;
+ __entry->hold = bp->b_lockref.count;
__entry->pincount = atomic_read(&bp->b_pin_count);
__entry->lockval = bp->b_sema.count;
__entry->flags = bp->b_flags;
@@ -813,7 +813,7 @@ DECLARE_EVENT_CLASS(xfs_buf_flags_class,
__entry->bno = xfs_buf_daddr(bp);
__entry->length = bp->b_length;
__entry->flags = flags;
- __entry->hold = bp->b_hold;
+ __entry->hold = bp->b_lockref.count;
__entry->pincount = atomic_read(&bp->b_pin_count);
__entry->lockval = bp->b_sema.count;
__entry->caller_ip = caller_ip;
@@ -857,7 +857,7 @@ TRACE_EVENT(xfs_buf_ioerror,
__entry->dev = bp->b_target->bt_dev;
__entry->bno = xfs_buf_daddr(bp);
__entry->length = bp->b_length;
- __entry->hold = bp->b_hold;
+ __entry->hold = bp->b_lockref.count;
__entry->pincount = atomic_read(&bp->b_pin_count);
__entry->lockval = bp->b_sema.count;
__entry->error = error;
@@ -901,7 +901,7 @@ DECLARE_EVENT_CLASS(xfs_buf_item_class,
__entry->buf_bno = xfs_buf_daddr(bip->bli_buf);
__entry->buf_len = bip->bli_buf->b_length;
__entry->buf_flags = bip->bli_buf->b_flags;
- __entry->buf_hold = bip->bli_buf->b_hold;
+ __entry->buf_hold = bip->bli_buf->b_lockref.count;
__entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count);
__entry->buf_lockval = bip->bli_buf->b_sema.count;
__entry->li_flags = bip->bli_item.li_flags;
@@ -5185,7 +5185,7 @@ DECLARE_EVENT_CLASS(xfbtree_buf_class,
__entry->xfino = file_inode(xfbt->target->bt_file)->i_ino;
__entry->bno = xfs_buf_daddr(bp);
__entry->nblks = bp->b_length;
- __entry->hold = bp->b_hold;
+ __entry->hold = bp->b_lockref.count;
__entry->pincount = atomic_read(&bp->b_pin_count);
__entry->lockval = bp->b_sema.count;
__entry->flags = bp->b_flags;
--
2.47.3
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 2/3] xfs: switch (back) to a per-buftarg buffer hash
2026-03-16 15:41 buffer cache simplification v4 Christoph Hellwig
2026-03-16 15:41 ` [PATCH 1/3] xfs: use a lockref for the buffer reference count Christoph Hellwig
@ 2026-03-16 15:41 ` Christoph Hellwig
2026-03-16 22:30 ` Darrick J. Wong
2026-03-16 15:41 ` [PATCH 3/3] xfs: don't decrement the buffer LRU count for in-use buffers Christoph Hellwig
2026-03-17 12:13 ` buffer cache simplification v4 Brian Foster
3 siblings, 1 reply; 10+ messages in thread
From: Christoph Hellwig @ 2026-03-16 15:41 UTC (permalink / raw)
To: Carlos Maiolino
Cc: Dave Chinner, Brian Foster, linux-xfs,
syzbot+0391d34e801643e2809b
The per-AG buffer hashes were added when all buffer lookups took a
per-hash lock. Since then we've made lookups entirely lockless and
removed the need for a hash-wide lock for inserts and removals as
well. With this there is no need to shard the hash, so reduce the
used resources by using a per-buftarg hash for all buftargs.
Long after writing this initially, syzbot found a problem in the buffer
cache teardown order, which this happens to fix as well by doing the
entire buffer cache teardown in one place instead of splitting it
between destroying the buftarg and the perag structures.
Link: https://lore.kernel.org/linux-xfs/aLeUdemAZ5wmtZel@dread.disaster.area/
Reported-by: syzbot+0391d34e801643e2809b@syzkaller.appspotmail.com
Tested-by: syzbot+0391d34e801643e2809b@syzkaller.appspotmail.com
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
fs/xfs/libxfs/xfs_ag.c | 13 ++---------
fs/xfs/libxfs/xfs_ag.h | 2 --
fs/xfs/xfs_buf.c | 51 +++++++++++-------------------------------
fs/xfs/xfs_buf.h | 10 +--------
fs/xfs/xfs_buf_mem.c | 11 ++-------
5 files changed, 18 insertions(+), 69 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
index bd8fbb40b49e..dcd2f93b6a6c 100644
--- a/fs/xfs/libxfs/xfs_ag.c
+++ b/fs/xfs/libxfs/xfs_ag.c
@@ -110,10 +110,7 @@ xfs_perag_uninit(
struct xfs_group *xg)
{
#ifdef __KERNEL__
- struct xfs_perag *pag = to_perag(xg);
-
- cancel_delayed_work_sync(&pag->pag_blockgc_work);
- xfs_buf_cache_destroy(&pag->pag_bcache);
+ cancel_delayed_work_sync(&to_perag(xg)->pag_blockgc_work);
#endif
}
@@ -235,10 +232,6 @@ xfs_perag_alloc(
INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
#endif /* __KERNEL__ */
- error = xfs_buf_cache_init(&pag->pag_bcache);
- if (error)
- goto out_free_perag;
-
/*
* Pre-calculated geometry
*/
@@ -250,12 +243,10 @@ xfs_perag_alloc(
error = xfs_group_insert(mp, pag_group(pag), index, XG_TYPE_AG);
if (error)
- goto out_buf_cache_destroy;
+ goto out_free_perag;
return 0;
-out_buf_cache_destroy:
- xfs_buf_cache_destroy(&pag->pag_bcache);
out_free_perag:
kfree(pag);
return error;
diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h
index 3cd4790768ff..16a9b43a3c27 100644
--- a/fs/xfs/libxfs/xfs_ag.h
+++ b/fs/xfs/libxfs/xfs_ag.h
@@ -85,8 +85,6 @@ struct xfs_perag {
int pag_ici_reclaimable; /* reclaimable inodes */
unsigned long pag_ici_reclaim_cursor; /* reclaim restart point */
- struct xfs_buf_cache pag_bcache;
-
/* background prealloc block trimming */
struct delayed_work pag_blockgc_work;
#endif /* __KERNEL__ */
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index d585848a6be1..c0a4d0a37f57 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -363,20 +363,6 @@ static const struct rhashtable_params xfs_buf_hash_params = {
.obj_cmpfn = _xfs_buf_obj_cmp,
};
-int
-xfs_buf_cache_init(
- struct xfs_buf_cache *bch)
-{
- return rhashtable_init(&bch->bc_hash, &xfs_buf_hash_params);
-}
-
-void
-xfs_buf_cache_destroy(
- struct xfs_buf_cache *bch)
-{
- rhashtable_destroy(&bch->bc_hash);
-}
-
static int
xfs_buf_map_verify(
struct xfs_buftarg *btp,
@@ -434,7 +420,7 @@ xfs_buf_find_lock(
static inline int
xfs_buf_lookup(
- struct xfs_buf_cache *bch,
+ struct xfs_buftarg *btp,
struct xfs_buf_map *map,
xfs_buf_flags_t flags,
struct xfs_buf **bpp)
@@ -443,7 +429,7 @@ xfs_buf_lookup(
int error;
rcu_read_lock();
- bp = rhashtable_lookup(&bch->bc_hash, map, xfs_buf_hash_params);
+ bp = rhashtable_lookup(&btp->bt_hash, map, xfs_buf_hash_params);
if (!bp || !lockref_get_not_dead(&bp->b_lockref)) {
rcu_read_unlock();
return -ENOENT;
@@ -468,7 +454,6 @@ xfs_buf_lookup(
static int
xfs_buf_find_insert(
struct xfs_buftarg *btp,
- struct xfs_buf_cache *bch,
struct xfs_perag *pag,
struct xfs_buf_map *cmap,
struct xfs_buf_map *map,
@@ -488,7 +473,7 @@ xfs_buf_find_insert(
new_bp->b_pag = pag;
rcu_read_lock();
- bp = rhashtable_lookup_get_insert_fast(&bch->bc_hash,
+ bp = rhashtable_lookup_get_insert_fast(&btp->bt_hash,
&new_bp->b_rhash_head, xfs_buf_hash_params);
if (IS_ERR(bp)) {
rcu_read_unlock();
@@ -530,16 +515,6 @@ xfs_buftarg_get_pag(
return xfs_perag_get(mp, xfs_daddr_to_agno(mp, map->bm_bn));
}
-static inline struct xfs_buf_cache *
-xfs_buftarg_buf_cache(
- struct xfs_buftarg *btp,
- struct xfs_perag *pag)
-{
- if (pag)
- return &pag->pag_bcache;
- return btp->bt_cache;
-}
-
/*
* Assembles a buffer covering the specified range. The code is optimised for
* cache hits, as metadata intensive workloads will see 3 orders of magnitude
@@ -553,7 +528,6 @@ xfs_buf_get_map(
xfs_buf_flags_t flags,
struct xfs_buf **bpp)
{
- struct xfs_buf_cache *bch;
struct xfs_perag *pag;
struct xfs_buf *bp = NULL;
struct xfs_buf_map cmap = { .bm_bn = map[0].bm_bn };
@@ -570,9 +544,8 @@ xfs_buf_get_map(
return error;
pag = xfs_buftarg_get_pag(btp, &cmap);
- bch = xfs_buftarg_buf_cache(btp, pag);
- error = xfs_buf_lookup(bch, &cmap, flags, &bp);
+ error = xfs_buf_lookup(btp, &cmap, flags, &bp);
if (error && error != -ENOENT)
goto out_put_perag;
@@ -584,7 +557,7 @@ xfs_buf_get_map(
goto out_put_perag;
/* xfs_buf_find_insert() consumes the perag reference. */
- error = xfs_buf_find_insert(btp, bch, pag, &cmap, map, nmaps,
+ error = xfs_buf_find_insert(btp, pag, &cmap, map, nmaps,
flags, &bp);
if (error)
return error;
@@ -848,11 +821,8 @@ xfs_buf_destroy(
ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
if (!xfs_buf_is_uncached(bp)) {
- struct xfs_buf_cache *bch =
- xfs_buftarg_buf_cache(bp->b_target, bp->b_pag);
-
- rhashtable_remove_fast(&bch->bc_hash, &bp->b_rhash_head,
- xfs_buf_hash_params);
+ rhashtable_remove_fast(&bp->b_target->bt_hash,
+ &bp->b_rhash_head, xfs_buf_hash_params);
if (bp->b_pag)
xfs_perag_put(bp->b_pag);
@@ -1618,6 +1588,7 @@ xfs_destroy_buftarg(
ASSERT(percpu_counter_sum(&btp->bt_readahead_count) == 0);
percpu_counter_destroy(&btp->bt_readahead_count);
list_lru_destroy(&btp->bt_lru);
+ rhashtable_destroy(&btp->bt_hash);
}
void
@@ -1712,8 +1683,10 @@ xfs_init_buftarg(
ratelimit_state_init(&btp->bt_ioerror_rl, 30 * HZ,
DEFAULT_RATELIMIT_BURST);
- if (list_lru_init(&btp->bt_lru))
+ if (rhashtable_init(&btp->bt_hash, &xfs_buf_hash_params))
return -ENOMEM;
+ if (list_lru_init(&btp->bt_lru))
+ goto out_destroy_hash;
if (percpu_counter_init(&btp->bt_readahead_count, 0, GFP_KERNEL))
goto out_destroy_lru;
@@ -1731,6 +1704,8 @@ xfs_init_buftarg(
percpu_counter_destroy(&btp->bt_readahead_count);
out_destroy_lru:
list_lru_destroy(&btp->bt_lru);
+out_destroy_hash:
+ rhashtable_destroy(&btp->bt_hash);
return -ENOMEM;
}
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 3a1d066e1c13..bf39d89f0f6d 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -69,13 +69,6 @@ typedef unsigned int xfs_buf_flags_t;
{ XBF_INCORE, "INCORE" }, \
{ XBF_TRYLOCK, "TRYLOCK" }
-struct xfs_buf_cache {
- struct rhashtable bc_hash;
-};
-
-int xfs_buf_cache_init(struct xfs_buf_cache *bch);
-void xfs_buf_cache_destroy(struct xfs_buf_cache *bch);
-
/*
* The xfs_buftarg contains 2 notions of "sector size" -
*
@@ -113,8 +106,7 @@ struct xfs_buftarg {
unsigned int bt_awu_min;
unsigned int bt_awu_max;
- /* built-in cache, if we're not using the perag one */
- struct xfs_buf_cache bt_cache[];
+ struct rhashtable bt_hash;
};
struct xfs_buf_map {
diff --git a/fs/xfs/xfs_buf_mem.c b/fs/xfs/xfs_buf_mem.c
index b0b3696bf599..b2fd7276b131 100644
--- a/fs/xfs/xfs_buf_mem.c
+++ b/fs/xfs/xfs_buf_mem.c
@@ -58,7 +58,7 @@ xmbuf_alloc(
struct xfs_buftarg *btp;
int error;
- btp = kzalloc_flex(*btp, bt_cache, 1);
+ btp = kzalloc_obj(*btp);
if (!btp)
return -ENOMEM;
@@ -81,10 +81,6 @@ xmbuf_alloc(
/* ensure all writes are below EOF to avoid pagecache zeroing */
i_size_write(inode, inode->i_sb->s_maxbytes);
- error = xfs_buf_cache_init(btp->bt_cache);
- if (error)
- goto out_file;
-
/* Initialize buffer target */
btp->bt_mount = mp;
btp->bt_dev = (dev_t)-1U;
@@ -95,15 +91,13 @@ xmbuf_alloc(
error = xfs_init_buftarg(btp, XMBUF_BLOCKSIZE, descr);
if (error)
- goto out_bcache;
+ goto out_file;
trace_xmbuf_create(btp);
*btpp = btp;
return 0;
-out_bcache:
- xfs_buf_cache_destroy(btp->bt_cache);
out_file:
fput(file);
out_free_btp:
@@ -122,7 +116,6 @@ xmbuf_free(
trace_xmbuf_free(btp);
xfs_destroy_buftarg(btp);
- xfs_buf_cache_destroy(btp->bt_cache);
fput(btp->bt_file);
kfree(btp);
}
--
2.47.3
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 3/3] xfs: don't decrement the buffer LRU count for in-use buffers
2026-03-16 15:41 buffer cache simplification v4 Christoph Hellwig
2026-03-16 15:41 ` [PATCH 1/3] xfs: use a lockref for the buffer reference count Christoph Hellwig
2026-03-16 15:41 ` [PATCH 2/3] xfs: switch (back) to a per-buftarg buffer hash Christoph Hellwig
@ 2026-03-16 15:41 ` Christoph Hellwig
2026-03-16 22:20 ` Darrick J. Wong
2026-03-17 12:13 ` buffer cache simplification v4 Brian Foster
3 siblings, 1 reply; 10+ messages in thread
From: Christoph Hellwig @ 2026-03-16 15:41 UTC (permalink / raw)
To: Carlos Maiolino; +Cc: Dave Chinner, Brian Foster, linux-xfs
XFS buffers are added to the LRU when they are unused, but are only
removed from the LRU lazily when the LRU list scan finds a used buffer.
So far this only happens when the LRU counter hits 0, which is suboptimal
as buffers that were added to the LRU, but are in use again still consume
LRU scanning resources and are aged while actually in use.
Fix this by checking for in-use buffers and removing them from the LRU
before decrementing the LRU counter.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
fs/xfs/xfs_buf.c | 22 ++++++++++++----------
1 file changed, 12 insertions(+), 10 deletions(-)
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index c0a4d0a37f57..8ba9b74339a7 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1523,6 +1523,18 @@ xfs_buftarg_isolate(
if (!spin_trylock(&bp->b_lockref.lock))
return LRU_SKIP;
+ /*
+ * If the buffer is in use, remove it from the LRU for now. We can't
+ * free it while someone is using it, and we should also not count
+ * eviction passed for it, just as if it hadn't been added to the LRU
+ * yet.
+ */
+ if (bp->b_lockref.count > 0) {
+ list_lru_isolate(lru, &bp->b_lru);
+ spin_unlock(&bp->b_lockref.lock);
+ return LRU_REMOVED;
+ }
+
/*
* Decrement the b_lru_ref count unless the value is already
* zero. If the value is already zero, we need to reclaim the
@@ -1533,16 +1545,6 @@ xfs_buftarg_isolate(
return LRU_ROTATE;
}
- /*
- * If the buffer is in use, remove it from the LRU for now as we can't
- * free it. It will be freed when the last reference drops.
- */
- if (bp->b_lockref.count > 0) {
- list_lru_isolate(lru, &bp->b_lru);
- spin_unlock(&bp->b_lockref.lock);
- return LRU_REMOVED;
- }
-
lockref_mark_dead(&bp->b_lockref);
list_lru_isolate_move(lru, item, dispose);
spin_unlock(&bp->b_lockref.lock);
--
2.47.3
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH 3/3] xfs: don't decrement the buffer LRU count for in-use buffers
2026-03-16 15:41 ` [PATCH 3/3] xfs: don't decrement the buffer LRU count for in-use buffers Christoph Hellwig
@ 2026-03-16 22:20 ` Darrick J. Wong
0 siblings, 0 replies; 10+ messages in thread
From: Darrick J. Wong @ 2026-03-16 22:20 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: Carlos Maiolino, Dave Chinner, Brian Foster, linux-xfs
On Mon, Mar 16, 2026 at 04:41:35PM +0100, Christoph Hellwig wrote:
> XFS buffers are added to the LRU when they are unused, but are only
> removed from the LRU lazily when the LRU list scan finds a used buffer.
> So far this only happen when the LRU counter hits 0, which is suboptimal
> as buffers that were added to the LRU, but are in use again still consume
> LRU scanning resources and are aged while actually in use.
>
> Fix this by checking for in-use buffers and removing the from the LRU
> before decrementing the LRU counter.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
That makes sense to me -- if someone holds the buffer, it's clearly not
cold and dead enough to get kicked out.
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
--D
> ---
> fs/xfs/xfs_buf.c | 22 ++++++++++++----------
> 1 file changed, 12 insertions(+), 10 deletions(-)
>
> diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
> index c0a4d0a37f57..8ba9b74339a7 100644
> --- a/fs/xfs/xfs_buf.c
> +++ b/fs/xfs/xfs_buf.c
> @@ -1523,6 +1523,18 @@ xfs_buftarg_isolate(
> if (!spin_trylock(&bp->b_lockref.lock))
> return LRU_SKIP;
>
> + /*
> + * If the buffer is in use, remove it from the LRU for now. We can't
> + * free it while someone is using it, and we should also not count
> + * eviction passed for it, just as if it hadn't been added to the LRU
> + * yet.
> + */
> + if (bp->b_lockref.count > 0) {
> + list_lru_isolate(lru, &bp->b_lru);
> + spin_unlock(&bp->b_lockref.lock);
> + return LRU_REMOVED;
> + }
> +
> /*
> * Decrement the b_lru_ref count unless the value is already
> * zero. If the value is already zero, we need to reclaim the
> @@ -1533,16 +1545,6 @@ xfs_buftarg_isolate(
> return LRU_ROTATE;
> }
>
> - /*
> - * If the buffer is in use, remove it from the LRU for now as we can't
> - * free it. It will be freed when the last reference drops.
> - */
> - if (bp->b_lockref.count > 0) {
> - list_lru_isolate(lru, &bp->b_lru);
> - spin_unlock(&bp->b_lockref.lock);
> - return LRU_REMOVED;
> - }
> -
> lockref_mark_dead(&bp->b_lockref);
> list_lru_isolate_move(lru, item, dispose);
> spin_unlock(&bp->b_lockref.lock);
> --
> 2.47.3
>
>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 2/3] xfs: switch (back) to a per-buftarg buffer hash
2026-03-16 15:41 ` [PATCH 2/3] xfs: switch (back) to a per-buftarg buffer hash Christoph Hellwig
@ 2026-03-16 22:30 ` Darrick J. Wong
0 siblings, 0 replies; 10+ messages in thread
From: Darrick J. Wong @ 2026-03-16 22:30 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Carlos Maiolino, Dave Chinner, Brian Foster, linux-xfs,
syzbot+0391d34e801643e2809b
On Mon, Mar 16, 2026 at 04:41:34PM +0100, Christoph Hellwig wrote:
> The per-AG buffer hashes were added when all buffer lookups took a
> per-hash look. Since then we've made lookups entirely lockless and
> removed the need for a hash-wide lock for inserts and removals as
> well. With this there is no need to sharding the hash, so reduce the
> used resources by using a per-buftarg hash for all buftargs.
>
> Long after writing this initially, syzbot found a problem in the buffer
> cache teardown order, which this happens to fix as well by doing the
> entire buffer cache teardown in one places instead of splitting it
> between destroying the buftarg and the perag structures.
>
> Link: https://lore.kernel.org/linux-xfs/aLeUdemAZ5wmtZel@dread.disaster.area/
> Reported-by: syzbot+0391d34e801643e2809b@syzkaller.appspotmail.com
> Tested-by: syzbot+0391d34e801643e2809b@syzkaller.appspotmail.com
> Signed-off-by: Christoph Hellwig <hch@lst.de>
This looks reasonable to me, though it deserves a cursory glance from
the author of the original triage message.
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
--D
> ---
> fs/xfs/libxfs/xfs_ag.c | 13 ++---------
> fs/xfs/libxfs/xfs_ag.h | 2 --
> fs/xfs/xfs_buf.c | 51 +++++++++++-------------------------------
> fs/xfs/xfs_buf.h | 10 +--------
> fs/xfs/xfs_buf_mem.c | 11 ++-------
> 5 files changed, 18 insertions(+), 69 deletions(-)
>
> diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
> index bd8fbb40b49e..dcd2f93b6a6c 100644
> --- a/fs/xfs/libxfs/xfs_ag.c
> +++ b/fs/xfs/libxfs/xfs_ag.c
> @@ -110,10 +110,7 @@ xfs_perag_uninit(
> struct xfs_group *xg)
> {
> #ifdef __KERNEL__
> - struct xfs_perag *pag = to_perag(xg);
> -
> - cancel_delayed_work_sync(&pag->pag_blockgc_work);
> - xfs_buf_cache_destroy(&pag->pag_bcache);
> + cancel_delayed_work_sync(&to_perag(xg)->pag_blockgc_work);
> #endif
> }
>
> @@ -235,10 +232,6 @@ xfs_perag_alloc(
> INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
> #endif /* __KERNEL__ */
>
> - error = xfs_buf_cache_init(&pag->pag_bcache);
> - if (error)
> - goto out_free_perag;
> -
> /*
> * Pre-calculated geometry
> */
> @@ -250,12 +243,10 @@ xfs_perag_alloc(
>
> error = xfs_group_insert(mp, pag_group(pag), index, XG_TYPE_AG);
> if (error)
> - goto out_buf_cache_destroy;
> + goto out_free_perag;
>
> return 0;
>
> -out_buf_cache_destroy:
> - xfs_buf_cache_destroy(&pag->pag_bcache);
> out_free_perag:
> kfree(pag);
> return error;
> diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h
> index 3cd4790768ff..16a9b43a3c27 100644
> --- a/fs/xfs/libxfs/xfs_ag.h
> +++ b/fs/xfs/libxfs/xfs_ag.h
> @@ -85,8 +85,6 @@ struct xfs_perag {
> int pag_ici_reclaimable; /* reclaimable inodes */
> unsigned long pag_ici_reclaim_cursor; /* reclaim restart point */
>
> - struct xfs_buf_cache pag_bcache;
> -
> /* background prealloc block trimming */
> struct delayed_work pag_blockgc_work;
> #endif /* __KERNEL__ */
> diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
> index d585848a6be1..c0a4d0a37f57 100644
> --- a/fs/xfs/xfs_buf.c
> +++ b/fs/xfs/xfs_buf.c
> @@ -363,20 +363,6 @@ static const struct rhashtable_params xfs_buf_hash_params = {
> .obj_cmpfn = _xfs_buf_obj_cmp,
> };
>
> -int
> -xfs_buf_cache_init(
> - struct xfs_buf_cache *bch)
> -{
> - return rhashtable_init(&bch->bc_hash, &xfs_buf_hash_params);
> -}
> -
> -void
> -xfs_buf_cache_destroy(
> - struct xfs_buf_cache *bch)
> -{
> - rhashtable_destroy(&bch->bc_hash);
> -}
> -
> static int
> xfs_buf_map_verify(
> struct xfs_buftarg *btp,
> @@ -434,7 +420,7 @@ xfs_buf_find_lock(
>
> static inline int
> xfs_buf_lookup(
> - struct xfs_buf_cache *bch,
> + struct xfs_buftarg *btp,
> struct xfs_buf_map *map,
> xfs_buf_flags_t flags,
> struct xfs_buf **bpp)
> @@ -443,7 +429,7 @@ xfs_buf_lookup(
> int error;
>
> rcu_read_lock();
> - bp = rhashtable_lookup(&bch->bc_hash, map, xfs_buf_hash_params);
> + bp = rhashtable_lookup(&btp->bt_hash, map, xfs_buf_hash_params);
> if (!bp || !lockref_get_not_dead(&bp->b_lockref)) {
> rcu_read_unlock();
> return -ENOENT;
> @@ -468,7 +454,6 @@ xfs_buf_lookup(
> static int
> xfs_buf_find_insert(
> struct xfs_buftarg *btp,
> - struct xfs_buf_cache *bch,
> struct xfs_perag *pag,
> struct xfs_buf_map *cmap,
> struct xfs_buf_map *map,
> @@ -488,7 +473,7 @@ xfs_buf_find_insert(
> new_bp->b_pag = pag;
>
> rcu_read_lock();
> - bp = rhashtable_lookup_get_insert_fast(&bch->bc_hash,
> + bp = rhashtable_lookup_get_insert_fast(&btp->bt_hash,
> &new_bp->b_rhash_head, xfs_buf_hash_params);
> if (IS_ERR(bp)) {
> rcu_read_unlock();
> @@ -530,16 +515,6 @@ xfs_buftarg_get_pag(
> return xfs_perag_get(mp, xfs_daddr_to_agno(mp, map->bm_bn));
> }
>
> -static inline struct xfs_buf_cache *
> -xfs_buftarg_buf_cache(
> - struct xfs_buftarg *btp,
> - struct xfs_perag *pag)
> -{
> - if (pag)
> - return &pag->pag_bcache;
> - return btp->bt_cache;
> -}
> -
> /*
> * Assembles a buffer covering the specified range. The code is optimised for
> * cache hits, as metadata intensive workloads will see 3 orders of magnitude
> @@ -553,7 +528,6 @@ xfs_buf_get_map(
> xfs_buf_flags_t flags,
> struct xfs_buf **bpp)
> {
> - struct xfs_buf_cache *bch;
> struct xfs_perag *pag;
> struct xfs_buf *bp = NULL;
> struct xfs_buf_map cmap = { .bm_bn = map[0].bm_bn };
> @@ -570,9 +544,8 @@ xfs_buf_get_map(
> return error;
>
> pag = xfs_buftarg_get_pag(btp, &cmap);
> - bch = xfs_buftarg_buf_cache(btp, pag);
>
> - error = xfs_buf_lookup(bch, &cmap, flags, &bp);
> + error = xfs_buf_lookup(btp, &cmap, flags, &bp);
> if (error && error != -ENOENT)
> goto out_put_perag;
>
> @@ -584,7 +557,7 @@ xfs_buf_get_map(
> goto out_put_perag;
>
> /* xfs_buf_find_insert() consumes the perag reference. */
> - error = xfs_buf_find_insert(btp, bch, pag, &cmap, map, nmaps,
> + error = xfs_buf_find_insert(btp, pag, &cmap, map, nmaps,
> flags, &bp);
> if (error)
> return error;
> @@ -848,11 +821,8 @@ xfs_buf_destroy(
> ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
>
> if (!xfs_buf_is_uncached(bp)) {
> - struct xfs_buf_cache *bch =
> - xfs_buftarg_buf_cache(bp->b_target, bp->b_pag);
> -
> - rhashtable_remove_fast(&bch->bc_hash, &bp->b_rhash_head,
> - xfs_buf_hash_params);
> + rhashtable_remove_fast(&bp->b_target->bt_hash,
> + &bp->b_rhash_head, xfs_buf_hash_params);
>
> if (bp->b_pag)
> xfs_perag_put(bp->b_pag);
> @@ -1618,6 +1588,7 @@ xfs_destroy_buftarg(
> ASSERT(percpu_counter_sum(&btp->bt_readahead_count) == 0);
> percpu_counter_destroy(&btp->bt_readahead_count);
> list_lru_destroy(&btp->bt_lru);
> + rhashtable_destroy(&btp->bt_hash);
> }
>
> void
> @@ -1712,8 +1683,10 @@ xfs_init_buftarg(
> ratelimit_state_init(&btp->bt_ioerror_rl, 30 * HZ,
> DEFAULT_RATELIMIT_BURST);
>
> - if (list_lru_init(&btp->bt_lru))
> + if (rhashtable_init(&btp->bt_hash, &xfs_buf_hash_params))
> return -ENOMEM;
> + if (list_lru_init(&btp->bt_lru))
> + goto out_destroy_hash;
> if (percpu_counter_init(&btp->bt_readahead_count, 0, GFP_KERNEL))
> goto out_destroy_lru;
>
> @@ -1731,6 +1704,8 @@ xfs_init_buftarg(
> percpu_counter_destroy(&btp->bt_readahead_count);
> out_destroy_lru:
> list_lru_destroy(&btp->bt_lru);
> +out_destroy_hash:
> + rhashtable_destroy(&btp->bt_hash);
> return -ENOMEM;
> }
>
> diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
> index 3a1d066e1c13..bf39d89f0f6d 100644
> --- a/fs/xfs/xfs_buf.h
> +++ b/fs/xfs/xfs_buf.h
> @@ -69,13 +69,6 @@ typedef unsigned int xfs_buf_flags_t;
> { XBF_INCORE, "INCORE" }, \
> { XBF_TRYLOCK, "TRYLOCK" }
>
> -struct xfs_buf_cache {
> - struct rhashtable bc_hash;
> -};
> -
> -int xfs_buf_cache_init(struct xfs_buf_cache *bch);
> -void xfs_buf_cache_destroy(struct xfs_buf_cache *bch);
> -
> /*
> * The xfs_buftarg contains 2 notions of "sector size" -
> *
> @@ -113,8 +106,7 @@ struct xfs_buftarg {
> unsigned int bt_awu_min;
> unsigned int bt_awu_max;
>
> - /* built-in cache, if we're not using the perag one */
> - struct xfs_buf_cache bt_cache[];
> + struct rhashtable bt_hash;
> };
>
> struct xfs_buf_map {
> diff --git a/fs/xfs/xfs_buf_mem.c b/fs/xfs/xfs_buf_mem.c
> index b0b3696bf599..b2fd7276b131 100644
> --- a/fs/xfs/xfs_buf_mem.c
> +++ b/fs/xfs/xfs_buf_mem.c
> @@ -58,7 +58,7 @@ xmbuf_alloc(
> struct xfs_buftarg *btp;
> int error;
>
> - btp = kzalloc_flex(*btp, bt_cache, 1);
> + btp = kzalloc_obj(*btp);
> if (!btp)
> return -ENOMEM;
>
> @@ -81,10 +81,6 @@ xmbuf_alloc(
> /* ensure all writes are below EOF to avoid pagecache zeroing */
> i_size_write(inode, inode->i_sb->s_maxbytes);
>
> - error = xfs_buf_cache_init(btp->bt_cache);
> - if (error)
> - goto out_file;
> -
> /* Initialize buffer target */
> btp->bt_mount = mp;
> btp->bt_dev = (dev_t)-1U;
> @@ -95,15 +91,13 @@ xmbuf_alloc(
>
> error = xfs_init_buftarg(btp, XMBUF_BLOCKSIZE, descr);
> if (error)
> - goto out_bcache;
> + goto out_file;
>
> trace_xmbuf_create(btp);
>
> *btpp = btp;
> return 0;
>
> -out_bcache:
> - xfs_buf_cache_destroy(btp->bt_cache);
> out_file:
> fput(file);
> out_free_btp:
> @@ -122,7 +116,6 @@ xmbuf_free(
> trace_xmbuf_free(btp);
>
> xfs_destroy_buftarg(btp);
> - xfs_buf_cache_destroy(btp->bt_cache);
> fput(btp->bt_file);
> kfree(btp);
> }
> --
> 2.47.3
>
>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: buffer cache simplification v4
2026-03-16 15:41 buffer cache simplification v4 Christoph Hellwig
` (2 preceding siblings ...)
2026-03-16 15:41 ` [PATCH 3/3] xfs: don't decrement the buffer LRU count for in-use buffers Christoph Hellwig
@ 2026-03-17 12:13 ` Brian Foster
2026-03-17 12:56 ` Christoph Hellwig
3 siblings, 1 reply; 10+ messages in thread
From: Brian Foster @ 2026-03-17 12:13 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: Carlos Maiolino, Dave Chinner, linux-xfs
On Mon, Mar 16, 2026 at 04:41:32PM +0100, Christoph Hellwig wrote:
> Hi all,
>
> this series has a few old patches that simplify the LRU handling, and
> moves back to only having a per-buftarg hash now that the buffer hash
> is using the scalable rhashtable. While some of this looks like
> performance work, performance and scalability are unchanged even on the
> 80-core dual-socket system I test this on. Besides cleaning up the
> code nicely, it also happens to fix a syzkaller-reported use-after-free
> during buffer shutdown, which happened incidentally because of how the
> teardown of the buftarg vs the perag structures is handled.
>
> Changes since v3:
> - split the change to handling how referenced buffers on the LRU
> are handled into a separate patch
>
It looks like there's a patch ordering problem or something here. It
doesn't apply to master, and looks like patch 1 is trying to modify
hunks that don't yet exist. A local rebase issue related to reordering
the change in patch 3 perhaps..?
Brian
> Changes since v2:
> - mark b_hold as signed in patch 1 before removing it in patch 2
> - document the changed locking in xfs_buf_rele_cached.
>
> Changes since v1:
> - add more details and a link to a commit message
>
> Diffstat:
> libxfs/xfs_ag.c | 13 ----
> libxfs/xfs_ag.h | 2
> xfs_buf.c | 148 ++++++++++++++++++++------------------------------------
> xfs_buf.h | 14 +----
> xfs_buf_mem.c | 11 ----
> xfs_trace.h | 10 +--
> 6 files changed, 66 insertions(+), 132 deletions(-)
>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: buffer cache simplification v4
2026-03-17 12:13 ` buffer cache simplification v4 Brian Foster
@ 2026-03-17 12:56 ` Christoph Hellwig
2026-03-17 13:36 ` Brian Foster
0 siblings, 1 reply; 10+ messages in thread
From: Christoph Hellwig @ 2026-03-17 12:56 UTC (permalink / raw)
To: Brian Foster; +Cc: Christoph Hellwig, Carlos Maiolino, Dave Chinner, linux-xfs
On Tue, Mar 17, 2026 at 08:13:04AM -0400, Brian Foster wrote:
> On Mon, Mar 16, 2026 at 04:41:32PM +0100, Christoph Hellwig wrote:
> > Hi all,
> >
> > this series has a few old patches that simplify the LRU handling, and
> > moves back to only having a per-buftarg hash now that the buffer hash
> > is using the scalable rhashtable. While some of this looks like
> > performance work, performance and scalability are unchanged even on the
> > 80-core dual-socket system I test this on. Besides cleaning up the
> > code nicely, it also happens to fix a syzkaller-reported use-after-free
> > during buffer shutdown, which happened incidentally because of how the
> > teardown of the buftarg vs the perag structures is handled.
> >
> > Changes since v3:
> > - split the change to handling how referenced buffers on the LRU
> > are handled into a separate patch
> >
>
> It looks like there's a patch ordering problem or something here. It
> doesn't apply to master, and looks like patch 1 is trying to modify
> hunks that don't yet exist. A local rebase issue related to reordering
> the change in patch 3 perhaps..?
This is against the xfs for-next tree (commit
f8544b654f22b1138ba12bc0971a96963b20311d)
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: buffer cache simplification v4
2026-03-17 12:56 ` Christoph Hellwig
@ 2026-03-17 13:36 ` Brian Foster
2026-03-17 13:39 ` Christoph Hellwig
0 siblings, 1 reply; 10+ messages in thread
From: Brian Foster @ 2026-03-17 13:36 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: Carlos Maiolino, Dave Chinner, linux-xfs
On Tue, Mar 17, 2026 at 01:56:04PM +0100, Christoph Hellwig wrote:
> On Tue, Mar 17, 2026 at 08:13:04AM -0400, Brian Foster wrote:
> > On Mon, Mar 16, 2026 at 04:41:32PM +0100, Christoph Hellwig wrote:
> > > Hi all,
> > >
> > > this series has a few old patches that simplify the LRU handling, and
> > > moves back to only having a per-buftarg hash now that the buffer hash
> > > is using the scalable rhashtable. While some of this looks like
> > > performance work, performance and scalability are unchanged even on the
> > > 80-core dual-socket system I test this on. Besides cleaning up the
> > > code nicely, it also happens to fix a syzkaller-reported use-after-free
> > > during buffer shutdown, which happened incidentally because of how the
> > > teardown of the buftarg vs the perag structures is handled.
> > >
> > > Changes since v3:
> > > - split the change to handling how referenced buffers on the LRU
> > > are handled into a separate patch
> > >
> >
> > It looks like there's a patch ordering problem or something here. It
> > doesn't apply to master, and looks like patch 1 is trying to modify
> > hunks that don't yet exist. A local rebase issue related to reordering
> > the change in patch 3 perhaps..?
>
> This is against the xfs for-next tree (commit
> f8544b654f22b1138ba12bc0971a96963b20311d)
>
Still doesn't apply.. I.e., here's a hunk from patch 1:
@@ -154,7 +155,7 @@ struct xfs_buf {
xfs_daddr_t b_rhash_key; /* buffer cache index */
int b_length; /* size of buffer in BBs */
- int b_hold; /* reference count */
+ struct lockref b_lockref; /* refcount + lock */
atomic_t b_lru_ref; /* lru reclaim ref count */
xfs_buf_flags_t b_flags; /* status flags */
struct semaphore b_sema; /* semaphore for lockables */
Where is b_hold an int? AFAICT that was part of the first patch in the
previous version, which isn't included here.
Brian
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: buffer cache simplification v4
2026-03-17 13:36 ` Brian Foster
@ 2026-03-17 13:39 ` Christoph Hellwig
0 siblings, 0 replies; 10+ messages in thread
From: Christoph Hellwig @ 2026-03-17 13:39 UTC (permalink / raw)
To: Brian Foster; +Cc: Christoph Hellwig, Carlos Maiolino, Dave Chinner, linux-xfs
On Tue, Mar 17, 2026 at 09:36:33AM -0400, Brian Foster wrote:
> Where is b_hold an int? AFAICT that was part of the first patch in the
> previous version, which isn't included here.
Ah, the first patch is missing as I did a git-send-email for too few
patches. Resending...
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2026-03-17 13:39 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-03-16 15:41 buffer cache simplification v4 Christoph Hellwig
2026-03-16 15:41 ` [PATCH 1/3] xfs: use a lockref for the buffer reference count Christoph Hellwig
2026-03-16 15:41 ` [PATCH 2/3] xfs: switch (back) to a per-buftarg buffer hash Christoph Hellwig
2026-03-16 22:30 ` Darrick J. Wong
2026-03-16 15:41 ` [PATCH 3/3] xfs: don't decrement the buffer LRU count for in-use buffers Christoph Hellwig
2026-03-16 22:20 ` Darrick J. Wong
2026-03-17 12:13 ` buffer cache simplification v4 Brian Foster
2026-03-17 12:56 ` Christoph Hellwig
2026-03-17 13:36 ` Brian Foster
2026-03-17 13:39 ` Christoph Hellwig
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox