* [PATCH] xfs: remove xfs_buf_cache.bc_lock
@ 2025-01-27 15:05 ` Christoph Hellwig
2025-01-27 15:45 ` Carlos Maiolino
2025-01-27 20:19 ` Dave Chinner
0 siblings, 2 replies; 5+ messages in thread
From: Christoph Hellwig @ 2025-01-27 15:05 UTC (permalink / raw)
To: cem; +Cc: djwong, dchinner, linux-xfs, Lai, Yi
xfs_buf_cache.bc_lock serializes adding buffers to and removing them from
the hashtable. But as the rhashtable code already uses fine grained
internal locking for inserts and removals the extra protection isn't
actually required.
It also happens to fix a lock order inversion vs b_lock added by the
recent lookup race fix.
Fixes: ee10f6fcdb96 ("xfs: fix buffer lookup vs release race")
Reported-by: "Lai, Yi" <yi1.lai@linux.intel.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
fs/xfs/xfs_buf.c | 20 ++++++++------------
fs/xfs/xfs_buf.h | 1 -
2 files changed, 8 insertions(+), 13 deletions(-)
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index d1d4a0a22e13..1fffa2990bd9 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -41,8 +41,7 @@ struct kmem_cache *xfs_buf_cache;
*
* xfs_buf_rele:
* b_lock
- * pag_buf_lock
- * lru_lock
+ * lru_lock
*
* xfs_buftarg_drain_rele
* lru_lock
@@ -502,7 +501,6 @@ int
xfs_buf_cache_init(
struct xfs_buf_cache *bch)
{
- spin_lock_init(&bch->bc_lock);
return rhashtable_init(&bch->bc_hash, &xfs_buf_hash_params);
}
@@ -652,17 +650,20 @@ xfs_buf_find_insert(
if (error)
goto out_free_buf;
- spin_lock(&bch->bc_lock);
+ /* The new buffer keeps the perag reference until it is freed. */
+ new_bp->b_pag = pag;
+
+ rcu_read_lock();
bp = rhashtable_lookup_get_insert_fast(&bch->bc_hash,
&new_bp->b_rhash_head, xfs_buf_hash_params);
if (IS_ERR(bp)) {
+ rcu_read_unlock();
error = PTR_ERR(bp);
- spin_unlock(&bch->bc_lock);
goto out_free_buf;
}
if (bp && xfs_buf_try_hold(bp)) {
/* found an existing buffer */
- spin_unlock(&bch->bc_lock);
+ rcu_read_unlock();
error = xfs_buf_find_lock(bp, flags);
if (error)
xfs_buf_rele(bp);
@@ -670,10 +671,8 @@ xfs_buf_find_insert(
*bpp = bp;
goto out_free_buf;
}
+ rcu_read_unlock();
- /* The new buffer keeps the perag reference until it is freed. */
- new_bp->b_pag = pag;
- spin_unlock(&bch->bc_lock);
*bpp = new_bp;
return 0;
@@ -1090,7 +1089,6 @@ xfs_buf_rele_cached(
}
/* we are asked to drop the last reference */
- spin_lock(&bch->bc_lock);
__xfs_buf_ioacct_dec(bp);
if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) {
/*
@@ -1102,7 +1100,6 @@ xfs_buf_rele_cached(
bp->b_state &= ~XFS_BSTATE_DISPOSE;
else
bp->b_hold--;
- spin_unlock(&bch->bc_lock);
} else {
bp->b_hold--;
/*
@@ -1120,7 +1117,6 @@ xfs_buf_rele_cached(
ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
rhashtable_remove_fast(&bch->bc_hash, &bp->b_rhash_head,
xfs_buf_hash_params);
- spin_unlock(&bch->bc_lock);
if (pag)
xfs_perag_put(pag);
freebuf = true;
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 7e73663c5d4a..3b4ed42e11c0 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -80,7 +80,6 @@ typedef unsigned int xfs_buf_flags_t;
#define XFS_BSTATE_IN_FLIGHT (1 << 1) /* I/O in flight */
struct xfs_buf_cache {
- spinlock_t bc_lock;
struct rhashtable bc_hash;
};
--
2.45.2
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH] xfs: remove xfs_buf_cache.bc_lock
2025-01-27 15:05 ` [PATCH] xfs: remove xfs_buf_cache.bc_lock Christoph Hellwig
@ 2025-01-27 15:45 ` Carlos Maiolino
2025-01-27 20:19 ` Dave Chinner
1 sibling, 0 replies; 5+ messages in thread
From: Carlos Maiolino @ 2025-01-27 15:45 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: djwong, dchinner, linux-xfs, Lai, Yi
On Mon, Jan 27, 2025 at 04:05:39PM +0100, Christoph Hellwig wrote:
> xfs_buf_cache.bc_lock serializes adding buffers to and removing them from
> the hashtable. But as the rhashtable code already uses fine grained
> internal locking for inserts and removals the extra protection isn't
> actually required.
>
> It also happens to fix a lock order inversion vs b_lock added by the
> recent lookup race fix.
>
> Fixes: ee10f6fcdb96 ("xfs: fix buffer lookup vs release race")
> Reported-by: "Lai, Yi" <yi1.lai@linux.intel.com>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
Looks good,
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
> ---
> fs/xfs/xfs_buf.c | 20 ++++++++------------
> fs/xfs/xfs_buf.h | 1 -
> 2 files changed, 8 insertions(+), 13 deletions(-)
>
> diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
> index d1d4a0a22e13..1fffa2990bd9 100644
> --- a/fs/xfs/xfs_buf.c
> +++ b/fs/xfs/xfs_buf.c
> @@ -41,8 +41,7 @@ struct kmem_cache *xfs_buf_cache;
> *
> * xfs_buf_rele:
> * b_lock
> - * pag_buf_lock
> - * lru_lock
> + * lru_lock
> *
> * xfs_buftarg_drain_rele
> * lru_lock
> @@ -502,7 +501,6 @@ int
> xfs_buf_cache_init(
> struct xfs_buf_cache *bch)
> {
> - spin_lock_init(&bch->bc_lock);
> return rhashtable_init(&bch->bc_hash, &xfs_buf_hash_params);
> }
>
> @@ -652,17 +650,20 @@ xfs_buf_find_insert(
> if (error)
> goto out_free_buf;
>
> - spin_lock(&bch->bc_lock);
> + /* The new buffer keeps the perag reference until it is freed. */
> + new_bp->b_pag = pag;
> +
> + rcu_read_lock();
> bp = rhashtable_lookup_get_insert_fast(&bch->bc_hash,
> &new_bp->b_rhash_head, xfs_buf_hash_params);
> if (IS_ERR(bp)) {
> + rcu_read_unlock();
> error = PTR_ERR(bp);
> - spin_unlock(&bch->bc_lock);
> goto out_free_buf;
> }
> if (bp && xfs_buf_try_hold(bp)) {
> /* found an existing buffer */
> - spin_unlock(&bch->bc_lock);
> + rcu_read_unlock();
> error = xfs_buf_find_lock(bp, flags);
> if (error)
> xfs_buf_rele(bp);
> @@ -670,10 +671,8 @@ xfs_buf_find_insert(
> *bpp = bp;
> goto out_free_buf;
> }
> + rcu_read_unlock();
>
> - /* The new buffer keeps the perag reference until it is freed. */
> - new_bp->b_pag = pag;
> - spin_unlock(&bch->bc_lock);
> *bpp = new_bp;
> return 0;
>
> @@ -1090,7 +1089,6 @@ xfs_buf_rele_cached(
> }
>
> /* we are asked to drop the last reference */
> - spin_lock(&bch->bc_lock);
> __xfs_buf_ioacct_dec(bp);
> if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) {
> /*
> @@ -1102,7 +1100,6 @@ xfs_buf_rele_cached(
> bp->b_state &= ~XFS_BSTATE_DISPOSE;
> else
> bp->b_hold--;
> - spin_unlock(&bch->bc_lock);
> } else {
> bp->b_hold--;
> /*
> @@ -1120,7 +1117,6 @@ xfs_buf_rele_cached(
> ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
> rhashtable_remove_fast(&bch->bc_hash, &bp->b_rhash_head,
> xfs_buf_hash_params);
> - spin_unlock(&bch->bc_lock);
> if (pag)
> xfs_perag_put(pag);
> freebuf = true;
> diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
> index 7e73663c5d4a..3b4ed42e11c0 100644
> --- a/fs/xfs/xfs_buf.h
> +++ b/fs/xfs/xfs_buf.h
> @@ -80,7 +80,6 @@ typedef unsigned int xfs_buf_flags_t;
> #define XFS_BSTATE_IN_FLIGHT (1 << 1) /* I/O in flight */
>
> struct xfs_buf_cache {
> - spinlock_t bc_lock;
> struct rhashtable bc_hash;
> };
>
> --
> 2.45.2
>
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] xfs: remove xfs_buf_cache.bc_lock
2025-01-27 15:05 ` [PATCH] xfs: remove xfs_buf_cache.bc_lock Christoph Hellwig
2025-01-27 15:45 ` Carlos Maiolino
@ 2025-01-27 20:19 ` Dave Chinner
2025-01-28 5:06 ` Christoph Hellwig
1 sibling, 1 reply; 5+ messages in thread
From: Dave Chinner @ 2025-01-27 20:19 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: cem, djwong, dchinner, linux-xfs, Lai, Yi
On Mon, Jan 27, 2025 at 04:05:39PM +0100, Christoph Hellwig wrote:
> xfs_buf_cache.bc_lock serializes adding buffers to and removing them from
> the hashtable. But as the rhashtable code already uses fine grained
> internal locking for inserts and removals the extra protection isn't
> actually required.
>
> It also happens to fix a lock order inversion vs b_lock added by the
> recent lookup race fix.
>
> Fixes: ee10f6fcdb96 ("xfs: fix buffer lookup vs release race")
> Reported-by: "Lai, Yi" <yi1.lai@linux.intel.com>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
> fs/xfs/xfs_buf.c | 20 ++++++++------------
> fs/xfs/xfs_buf.h | 1 -
> 2 files changed, 8 insertions(+), 13 deletions(-)
>
> diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
> index d1d4a0a22e13..1fffa2990bd9 100644
> --- a/fs/xfs/xfs_buf.c
> +++ b/fs/xfs/xfs_buf.c
> @@ -41,8 +41,7 @@ struct kmem_cache *xfs_buf_cache;
> *
> * xfs_buf_rele:
> * b_lock
> - * pag_buf_lock
> - * lru_lock
> + * lru_lock
> *
> * xfs_buftarg_drain_rele
> * lru_lock
> @@ -502,7 +501,6 @@ int
> xfs_buf_cache_init(
> struct xfs_buf_cache *bch)
> {
> - spin_lock_init(&bch->bc_lock);
> return rhashtable_init(&bch->bc_hash, &xfs_buf_hash_params);
> }
>
> @@ -652,17 +650,20 @@ xfs_buf_find_insert(
> if (error)
> goto out_free_buf;
>
> - spin_lock(&bch->bc_lock);
> + /* The new buffer keeps the perag reference until it is freed. */
> + new_bp->b_pag = pag;
> +
> + rcu_read_lock();
> bp = rhashtable_lookup_get_insert_fast(&bch->bc_hash,
> &new_bp->b_rhash_head, xfs_buf_hash_params);
> if (IS_ERR(bp)) {
> + rcu_read_unlock();
> error = PTR_ERR(bp);
> - spin_unlock(&bch->bc_lock);
> goto out_free_buf;
> }
> if (bp && xfs_buf_try_hold(bp)) {
> /* found an existing buffer */
> - spin_unlock(&bch->bc_lock);
> + rcu_read_unlock();
> error = xfs_buf_find_lock(bp, flags);
> if (error)
> xfs_buf_rele(bp);
Ok, so now we can get racing inserts, which means this can find
the buffer that has just been inserted by another thread in this
same function. Or, indeed, an xfs_buf_lookup() call. What prevents
those racing tasks from using this buffer before the task that
inserted it can use it?
I think that the the buffer lock being initialised to "held" and
b_hold being initialised to 1 make this all work correctly, but
comments that explicitly spell out why RCU inserts are safe
(both in xfs_buf_alloc() for the init values and here) would be
appreciated.
> diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
> index 7e73663c5d4a..3b4ed42e11c0 100644
> --- a/fs/xfs/xfs_buf.h
> +++ b/fs/xfs/xfs_buf.h
> @@ -80,7 +80,6 @@ typedef unsigned int xfs_buf_flags_t;
> #define XFS_BSTATE_IN_FLIGHT (1 << 1) /* I/O in flight */
>
> struct xfs_buf_cache {
> - spinlock_t bc_lock;
> struct rhashtable bc_hash;
> };
At this point, the struct xfs_buf_cache structure can go away,
right? (separate patch and all that...)
-Dave.
--
Dave Chinner
david@fromorbit.com
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] xfs: remove xfs_buf_cache.bc_lock
2025-01-27 20:19 ` Dave Chinner
@ 2025-01-28 5:06 ` Christoph Hellwig
2025-01-28 6:44 ` Dave Chinner
0 siblings, 1 reply; 5+ messages in thread
From: Christoph Hellwig @ 2025-01-28 5:06 UTC (permalink / raw)
To: Dave Chinner; +Cc: Christoph Hellwig, cem, djwong, dchinner, linux-xfs, Lai, Yi
On Tue, Jan 28, 2025 at 07:19:11AM +1100, Dave Chinner wrote:
> Ok, so now we can get racing inserts, which means this can find
> the buffer that has just been inserted by another thread in this
> same function. Or, indeed, an xfs_buf_lookup() call.
Yes.
> What prevents
> those racing tasks from using this buffer before the task that
> inserted it can use it?
>
> I think that the the buffer lock being initialised to "held" and
> b_hold being initialised to 1 make this all work correctly,
Exactly, the buffer is inserted with the b_sema held and b_hold
initialized to 1, aka locked and held.
> but
> comments that explicitly spell out why RCU inserts are safe
> (both in xfs_buf_alloc() for the init values and here) would be
> appreciated.
Sure.
> > struct xfs_buf_cache {
> > - spinlock_t bc_lock;
> > struct rhashtable bc_hash;
> > };
>
> At this point, the struct xfs_buf_cache structure can go away,
> right? (separate patch and all that...)
Yes. And in fact I think the per-pag hash should also go away, as with
the per-bucket locking there is no point in it. I've had this patch in
my testing runs for a while, which I think is where we should be
going:
http://git.infradead.org/?p=users/hch/xfs.git;a=commitdiff;h=890cd2cd255710ee5d3408bc60792b9cdad3adfb
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] xfs: remove xfs_buf_cache.bc_lock
2025-01-28 5:06 ` Christoph Hellwig
@ 2025-01-28 6:44 ` Dave Chinner
0 siblings, 0 replies; 5+ messages in thread
From: Dave Chinner @ 2025-01-28 6:44 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: cem, djwong, dchinner, linux-xfs, Lai, Yi
On Tue, Jan 28, 2025 at 06:06:14AM +0100, Christoph Hellwig wrote:
> On Tue, Jan 28, 2025 at 07:19:11AM +1100, Dave Chinner wrote:
> > Ok, so now we can get racing inserts, which means this can find
> > the buffer that has just been inserted by another thread in this
> > same function. Or, indeed, an xfs_buf_lookup() call.
>
> Yes.
>
> > What prevents
> > those racing tasks from using this buffer before the task that
> > inserted it can use it?
> >
> > I think that the buffer lock being initialised to "held" and
> > b_hold being initialised to 1 make this all work correctly,
>
> Exactly, the buffer is inserted with the b_sema held and b_hold
> initialized to 1, aka locked and held.
>
> > but
> > comments that explicitly spell out why RCU inserts are safe
> > (both in xfs_buf_alloc() for the init values and here) would be
> > appreciated.
>
> Sure.
Thanks.
> > > struct xfs_buf_cache {
> > > - spinlock_t bc_lock;
> > > struct rhashtable bc_hash;
> > > };
> >
> > At this point, the struct xfs_buf_cache structure can go away,
> > right? (separate patch and all that...)
>
> Yes. And in fact I think the per-pag hash should also go away, as with
> the per-bucket locking there is no point in it. I've had this patch in
> my testing runs for a while, which I think is where we should be
> going:
>
> http://git.infradead.org/?p=users/hch/xfs.git;a=commitdiff;h=890cd2cd255710ee5d3408bc60792b9cdad3adfb
*nod*
Code seems reasonable, but it'll need some benchmarking and
scalability analysis before merging...
-Dave.
--
Dave Chinner
david@fromorbit.com
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2025-01-28 6:44 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
[not found] <F1frw9ISF6ezkoa1AYYRx2dhdiUS2CrMsKS_bCvbipw2Fm0rtSrrDZ3FBIeNcdwJN328johEKEeARThjO_0-JQ==@protonmail.internalid>
2025-01-27 15:05 ` [PATCH] xfs: remove xfs_buf_cache.bc_lock Christoph Hellwig
2025-01-27 15:45 ` Carlos Maiolino
2025-01-27 20:19 ` Dave Chinner
2025-01-28 5:06 ` Christoph Hellwig
2025-01-28 6:44 ` Dave Chinner
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox