From: Christoph Hellwig <hch@lst.de>
To: linux-xfs@vger.kernel.org
Cc: Dave Chinner <david@fromorbit.com>
Subject: [PATCH 3/5] xfs: remove a superfluous hash lookup when inserting new buffers
Date: Sun, 3 Apr 2022 14:01:17 +0200 [thread overview]
Message-ID: <20220403120119.235457-4-hch@lst.de> (raw)
In-Reply-To: <20220403120119.235457-1-hch@lst.de>
xfs_buf_get_map has a bit of a strange structure where the xfs_buf_find
helper is called twice before we actually insert a new buffer on a cache
miss. Given that the rhashtable has an interface to insert a new entry
and return the found one on a conflict we can easily get rid of the
double lookup by using that. To get a straight code path this requires
folding the xfs_buf_find helper into xfs_buf_get_map, but on the flip
side we can now easily move the logic for the slow path cache insert
into a new helper.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
fs/xfs/xfs_buf.c | 207 +++++++++++++++++++++++------------------------
1 file changed, 99 insertions(+), 108 deletions(-)
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 0951b7cbd79675..ef645e15935369 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -504,12 +504,66 @@ xfs_buf_hash_destroy(
rhashtable_destroy(&pag->pag_buf_hash);
}
+static int
+xfs_buf_insert(
+ struct xfs_buftarg *btp,
+ struct xfs_buf_map *map,
+ int nmaps,
+ xfs_buf_flags_t flags,
+ struct xfs_perag *pag,
+ struct xfs_buf **bpp)
+{
+ int error;
+ struct xfs_buf *bp;
+
+ error = _xfs_buf_alloc(btp, map, nmaps, flags, &bp);
+ if (error)
+ return error;
+
+ /*
+ * For buffers that fit entirely within a single page, first attempt to
+ * allocate the memory from the heap to minimise memory usage. If we
+ * can't get heap memory for these small buffers, we fall back to using
+ * the page allocator.
+ */
+ if (BBTOB(bp->b_length) >= PAGE_SIZE ||
+ xfs_buf_alloc_kmem(bp, flags) < 0) {
+ error = xfs_buf_alloc_pages(bp, flags);
+ if (error)
+ goto out_free_buf;
+ }
+
+ /* the buffer keeps the perag reference until it is freed */
+ bp->b_pag = pag;
+
+ spin_lock(&pag->pag_buf_lock);
+ *bpp = rhashtable_lookup_get_insert_fast(&pag->pag_buf_hash,
+ &bp->b_rhash_head, xfs_buf_hash_params);
+ if (IS_ERR(*bpp)) {
+ error = PTR_ERR(*bpp);
+ goto out_unlock;
+ }
+ if (*bpp) {
+ /* found an existing buffer */
+ atomic_inc(&(*bpp)->b_hold);
+ error = -EEXIST;
+ goto out_unlock;
+ }
+ spin_unlock(&pag->pag_buf_lock);
+ *bpp = bp;
+ return 0;
+
+out_unlock:
+ spin_unlock(&pag->pag_buf_lock);
+out_free_buf:
+ xfs_buf_free(bp);
+ return error;
+}
+
/*
- * Look up a buffer in the buffer cache and return it referenced and locked
- * in @found_bp.
- *
- * If @new_bp is supplied and we have a lookup miss, insert @new_bp into the
- * cache.
+ * Assemble a buffer covering the specified range. The code is optimised for
+ * cache hits, as metadata intensive workloads will see 3 orders of magnitude
+ * more hits than misses.
*
* If XBF_TRYLOCK is set in @flags, only try to lock the buffer and return
* -EAGAIN if we fail to lock it.
@@ -517,27 +571,26 @@ xfs_buf_hash_destroy(
* Return values are:
* -EFSCORRUPTED if have been supplied with an invalid address
* -EAGAIN on trylock failure
- * -ENOENT if we fail to find a match and @new_bp was NULL
- * 0, with @found_bp:
- * - @new_bp if we inserted it into the cache
- * - the buffer we found and locked.
+ * -ENOENT if we fail to find a match and alloc was %false
+ * 0 if a buffer was found or newly created
*/
-static int
-xfs_buf_find(
+int
+xfs_buf_get_map(
struct xfs_buftarg *btp,
struct xfs_buf_map *map,
int nmaps,
xfs_buf_flags_t flags,
- struct xfs_buf *new_bp,
- struct xfs_buf **found_bp)
+ struct xfs_buf **bpp)
{
+ struct xfs_mount *mp = btp->bt_mount;
struct xfs_perag *pag;
struct xfs_buf *bp;
struct xfs_buf_map cmap = { .bm_bn = map[0].bm_bn };
+ int error = 0;
xfs_daddr_t eofs;
int i;
- *found_bp = NULL;
+ *bpp = NULL;
for (i = 0; i < nmaps; i++)
cmap.bm_len += map[i].bm_len;
@@ -550,54 +603,47 @@ xfs_buf_find(
* Corrupted block numbers can get through to here, unfortunately, so we
* have to check that the buffer falls within the filesystem bounds.
*/
- eofs = XFS_FSB_TO_BB(btp->bt_mount, btp->bt_mount->m_sb.sb_dblocks);
- if (cmap.bm_bn < 0 || cmap.bm_bn >= eofs) {
- xfs_alert(btp->bt_mount,
- "%s: daddr 0x%llx out of range, EOFS 0x%llx",
+ eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
+ if (WARN_ON_ONCE(cmap.bm_bn < 0) || WARN_ON_ONCE(cmap.bm_bn >= eofs)) {
+ xfs_alert(mp, "%s: daddr 0x%llx out of range, EOFS 0x%llx",
__func__, cmap.bm_bn, eofs);
- WARN_ON(1);
return -EFSCORRUPTED;
}
- pag = xfs_perag_get(btp->bt_mount,
- xfs_daddr_to_agno(btp->bt_mount, cmap.bm_bn));
+ pag = xfs_perag_get(mp, xfs_daddr_to_agno(mp, cmap.bm_bn));
spin_lock(&pag->pag_buf_lock);
bp = rhashtable_lookup_fast(&pag->pag_buf_hash, &cmap,
xfs_buf_hash_params);
- if (bp) {
+ if (bp)
atomic_inc(&bp->b_hold);
- goto found;
- }
-
- /* No match found */
- if (!new_bp) {
- XFS_STATS_INC(btp->bt_mount, xb_miss_locked);
- spin_unlock(&pag->pag_buf_lock);
- xfs_perag_put(pag);
- return -ENOENT;
- }
-
- /* the buffer keeps the perag reference until it is freed */
- new_bp->b_pag = pag;
- rhashtable_insert_fast(&pag->pag_buf_hash, &new_bp->b_rhash_head,
- xfs_buf_hash_params);
spin_unlock(&pag->pag_buf_lock);
- *found_bp = new_bp;
- return 0;
-found:
- spin_unlock(&pag->pag_buf_lock);
+ if (unlikely(!bp)) {
+ if (flags & XBF_NOALLOC) {
+ XFS_STATS_INC(mp, xb_miss_locked);
+ xfs_perag_put(pag);
+ return -ENOENT;
+ }
+
+ error = xfs_buf_insert(btp, map, nmaps, flags, pag, &bp);
+ if (!error)
+ goto finish;
+ if (error != -EEXIST) {
+ xfs_perag_put(pag);
+ return error;
+ }
+ }
xfs_perag_put(pag);
if (!xfs_buf_trylock(bp)) {
if (flags & XBF_TRYLOCK) {
xfs_buf_rele(bp);
- XFS_STATS_INC(btp->bt_mount, xb_busy_locked);
+ XFS_STATS_INC(mp, xb_busy_locked);
return -EAGAIN;
}
xfs_buf_lock(bp);
- XFS_STATS_INC(btp->bt_mount, xb_get_locked_waited);
+ XFS_STATS_INC(mp, xb_get_locked_waited);
}
/*
@@ -612,64 +658,12 @@ xfs_buf_find(
}
trace_xfs_buf_find(bp, flags, _RET_IP_);
- XFS_STATS_INC(btp->bt_mount, xb_get_locked);
- *found_bp = bp;
- return 0;
-}
-
-/*
- * Assembles a buffer covering the specified range. The code is optimised for
- * cache hits, as metadata intensive workloads will see 3 orders of magnitude
- * more hits than misses.
- */
-int
-xfs_buf_get_map(
- struct xfs_buftarg *target,
- struct xfs_buf_map *map,
- int nmaps,
- xfs_buf_flags_t flags,
- struct xfs_buf **bpp)
-{
- struct xfs_buf *bp;
- struct xfs_buf *new_bp;
- int error;
-
- *bpp = NULL;
- error = xfs_buf_find(target, map, nmaps, flags, NULL, &bp);
- if (!error)
- goto found;
- if (error != -ENOENT || (flags & XBF_NOALLOC))
- return error;
-
- error = _xfs_buf_alloc(target, map, nmaps, flags, &new_bp);
- if (error)
- return error;
-
- /*
- * For buffers that fit entirely within a single page, first attempt to
- * allocate the memory from the heap to minimise memory usage. If we
- * can't get heap memory for these small buffers, we fall back to using
- * the page allocator.
- */
- if (BBTOB(new_bp->b_length) >= PAGE_SIZE ||
- xfs_buf_alloc_kmem(new_bp, flags) < 0) {
- error = xfs_buf_alloc_pages(new_bp, flags);
- if (error)
- goto out_free_buf;
- }
-
- error = xfs_buf_find(target, map, nmaps, flags, new_bp, &bp);
- if (error)
- goto out_free_buf;
-
- if (bp != new_bp)
- xfs_buf_free(new_bp);
-
-found:
+ XFS_STATS_INC(mp, xb_get_locked);
+finish:
if (!bp->b_addr) {
error = _xfs_buf_map_pages(bp, flags);
if (unlikely(error)) {
- xfs_warn_ratelimited(target->bt_mount,
+ xfs_warn_ratelimited(mp,
"%s: failed to map %u pages", __func__,
bp->b_page_count);
xfs_buf_relse(bp);
@@ -684,13 +678,10 @@ xfs_buf_get_map(
if (!(flags & XBF_READ))
xfs_buf_ioerror(bp, 0);
- XFS_STATS_INC(target->bt_mount, xb_get);
+ XFS_STATS_INC(mp, xb_get);
trace_xfs_buf_get(bp, flags, _RET_IP_);
*bpp = bp;
return 0;
-out_free_buf:
- xfs_buf_free(new_bp);
- return error;
}
int
@@ -962,12 +953,12 @@ xfs_buf_rele(
/*
* We grab the b_lock here first to serialise racing xfs_buf_rele()
* calls. The pag_buf_lock being taken on the last reference only
- * serialises against racing lookups in xfs_buf_find(). IOWs, the second
- * to last reference we drop here is not serialised against the last
- * reference until we take bp->b_lock. Hence if we don't grab b_lock
- * first, the last "release" reference can win the race to the lock and
- * free the buffer before the second-to-last reference is processed,
- * leading to a use-after-free scenario.
+ * serialises against racing lookups in xfs_buf_get_map(). IOWs, the
+ * second to last reference we drop here is not serialised against the
+ * last reference until we take bp->b_lock. Hence if we don't grab
+ * b_lock first, the last "release" reference can win the race to the
+ * lock and free the buffer before the second-to-last reference is
+ * processed, leading to a use-after-free scenario.
*/
spin_lock(&bp->b_lock);
release = atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock);
--
2.30.2
next prev parent reply other threads:[~2022-04-03 12:01 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-04-03 12:01 lockless and cleaned up buffer lookup Christoph Hellwig
2022-04-03 12:01 ` [PATCH 1/5] xfs: add a flags argument to xfs_buf_get Christoph Hellwig
2022-04-03 12:01 ` [PATCH 2/5] xfs: replace xfs_buf_incore with an XBF_NOALLOC flag to xfs_buf_get* Christoph Hellwig
2022-04-03 21:54 ` Dave Chinner
2022-04-05 14:55 ` Christoph Hellwig
2022-04-05 21:21 ` Dave Chinner
2022-04-06 16:24 ` Christoph Hellwig
2022-04-03 12:01 ` Christoph Hellwig [this message]
2022-04-03 23:04 ` [PATCH 3/5] xfs: remove a superfluous hash lookup when inserting new buffers Dave Chinner
2022-04-05 15:00 ` Christoph Hellwig
2022-04-05 22:01 ` Dave Chinner
2022-04-06 16:26 ` Christoph Hellwig
2022-04-03 12:01 ` [PATCH 4/5] xfs: reduce the number of atomic when locking a buffer after lookup Christoph Hellwig
2022-04-03 12:01 ` [PATCH 5/5] xfs: lockless buffer lookup Christoph Hellwig
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220403120119.235457-4-hch@lst.de \
--to=hch@lst.de \
--cc=david@fromorbit.com \
--cc=linux-xfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox