From: Dave Chinner <david@fromorbit.com>
To: linux-xfs@vger.kernel.org
Subject: [PATCH 8/9] xfs: use vmalloc for multi-folio buffers
Date: Tue, 19 Mar 2024 09:45:59 +1100 [thread overview]
Message-ID: <20240318224715.3367463-9-david@fromorbit.com> (raw)
In-Reply-To: <20240318224715.3367463-1-david@fromorbit.com>
From: Christoph Hellwig <hch@lst.de>
Instead of allocating the folios manually using the bulk page
allocator and then mapping them with vm_map_ram(), just use vmalloc()
to allocate the entire buffer - vmalloc() will use the bulk allocator
internally if it fits.
With this, the b_folios array can go away as well, as nothing uses it any more.
[dchinner: port to folio based buffers.]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/xfs/xfs_buf.c | 164 ++++++++++++-------------------------------
fs/xfs/xfs_buf.h | 2 -
fs/xfs/xfs_buf_mem.c | 9 +--
3 files changed, 45 insertions(+), 130 deletions(-)
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 303945554415..6d6bad80722e 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -282,29 +282,6 @@ _xfs_buf_alloc(
return 0;
}
-static void
-xfs_buf_free_folios(
- struct xfs_buf *bp)
-{
- uint i;
-
- ASSERT(bp->b_flags & _XBF_FOLIOS);
-
- if (xfs_buf_is_vmapped(bp))
- vm_unmap_ram(bp->b_addr, bp->b_folio_count);
-
- for (i = 0; i < bp->b_folio_count; i++) {
- if (bp->b_folios[i])
- __folio_put(bp->b_folios[i]);
- }
- mm_account_reclaimed_pages(bp->b_folio_count);
-
- if (bp->b_folios != bp->b_folio_array)
- kfree(bp->b_folios);
- bp->b_folios = NULL;
- bp->b_flags &= ~_XBF_FOLIOS;
-}
-
static void
xfs_buf_free_callback(
struct callback_head *cb)
@@ -323,13 +300,22 @@ xfs_buf_free(
ASSERT(list_empty(&bp->b_lru));
- if (xfs_buftarg_is_mem(bp->b_target))
+ if (xfs_buftarg_is_mem(bp->b_target)) {
xmbuf_unmap_folio(bp);
- else if (bp->b_flags & _XBF_FOLIOS)
- xfs_buf_free_folios(bp);
- else if (bp->b_flags & _XBF_KMEM)
- kfree(bp->b_addr);
+ goto free;
+ }
+ if (!(bp->b_flags & _XBF_KMEM))
+ mm_account_reclaimed_pages(bp->b_folio_count);
+
+ if (bp->b_flags & _XBF_FOLIOS)
+ __folio_put(kmem_to_folio(bp->b_addr));
+ else
+ kvfree(bp->b_addr);
+
+ bp->b_flags &= _XBF_KMEM | _XBF_FOLIOS;
+
+free:
call_rcu(&bp->b_rcu, xfs_buf_free_callback);
}
@@ -356,8 +342,6 @@ xfs_buf_alloc_kmem(
bp->b_addr = NULL;
return -ENOMEM;
}
- bp->b_folios = bp->b_folio_array;
- bp->b_folios[0] = kmem_to_folio(bp->b_addr);
bp->b_folio_count = 1;
bp->b_flags |= _XBF_KMEM;
return 0;
@@ -377,14 +361,15 @@ xfs_buf_alloc_folio(
struct xfs_buf *bp,
gfp_t gfp_mask)
{
+ struct folio *folio;
int length = BBTOB(bp->b_length);
int order = get_order(length);
- bp->b_folio_array[0] = folio_alloc(gfp_mask, order);
- if (!bp->b_folio_array[0])
+ folio = folio_alloc(gfp_mask, order);
+ if (!folio)
return false;
- bp->b_folios = bp->b_folio_array;
+ bp->b_addr = folio_address(folio);
bp->b_folio_count = 1;
bp->b_flags |= _XBF_FOLIOS;
return true;
@@ -400,15 +385,11 @@ xfs_buf_alloc_folio(
* contiguous memory region that we don't have to map and unmap to access the
* data directly.
*
- * The second type of buffer is the multi-folio buffer. These are *always* made
- * up of single page folios so that they can be fed to vmap_ram() to return a
- * contiguous memory region we can access the data through.
- *
- * We don't use high order folios for this second type of buffer (yet) because
- * having variable size folios makes offset-to-folio indexing and iteration of
- * the data range more complex than if they are fixed size. This case should now
- * be the slow path, though, so unless we regularly fail to allocate high order
- * folios, there should be little need to optimise this path.
+ * The second type of buffer is the vmalloc()d buffer. This provides the buffer
+ * with the required contiguous memory region but backed by discontiguous
+ * physical pages. vmalloc() typically doesn't fail, but it can and so we may
+ * need to wrap the allocation in a loop to prevent low memory failures and
+ * shutdowns.
*/
static int
xfs_buf_alloc_folios(
@@ -416,7 +397,7 @@ xfs_buf_alloc_folios(
xfs_buf_flags_t flags)
{
gfp_t gfp_mask = GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOWARN;
- long filled = 0;
+ unsigned nofs_flag;
if (flags & XBF_READ_AHEAD)
gfp_mask |= __GFP_NORETRY;
@@ -425,89 +406,32 @@ xfs_buf_alloc_folios(
if (!(flags & XBF_READ))
gfp_mask |= __GFP_ZERO;
- /* Optimistically attempt a single high order folio allocation. */
- if (xfs_buf_alloc_folio(bp, gfp_mask))
- return 0;
-
/* Fall back to allocating an array of single page folios. */
bp->b_folio_count = DIV_ROUND_UP(BBTOB(bp->b_length), PAGE_SIZE);
- if (bp->b_folio_count <= XB_FOLIOS) {
- bp->b_folios = bp->b_folio_array;
- } else {
- bp->b_folios = kzalloc(sizeof(struct folio *) * bp->b_folio_count,
- gfp_mask);
- if (!bp->b_folios)
- return -ENOMEM;
- }
- bp->b_flags |= _XBF_FOLIOS;
+ /* Optimistically attempt a single high order folio allocation. */
+ if (xfs_buf_alloc_folio(bp, gfp_mask))
+ return 0;
+
+ /* We are done if an order-0 allocation has already failed. */
+ if (bp->b_folio_count == 1)
+ return -ENOMEM;
/*
- * Bulk filling of pages can take multiple calls. Not filling the entire
- * array is not an allocation failure, so don't back off if we get at
- * least one extra page.
+ * XXX(dgc): I think dquot reclaim is the only place we can get
+ * to this function from memory reclaim context now. If we fix
+ * that like we've fixed inode reclaim to avoid writeback from
+ * reclaim, this nofs wrapping can go away.
*/
- for (;;) {
- long last = filled;
-
- filled = alloc_pages_bulk_array(gfp_mask, bp->b_folio_count,
- (struct page **)bp->b_folios);
- if (filled == bp->b_folio_count) {
- XFS_STATS_INC(bp->b_mount, xb_page_found);
- break;
- }
-
- if (filled != last)
- continue;
-
- if (flags & XBF_READ_AHEAD) {
- xfs_buf_free_folios(bp);
- return -ENOMEM;
- }
-
- XFS_STATS_INC(bp->b_mount, xb_page_retries);
- memalloc_retry_wait(gfp_mask);
- }
-
- if (bp->b_folio_count == 1) {
- /* A single folio buffer is always mappable */
- bp->b_addr = folio_address(bp->b_folios[0]);
- } else {
- int retried = 0;
- unsigned nofs_flag;
-
- /*
- * vm_map_ram() will allocate auxiliary structures (e.g.
- * pagetables) with GFP_KERNEL, yet we often under a scoped nofs
- * context here. Mixing GFP_KERNEL with GFP_NOFS allocations
- * from the same call site that can be run from both above and
- * below memory reclaim causes lockdep false positives. Hence we
- * always need to force this allocation to nofs context because
- * we can't pass __GFP_NOLOCKDEP down to auxillary structures to
- * prevent false positive lockdep reports.
- *
- * XXX(dgc): I think dquot reclaim is the only place we can get
- * to this function from memory reclaim context now. If we fix
- * that like we've fixed inode reclaim to avoid writeback from
- * reclaim, this nofs wrapping can go away.
- */
- nofs_flag = memalloc_nofs_save();
- do {
- bp->b_addr = vm_map_ram((struct page **)bp->b_folios,
- bp->b_folio_count, -1);
- if (bp->b_addr)
- break;
- vm_unmap_aliases();
- } while (retried++ <= 1);
- memalloc_nofs_restore(nofs_flag);
-
- if (!bp->b_addr) {
- xfs_warn_ratelimited(bp->b_mount,
- "%s: failed to map %u folios", __func__,
- bp->b_folio_count);
- xfs_buf_free_folios(bp);
- return -ENOMEM;
- }
+ nofs_flag = memalloc_nofs_save();
+ bp->b_addr = __vmalloc(BBTOB(bp->b_length), gfp_mask);
+ memalloc_nofs_restore(nofs_flag);
+
+ if (!bp->b_addr) {
+ xfs_warn_ratelimited(bp->b_mount,
+ "%s: failed to allocate %u folios", __func__,
+ bp->b_folio_count);
+ return -ENOMEM;
}
return 0;
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 4d515407713b..68c24947ca1a 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -190,8 +190,6 @@ struct xfs_buf {
struct xfs_buf_log_item *b_log_item;
struct list_head b_li_list; /* Log items list head */
struct xfs_trans *b_transp;
- struct folio **b_folios; /* array of folio pointers */
- struct folio *b_folio_array[XB_FOLIOS]; /* inline folios */
struct xfs_buf_map *b_maps; /* compound buffer map */
struct xfs_buf_map __b_map; /* inline compound buffer map */
int b_map_count;
diff --git a/fs/xfs/xfs_buf_mem.c b/fs/xfs/xfs_buf_mem.c
index 26734c64c10e..336e7c8effb7 100644
--- a/fs/xfs/xfs_buf_mem.c
+++ b/fs/xfs/xfs_buf_mem.c
@@ -169,8 +169,6 @@ xmbuf_map_folio(
unlock_page(page);
bp->b_addr = page_address(page);
- bp->b_folios = bp->b_folio_array;
- bp->b_folios[0] = folio;
bp->b_folio_count = 1;
return 0;
}
@@ -180,15 +178,10 @@ void
xmbuf_unmap_folio(
struct xfs_buf *bp)
{
- struct folio *folio = bp->b_folios[0];
-
ASSERT(xfs_buftarg_is_mem(bp->b_target));
- folio_put(folio);
-
+ folio_put(kmem_to_folio(bp->b_addr));
bp->b_addr = NULL;
- bp->b_folios[0] = NULL;
- bp->b_folios = NULL;
bp->b_folio_count = 0;
}
--
2.43.0
next prev parent reply other threads:[~2024-03-18 22:47 UTC|newest]
Thread overview: 47+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-03-18 22:45 [PATCH v2 0/9] xfs: use large folios for buffers Dave Chinner
2024-03-18 22:45 ` [PATCH 1/9] xfs: unmapped buffer item size straddling mismatch Dave Chinner
2024-03-18 22:45 ` [PATCH 2/9] xfs: use folios in the buffer cache Dave Chinner
2024-03-19 6:38 ` Christoph Hellwig
2024-03-19 6:52 ` Dave Chinner
2024-03-19 6:53 ` Christoph Hellwig
2024-03-19 21:42 ` Dave Chinner
2024-03-19 21:42 ` Dave Chinner
2024-03-19 17:15 ` Darrick J. Wong
2024-03-18 22:45 ` [PATCH 3/9] xfs: convert buffer cache to use high order folios Dave Chinner
2024-03-19 6:55 ` Christoph Hellwig
2024-03-19 17:29 ` Darrick J. Wong
2024-03-19 21:32 ` Christoph Hellwig
2024-03-19 21:38 ` Darrick J. Wong
2024-03-19 21:41 ` Christoph Hellwig
2024-03-19 22:23 ` Dave Chinner
2024-03-21 2:12 ` Darrick J. Wong
2024-03-21 2:40 ` Darrick J. Wong
2024-03-21 21:28 ` Christoph Hellwig
2024-03-21 21:39 ` Darrick J. Wong
2024-03-21 22:02 ` Christoph Hellwig
2024-03-19 21:55 ` Dave Chinner
2024-03-22 8:02 ` Pankaj Raghav (Samsung)
2024-03-22 22:04 ` Dave Chinner
2024-03-25 11:17 ` Pankaj Raghav (Samsung)
2024-03-18 22:45 ` [PATCH 4/9] xfs: kill XBF_UNMAPPED Dave Chinner
2024-03-19 17:30 ` Darrick J. Wong
2024-03-19 23:36 ` Dave Chinner
2024-03-18 22:45 ` [PATCH 5/9] xfs: buffer items don't straddle pages anymore Dave Chinner
2024-03-19 6:56 ` Christoph Hellwig
2024-03-19 17:31 ` Darrick J. Wong
2024-03-18 22:45 ` [PATCH 6/9] xfs: map buffers in xfs_buf_alloc_folios Dave Chinner
2024-03-19 17:34 ` Darrick J. Wong
2024-03-19 21:32 ` Christoph Hellwig
2024-03-19 21:39 ` Darrick J. Wong
2024-03-19 21:41 ` Christoph Hellwig
2024-03-18 22:45 ` [PATCH 7/9] xfs: walk b_addr for buffer I/O Dave Chinner
2024-03-19 17:42 ` Darrick J. Wong
2024-03-19 21:33 ` Christoph Hellwig
2024-03-18 22:45 ` Dave Chinner [this message]
2024-03-19 17:48 ` [PATCH 8/9] xfs: use vmalloc for multi-folio buffers Darrick J. Wong
2024-03-20 0:20 ` Dave Chinner
2024-03-18 22:46 ` [PATCH 9/9] xfs: rename bp->b_folio_count Dave Chinner
2024-03-19 7:37 ` Christoph Hellwig
2024-03-19 23:59 ` Dave Chinner
2024-03-19 0:24 ` [PATCH v2 0/9] xfs: use large folios for buffers Christoph Hellwig
2024-03-19 0:44 ` Dave Chinner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240318224715.3367463-9-david@fromorbit.com \
--to=david@fromorbit.com \
--cc=linux-xfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox