[PATCH 8/9] xfs: use vmalloc for multi-folio buffers

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Dave Chinner <david@fromorbit.com>
To: linux-xfs@vger.kernel.org
Subject: [PATCH 8/9] xfs: use vmalloc for multi-folio buffers
Date: Tue, 19 Mar 2024 09:45:59 +1100	[thread overview]
Message-ID: <20240318224715.3367463-9-david@fromorbit.com> (raw)
In-Reply-To: <20240318224715.3367463-1-david@fromorbit.com>

From: Christoph Hellwig <hch@lst.de>

Instead of allocating the folios manually using the bulk page
allocator and then mapping them with vm_map_ram(), just use vmalloc()
to allocate the entire buffer - vmalloc() will use the bulk allocator
internally if it fits.

With this, the b_folios array can go away as well, as nothing uses it anymore.

[dchinner: port to folio based buffers.]

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/xfs_buf.c     | 164 ++++++++++++-------------------------------
 fs/xfs/xfs_buf.h     |   2 -
 fs/xfs/xfs_buf_mem.c |   9 +--
 3 files changed, 45 insertions(+), 130 deletions(-)

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 303945554415..6d6bad80722e 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -282,29 +282,6 @@ _xfs_buf_alloc(
 	return 0;
 }
 
-static void
-xfs_buf_free_folios(
-	struct xfs_buf	*bp)
-{
-	uint		i;
-
-	ASSERT(bp->b_flags & _XBF_FOLIOS);
-
-	if (xfs_buf_is_vmapped(bp))
-		vm_unmap_ram(bp->b_addr, bp->b_folio_count);
-
-	for (i = 0; i < bp->b_folio_count; i++) {
-		if (bp->b_folios[i])
-			__folio_put(bp->b_folios[i]);
-	}
-	mm_account_reclaimed_pages(bp->b_folio_count);
-
-	if (bp->b_folios != bp->b_folio_array)
-		kfree(bp->b_folios);
-	bp->b_folios = NULL;
-	bp->b_flags &= ~_XBF_FOLIOS;
-}
-
 static void
 xfs_buf_free_callback(
 	struct callback_head	*cb)
@@ -323,13 +300,22 @@ xfs_buf_free(
 
 	ASSERT(list_empty(&bp->b_lru));
 
-	if (xfs_buftarg_is_mem(bp->b_target))
+	if (xfs_buftarg_is_mem(bp->b_target)) {
 		xmbuf_unmap_folio(bp);
-	else if (bp->b_flags & _XBF_FOLIOS)
-		xfs_buf_free_folios(bp);
-	else if (bp->b_flags & _XBF_KMEM)
-		kfree(bp->b_addr);
+		goto free;
+	}
 
+	if (!(bp->b_flags & _XBF_KMEM))
+		mm_account_reclaimed_pages(bp->b_folio_count);
+
+	if (bp->b_flags & _XBF_FOLIOS)
+		__folio_put(kmem_to_folio(bp->b_addr));
+	else
+		kvfree(bp->b_addr);
+
+	bp->b_flags &= _XBF_KMEM | _XBF_FOLIOS;
+
+free:
 	call_rcu(&bp->b_rcu, xfs_buf_free_callback);
 }
 
@@ -356,8 +342,6 @@ xfs_buf_alloc_kmem(
 		bp->b_addr = NULL;
 		return -ENOMEM;
 	}
-	bp->b_folios = bp->b_folio_array;
-	bp->b_folios[0] = kmem_to_folio(bp->b_addr);
 	bp->b_folio_count = 1;
 	bp->b_flags |= _XBF_KMEM;
 	return 0;
@@ -377,14 +361,15 @@ xfs_buf_alloc_folio(
 	struct xfs_buf	*bp,
 	gfp_t		gfp_mask)
 {
+	struct folio	*folio;
 	int		length = BBTOB(bp->b_length);
 	int		order = get_order(length);
 
-	bp->b_folio_array[0] = folio_alloc(gfp_mask, order);
-	if (!bp->b_folio_array[0])
+	folio = folio_alloc(gfp_mask, order);
+	if (!folio)
 		return false;
 
-	bp->b_folios = bp->b_folio_array;
+	bp->b_addr = folio_address(folio);
 	bp->b_folio_count = 1;
 	bp->b_flags |= _XBF_FOLIOS;
 	return true;
@@ -400,15 +385,11 @@ xfs_buf_alloc_folio(
  * contiguous memory region that we don't have to map and unmap to access the
  * data directly.
  *
- * The second type of buffer is the multi-folio buffer. These are *always* made
- * up of single page folios so that they can be fed to vmap_ram() to return a
- * contiguous memory region we can access the data through.
- *
- * We don't use high order folios for this second type of buffer (yet) because
- * having variable size folios makes offset-to-folio indexing and iteration of
- * the data range more complex than if they are fixed size. This case should now
- * be the slow path, though, so unless we regularly fail to allocate high order
- * folios, there should be little need to optimise this path.
+ * The second type of buffer is the vmalloc()d buffer. This provides the buffer
+ * with the required contiguous memory region but backed by discontiguous
+ * physical pages. vmalloc() typically doesn't fail, but it can and so we may
+ * need to wrap the allocation in a loop to prevent low memory failures and
+ * shutdowns.
  */
 static int
 xfs_buf_alloc_folios(
@@ -416,7 +397,7 @@ xfs_buf_alloc_folios(
 	xfs_buf_flags_t	flags)
 {
 	gfp_t		gfp_mask = GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOWARN;
-	long		filled = 0;
+	unsigned	nofs_flag;
 
 	if (flags & XBF_READ_AHEAD)
 		gfp_mask |= __GFP_NORETRY;
@@ -425,89 +406,32 @@ xfs_buf_alloc_folios(
 	if (!(flags & XBF_READ))
 		gfp_mask |= __GFP_ZERO;
 
-	/* Optimistically attempt a single high order folio allocation. */
-	if (xfs_buf_alloc_folio(bp, gfp_mask))
-		return 0;
-
 	/* Fall back to allocating an array of single page folios. */
 	bp->b_folio_count = DIV_ROUND_UP(BBTOB(bp->b_length), PAGE_SIZE);
-	if (bp->b_folio_count <= XB_FOLIOS) {
-		bp->b_folios = bp->b_folio_array;
-	} else {
-		bp->b_folios = kzalloc(sizeof(struct folio *) * bp->b_folio_count,
-					gfp_mask);
-		if (!bp->b_folios)
-			return -ENOMEM;
-	}
-	bp->b_flags |= _XBF_FOLIOS;
 
+	/* Optimistically attempt a single high order folio allocation. */
+	if (xfs_buf_alloc_folio(bp, gfp_mask))
+		return 0;
+
+	/* We are done if an order-0 allocation has already failed. */
+	if (bp->b_folio_count == 1)
+		return -ENOMEM;
 
 	/*
-	 * Bulk filling of pages can take multiple calls. Not filling the entire
-	 * array is not an allocation failure, so don't back off if we get at
-	 * least one extra page.
+	 * XXX(dgc): I think dquot reclaim is the only place we can get
+	 * to this function from memory reclaim context now. If we fix
+	 * that like we've fixed inode reclaim to avoid writeback from
+	 * reclaim, this nofs wrapping can go away.
 	 */
-	for (;;) {
-		long	last = filled;
-
-		filled = alloc_pages_bulk_array(gfp_mask, bp->b_folio_count,
-						(struct page **)bp->b_folios);
-		if (filled == bp->b_folio_count) {
-			XFS_STATS_INC(bp->b_mount, xb_page_found);
-			break;
-		}
-
-		if (filled != last)
-			continue;
-
-		if (flags & XBF_READ_AHEAD) {
-			xfs_buf_free_folios(bp);
-			return -ENOMEM;
-		}
-
-		XFS_STATS_INC(bp->b_mount, xb_page_retries);
-		memalloc_retry_wait(gfp_mask);
-	}
-
-	if (bp->b_folio_count == 1) {
-		/* A single folio buffer is always mappable */
-		bp->b_addr = folio_address(bp->b_folios[0]);
-	} else {
-		int retried = 0;
-		unsigned nofs_flag;
-
-		/*
-		 * vm_map_ram() will allocate auxiliary structures (e.g.
-		 * pagetables) with GFP_KERNEL, yet we often under a scoped nofs
-		 * context here. Mixing GFP_KERNEL with GFP_NOFS allocations
-		 * from the same call site that can be run from both above and
-		 * below memory reclaim causes lockdep false positives. Hence we
-		 * always need to force this allocation to nofs context because
-		 * we can't pass __GFP_NOLOCKDEP down to auxillary structures to
-		 * prevent false positive lockdep reports.
-		 *
-		 * XXX(dgc): I think dquot reclaim is the only place we can get
-		 * to this function from memory reclaim context now. If we fix
-		 * that like we've fixed inode reclaim to avoid writeback from
-		 * reclaim, this nofs wrapping can go away.
-		 */
-		nofs_flag = memalloc_nofs_save();
-		do {
-			bp->b_addr = vm_map_ram((struct page **)bp->b_folios,
-					bp->b_folio_count, -1);
-			if (bp->b_addr)
-				break;
-			vm_unmap_aliases();
-		} while (retried++ <= 1);
-		memalloc_nofs_restore(nofs_flag);
-
-		if (!bp->b_addr) {
-			xfs_warn_ratelimited(bp->b_mount,
-				"%s: failed to map %u folios", __func__,
-				bp->b_folio_count);
-			xfs_buf_free_folios(bp);
-			return -ENOMEM;
-		}
+	nofs_flag = memalloc_nofs_save();
+	bp->b_addr = __vmalloc(BBTOB(bp->b_length), gfp_mask);
+	memalloc_nofs_restore(nofs_flag);
+
+	if (!bp->b_addr) {
+		xfs_warn_ratelimited(bp->b_mount,
+			"%s: failed to allocate %u folios", __func__,
+			bp->b_folio_count);
+		return -ENOMEM;
 	}
 
 	return 0;
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 4d515407713b..68c24947ca1a 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -190,8 +190,6 @@ struct xfs_buf {
 	struct xfs_buf_log_item	*b_log_item;
 	struct list_head	b_li_list;	/* Log items list head */
 	struct xfs_trans	*b_transp;
-	struct folio		**b_folios;	/* array of folio pointers */
-	struct folio		*b_folio_array[XB_FOLIOS]; /* inline folios */
 	struct xfs_buf_map	*b_maps;	/* compound buffer map */
 	struct xfs_buf_map	__b_map;	/* inline compound buffer map */
 	int			b_map_count;
diff --git a/fs/xfs/xfs_buf_mem.c b/fs/xfs/xfs_buf_mem.c
index 26734c64c10e..336e7c8effb7 100644
--- a/fs/xfs/xfs_buf_mem.c
+++ b/fs/xfs/xfs_buf_mem.c
@@ -169,8 +169,6 @@ xmbuf_map_folio(
 	unlock_page(page);
 
 	bp->b_addr = page_address(page);
-	bp->b_folios = bp->b_folio_array;
-	bp->b_folios[0] = folio;
 	bp->b_folio_count = 1;
 	return 0;
 }
@@ -180,15 +178,10 @@ void
 xmbuf_unmap_folio(
 	struct xfs_buf		*bp)
 {
-	struct folio		*folio = bp->b_folios[0];
-
 	ASSERT(xfs_buftarg_is_mem(bp->b_target));
 
-	folio_put(folio);
-
+	folio_put(kmem_to_folio(bp->b_addr));
 	bp->b_addr = NULL;
-	bp->b_folios[0] = NULL;
-	bp->b_folios = NULL;
 	bp->b_folio_count = 0;
 }
 
-- 
2.43.0

next prev parent reply	other threads:[~2024-03-18 22:47 UTC|newest]

Thread overview: 47+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-03-18 22:45 [PATCH v2 0/9] xfs: use large folios for buffers Dave Chinner
2024-03-18 22:45 ` [PATCH 1/9] xfs: unmapped buffer item size straddling mismatch Dave Chinner
2024-03-18 22:45 ` [PATCH 2/9] xfs: use folios in the buffer cache Dave Chinner
2024-03-19  6:38   ` Christoph Hellwig
2024-03-19  6:52     ` Dave Chinner
2024-03-19  6:53   ` Christoph Hellwig
2024-03-19 21:42     ` Dave Chinner
2024-03-19 21:42     ` Dave Chinner
2024-03-19 17:15   ` Darrick J. Wong
2024-03-18 22:45 ` [PATCH 3/9] xfs: convert buffer cache to use high order folios Dave Chinner
2024-03-19  6:55   ` Christoph Hellwig
2024-03-19 17:29   ` Darrick J. Wong
2024-03-19 21:32     ` Christoph Hellwig
2024-03-19 21:38       ` Darrick J. Wong
2024-03-19 21:41         ` Christoph Hellwig
2024-03-19 22:23           ` Dave Chinner
2024-03-21  2:12           ` Darrick J. Wong
2024-03-21  2:40             ` Darrick J. Wong
2024-03-21 21:28               ` Christoph Hellwig
2024-03-21 21:39                 ` Darrick J. Wong
2024-03-21 22:02                   ` Christoph Hellwig
2024-03-19 21:55     ` Dave Chinner
2024-03-22  8:02   ` Pankaj Raghav (Samsung)
2024-03-22 22:04     ` Dave Chinner
2024-03-25 11:17       ` Pankaj Raghav (Samsung)
2024-03-18 22:45 ` [PATCH 4/9] xfs: kill XBF_UNMAPPED Dave Chinner
2024-03-19 17:30   ` Darrick J. Wong
2024-03-19 23:36     ` Dave Chinner
2024-03-18 22:45 ` [PATCH 5/9] xfs: buffer items don't straddle pages anymore Dave Chinner
2024-03-19  6:56   ` Christoph Hellwig
2024-03-19 17:31   ` Darrick J. Wong
2024-03-18 22:45 ` [PATCH 6/9] xfs: map buffers in xfs_buf_alloc_folios Dave Chinner
2024-03-19 17:34   ` Darrick J. Wong
2024-03-19 21:32     ` Christoph Hellwig
2024-03-19 21:39       ` Darrick J. Wong
2024-03-19 21:41         ` Christoph Hellwig
2024-03-18 22:45 ` [PATCH 7/9] xfs: walk b_addr for buffer I/O Dave Chinner
2024-03-19 17:42   ` Darrick J. Wong
2024-03-19 21:33     ` Christoph Hellwig
2024-03-18 22:45 ` Dave Chinner [this message]
2024-03-19 17:48   ` [PATCH 8/9] xfs: use vmalloc for multi-folio buffers Darrick J. Wong
2024-03-20  0:20     ` Dave Chinner
2024-03-18 22:46 ` [PATCH 9/9] xfs: rename bp->b_folio_count Dave Chinner
2024-03-19  7:37   ` Christoph Hellwig
2024-03-19 23:59     ` Dave Chinner
2024-03-19  0:24 ` [PATCH v2 0/9] xfs: use large folios for buffers Christoph Hellwig
2024-03-19  0:44   ` Dave Chinner

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:30394555441 dfblob:6d6bad80722 dfblob:4d515407713
dfblob:68c24947ca1 dfblob:26734c64c10 dfblob:336e7c8effb )
 OR (
bs:"[PATCH 8/9] xfs: use vmalloc for multi-folio buffers" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240318224715.3367463-9-david@fromorbit.com \
    --to=david@fromorbit.com \
    --cc=linux-xfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.