public inbox for linux-xfs@vger.kernel.org
 help / color / mirror / Atom feed
From: Carlos Maiolino <cem@kernel.org>
To: Christoph Hellwig <hch@lst.de>
Cc: Jens Axboe <axboe@kernel.dk>, Damien Le Moal <dlemoal@kernel.org>,
	 Hans Holmberg <hans.holmberg@wdc.com>,
	linux-block@vger.kernel.org, linux-xfs@vger.kernel.org
Subject: Re: [PATCH 3/3] xfs: rework zone GC buffer management
Date: Fri, 9 Jan 2026 13:24:19 +0100	[thread overview]
Message-ID: <aWDzaJB7PXyW09Nb@nidhogg.toxiclabs.cc> (raw)
In-Reply-To: <20260106075914.1614368-4-hch@lst.de>

On Tue, Jan 06, 2026 at 08:58:54AM +0100, Christoph Hellwig wrote:
> The double buffering where just one scratch area is used at a time does
> not efficiently use the available memory.  It was originally implemented
> when GC I/O could happen out of order, but that was removed before
> upstream submission to avoid fragmentation.  Now that all GC I/Os are
> processed in order, just use a number of buffers as a simple ring buffer.
> 
> For a synthetic benchmark that fills 256MiB HDD zones and punches out
> holes to free half the space this leads to a decrease of GC time by
> a little more than 25%.
> 
> Thanks to Hans Holmberg <hans.holmberg@wdc.com> for testing and
> benchmarking.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Hans Holmberg <hans.holmberg@wdc.com>
> Reviewed-by: Damien Le Moal <dlemoal@kernel.org>

Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>

> ---
>  fs/xfs/xfs_zone_gc.c | 106 ++++++++++++++++++++++++-------------------
>  1 file changed, 59 insertions(+), 47 deletions(-)
> 
> diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c
> index 40408b1132e0..960cf79d484e 100644
> --- a/fs/xfs/xfs_zone_gc.c
> +++ b/fs/xfs/xfs_zone_gc.c
> @@ -50,23 +50,11 @@
>   */
>  
>  /*
> - * Size of each GC scratch pad.  This is also the upper bound for each
> - * GC I/O, which helps to keep latency down.
> + * Size of each GC scratch allocation, and the number of buffers.
>   */
> -#define XFS_GC_CHUNK_SIZE	SZ_1M
> -
> -/*
> - * Scratchpad data to read GCed data into.
> - *
> - * The offset member tracks where the next allocation starts, and freed tracks
> - * the amount of space that is not used anymore.
> - */
> -#define XFS_ZONE_GC_NR_SCRATCH	2
> -struct xfs_zone_scratch {
> -	struct folio			*folio;
> -	unsigned int			offset;
> -	unsigned int			freed;
> -};
> +#define XFS_GC_BUF_SIZE		SZ_1M
> +#define XFS_GC_NR_BUFS		2
> +static_assert(XFS_GC_NR_BUFS < BIO_MAX_VECS);
>  
>  /*
>   * Chunk that is read and written for each GC operation.
> @@ -141,10 +129,14 @@ struct xfs_zone_gc_data {
>  	struct bio_set			bio_set;
>  
>  	/*
> -	 * Scratchpad used, and index to indicated which one is used.
> +	 * Scratchpad to buffer GC data, organized as a ring buffer over
> +	 * discontiguous folios.  scratch_head is where the buffer is filled,
> +	 * and scratch_tail tracks the buffer space freed.
>  	 */
> -	struct xfs_zone_scratch		scratch[XFS_ZONE_GC_NR_SCRATCH];
> -	unsigned int			scratch_idx;
> +	struct folio			*scratch_folios[XFS_GC_NR_BUFS];
> +	unsigned int			scratch_size;
> +	unsigned int			scratch_head;
> +	unsigned int			scratch_tail;
>  
>  	/*
>  	 * List of bios currently being read, written and reset.
> @@ -210,20 +202,16 @@ xfs_zone_gc_data_alloc(
>  	if (!data->iter.recs)
>  		goto out_free_data;
>  
> -	/*
> -	 * We actually only need a single bio_vec.  It would be nice to have
> -	 * a flag that only allocates the inline bvecs and not the separate
> -	 * bvec pool.
> -	 */
>  	if (bioset_init(&data->bio_set, 16, offsetof(struct xfs_gc_bio, bio),
>  			BIOSET_NEED_BVECS))
>  		goto out_free_recs;
> -	for (i = 0; i < XFS_ZONE_GC_NR_SCRATCH; i++) {
> -		data->scratch[i].folio =
> -			folio_alloc(GFP_KERNEL, get_order(XFS_GC_CHUNK_SIZE));
> -		if (!data->scratch[i].folio)
> +	for (i = 0; i < XFS_GC_NR_BUFS; i++) {
> +		data->scratch_folios[i] =
> +			folio_alloc(GFP_KERNEL, get_order(XFS_GC_BUF_SIZE));
> +		if (!data->scratch_folios[i])
>  			goto out_free_scratch;
>  	}
> +	data->scratch_size = XFS_GC_BUF_SIZE * XFS_GC_NR_BUFS;
>  	INIT_LIST_HEAD(&data->reading);
>  	INIT_LIST_HEAD(&data->writing);
>  	INIT_LIST_HEAD(&data->resetting);
> @@ -232,7 +220,7 @@ xfs_zone_gc_data_alloc(
>  
>  out_free_scratch:
>  	while (--i >= 0)
> -		folio_put(data->scratch[i].folio);
> +		folio_put(data->scratch_folios[i]);
>  	bioset_exit(&data->bio_set);
>  out_free_recs:
>  	kfree(data->iter.recs);
> @@ -247,8 +235,8 @@ xfs_zone_gc_data_free(
>  {
>  	int			i;
>  
> -	for (i = 0; i < XFS_ZONE_GC_NR_SCRATCH; i++)
> -		folio_put(data->scratch[i].folio);
> +	for (i = 0; i < XFS_GC_NR_BUFS; i++)
> +		folio_put(data->scratch_folios[i]);
>  	bioset_exit(&data->bio_set);
>  	kfree(data->iter.recs);
>  	kfree(data);
> @@ -590,7 +578,12 @@ static unsigned int
>  xfs_zone_gc_scratch_available(
>  	struct xfs_zone_gc_data	*data)
>  {
> -	return XFS_GC_CHUNK_SIZE - data->scratch[data->scratch_idx].offset;
> +	if (!data->scratch_tail)
> +		return data->scratch_size - data->scratch_head;
> +
> +	if (!data->scratch_head)
> +		return data->scratch_tail;
> +	return (data->scratch_size - data->scratch_head) + data->scratch_tail;
>  }
>  
>  static bool
> @@ -664,6 +657,28 @@ xfs_zone_gc_alloc_blocks(
>  	return oz;
>  }
>  
> +static void
> +xfs_zone_gc_add_data(
> +	struct xfs_gc_bio	*chunk)
> +{
> +	struct xfs_zone_gc_data	*data = chunk->data;
> +	unsigned int		len = chunk->len;
> +	unsigned int		off = data->scratch_head;
> +
> +	do {
> +		unsigned int	this_off = off % XFS_GC_BUF_SIZE;
> +		unsigned int	this_len = min(len, XFS_GC_BUF_SIZE - this_off);
> +
> +		bio_add_folio_nofail(&chunk->bio,
> +				data->scratch_folios[off / XFS_GC_BUF_SIZE],
> +				this_len, this_off);
> +		len -= this_len;
> +		off += this_len;
> +		if (off == data->scratch_size)
> +			off = 0;
> +	} while (len);
> +}
> +
>  static bool
>  xfs_zone_gc_start_chunk(
>  	struct xfs_zone_gc_data	*data)
> @@ -677,6 +692,7 @@ xfs_zone_gc_start_chunk(
>  	struct xfs_inode	*ip;
>  	struct bio		*bio;
>  	xfs_daddr_t		daddr;
> +	unsigned int		len;
>  	bool			is_seq;
>  
>  	if (xfs_is_shutdown(mp))
> @@ -691,17 +707,19 @@ xfs_zone_gc_start_chunk(
>  		return false;
>  	}
>  
> -	bio = bio_alloc_bioset(bdev, 1, REQ_OP_READ, GFP_NOFS, &data->bio_set);
> +	len = XFS_FSB_TO_B(mp, irec.rm_blockcount);
> +	bio = bio_alloc_bioset(bdev,
> +			min(howmany(len, XFS_GC_BUF_SIZE) + 1, XFS_GC_NR_BUFS),
> +			REQ_OP_READ, GFP_NOFS, &data->bio_set);
>  
>  	chunk = container_of(bio, struct xfs_gc_bio, bio);
>  	chunk->ip = ip;
>  	chunk->offset = XFS_FSB_TO_B(mp, irec.rm_offset);
> -	chunk->len = XFS_FSB_TO_B(mp, irec.rm_blockcount);
> +	chunk->len = len;
>  	chunk->old_startblock =
>  		xfs_rgbno_to_rtb(iter->victim_rtg, irec.rm_startblock);
>  	chunk->new_daddr = daddr;
>  	chunk->is_seq = is_seq;
> -	chunk->scratch = &data->scratch[data->scratch_idx];
>  	chunk->data = data;
>  	chunk->oz = oz;
>  	chunk->victim_rtg = iter->victim_rtg;
> @@ -710,13 +728,9 @@ xfs_zone_gc_start_chunk(
>  
>  	bio->bi_iter.bi_sector = xfs_rtb_to_daddr(mp, chunk->old_startblock);
>  	bio->bi_end_io = xfs_zone_gc_end_io;
> -	bio_add_folio_nofail(bio, chunk->scratch->folio, chunk->len,
> -			chunk->scratch->offset);
> -	chunk->scratch->offset += chunk->len;
> -	if (chunk->scratch->offset == XFS_GC_CHUNK_SIZE) {
> -		data->scratch_idx =
> -			(data->scratch_idx + 1) % XFS_ZONE_GC_NR_SCRATCH;
> -	}
> +	xfs_zone_gc_add_data(chunk);
> +	data->scratch_head = (data->scratch_head + len) % data->scratch_size;
> +
>  	WRITE_ONCE(chunk->state, XFS_GC_BIO_NEW);
>  	list_add_tail(&chunk->entry, &data->reading);
>  	xfs_zone_gc_iter_advance(iter, irec.rm_blockcount);
> @@ -834,6 +848,7 @@ xfs_zone_gc_finish_chunk(
>  	struct xfs_gc_bio	*chunk)
>  {
>  	uint			iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
> +	struct xfs_zone_gc_data	*data = chunk->data;
>  	struct xfs_inode	*ip = chunk->ip;
>  	struct xfs_mount	*mp = ip->i_mount;
>  	int			error;
> @@ -845,11 +860,8 @@ xfs_zone_gc_finish_chunk(
>  		return;
>  	}
>  
> -	chunk->scratch->freed += chunk->len;
> -	if (chunk->scratch->freed == chunk->scratch->offset) {
> -		chunk->scratch->offset = 0;
> -		chunk->scratch->freed = 0;
> -	}
> +	data->scratch_tail =
> +		(data->scratch_tail + chunk->len) % data->scratch_size;
>  
>  	/*
>  	 * Cycle through the iolock and wait for direct I/O and layouts to
> -- 
> 2.47.3
> 
> 

  reply	other threads:[~2026-01-09 12:24 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-01-06  7:58 improve zoned XFS GC buffer management v2 Christoph Hellwig
2026-01-06  7:58 ` [PATCH 1/3] block: add a bio_reuse helper Christoph Hellwig
2026-01-09 12:12   ` Carlos Maiolino
2026-01-09 16:19   ` Darrick J. Wong
2026-01-09 16:25     ` Christoph Hellwig
2026-01-10  0:50       ` Darrick J. Wong
2026-01-09 16:30   ` Carlos Maiolino
2026-01-12 20:22   ` Jens Axboe
2026-01-06  7:58 ` [PATCH 2/3] xfs: use bio_reuse in the zone GC code Christoph Hellwig
2026-01-09 12:13   ` Carlos Maiolino
2026-01-06  7:58 ` [PATCH 3/3] xfs: rework zone GC buffer management Christoph Hellwig
2026-01-09 12:24   ` Carlos Maiolino [this message]
2026-01-09 15:03 ` improve zoned XFS GC buffer management v2 Carlos Maiolino
  -- strict thread matches above, loose matches on Subject: below --
2026-01-14 13:06 improve zoned XFS GC buffer management v4 Christoph Hellwig
2026-01-14 13:06 ` [PATCH 3/3] xfs: rework zone GC buffer management Christoph Hellwig
2026-01-14 19:59   ` Darrick J. Wong
2026-01-15  6:12     ` Christoph Hellwig
2026-01-25  0:03   ` Chris Mason
2026-01-26 20:30     ` Keith Busch
2026-01-27  6:53       ` Christoph Hellwig
2026-01-13  7:19 improve zoned XFS GC buffer management v3 Christoph Hellwig
2026-01-13  7:19 ` [PATCH 3/3] xfs: rework zone GC buffer management Christoph Hellwig
2025-12-18  6:31 improve zoned XFS " Christoph Hellwig
2025-12-18  6:31 ` [PATCH 3/3] xfs: rework zone " Christoph Hellwig
2025-12-18  7:12   ` Damien Le Moal
2025-12-19  8:06   ` Hans Holmberg

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=aWDzaJB7PXyW09Nb@nidhogg.toxiclabs.cc \
    --to=cem@kernel.org \
    --cc=axboe@kernel.dk \
    --cc=dlemoal@kernel.org \
    --cc=hans.holmberg@wdc.com \
    --cc=hch@lst.de \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-xfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox