public inbox for linux-xfs@vger.kernel.org
 help / color / mirror / Atom feed
From: Christoph Hellwig <hch@lst.de>
To: Carlos Maiolino <cem@kernel.org>
Cc: Damien Le Moal <dlemoal@kernel.org>,
	Hans Holmberg <hans.holmberg@wdc.com>,
	linux-xfs@vger.kernel.org, Carlos Maiolino <cmaiolino@redhat.com>
Subject: [PATCH 7/9] xfs: reduce special casing for the open GC zone
Date: Tue, 31 Mar 2026 17:27:29 +0200	[thread overview]
Message-ID: <20260331152751.4048218-8-hch@lst.de> (raw)
In-Reply-To: <20260331152751.4048218-1-hch@lst.de>

Currently the open zone used for garbage collection is a special
snowflake, and it has been a bit annoying for some further zoned XFS work
I've been doing.

Remove the zi_open_gc_zone field and instead track the open GC zone in
the zi_open_zones list together with the normal open zones, and keep an
extra pointer and a reference to it in the GC thread's data structure.
This means anything iterating over open zones just has to look at
zi_open_zones, and the lifetime rules are consistent.  It also helps to
add support for multiple open GC zones if we ever need them, and removes
a bit of code.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Hans Holmberg <hans.holmberg@wdc.com>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
---
 fs/xfs/xfs_zone_alloc.c | 19 +++++++----
 fs/xfs/xfs_zone_gc.c    | 71 ++++++++++++++++++++++-------------------
 fs/xfs/xfs_zone_info.c  | 12 +++----
 fs/xfs/xfs_zone_priv.h  | 15 ++-------
 4 files changed, 58 insertions(+), 59 deletions(-)

diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c
index 17a3762aa951..a851b98143c0 100644
--- a/fs/xfs/xfs_zone_alloc.c
+++ b/fs/xfs/xfs_zone_alloc.c
@@ -174,16 +174,18 @@ xfs_open_zone_mark_full(
 	WRITE_ONCE(rtg->rtg_open_zone, NULL);
 
 	spin_lock(&zi->zi_open_zones_lock);
-	if (oz->oz_is_gc) {
-		ASSERT(current == zi->zi_gc_thread);
-		zi->zi_open_gc_zone = NULL;
-	} else {
+	if (oz->oz_is_gc)
+		zi->zi_nr_open_gc_zones--;
+	else
 		zi->zi_nr_open_zones--;
-		list_del_init(&oz->oz_entry);
-	}
+	list_del_init(&oz->oz_entry);
 	spin_unlock(&zi->zi_open_zones_lock);
 
-	wake_up_all(&zi->zi_zone_wait);
+	if (oz->oz_is_gc)
+		wake_up_process(zi->zi_gc_thread);
+	else
+		wake_up_all(&zi->zi_zone_wait);
+
 	if (used < rtg_blocks(rtg))
 		xfs_zone_account_reclaimable(rtg, rtg_blocks(rtg) - used);
 	xfs_open_zone_put(oz);
@@ -557,6 +559,9 @@ xfs_try_use_zone(
 	struct xfs_open_zone	*oz,
 	unsigned int		goodness)
 {
+	if (oz->oz_is_gc)
+		return false;
+
 	if (oz->oz_allocated == rtg_blocks(oz->oz_rtg))
 		return false;
 
diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c
index 2c2fa924fecd..30bcc415eaeb 100644
--- a/fs/xfs/xfs_zone_gc.c
+++ b/fs/xfs/xfs_zone_gc.c
@@ -125,6 +125,7 @@ struct xfs_zone_gc_iter {
  */
 struct xfs_zone_gc_data {
 	struct xfs_mount		*mp;
+	struct xfs_open_zone		*oz;
 
 	/* bioset used to allocate the gc_bios */
 	struct bio_set			bio_set;
@@ -525,9 +526,10 @@ xfs_zone_gc_select_victim(
 }
 
 static int
-xfs_zone_gc_steal_open(
-	struct xfs_zone_info	*zi)
+xfs_zone_gc_steal_open_zone(
+	struct xfs_zone_gc_data	*data)
 {
+	struct xfs_zone_info	*zi = data->mp->m_zone_info;
 	struct xfs_open_zone	*oz, *found = NULL;
 
 	spin_lock(&zi->zi_open_zones_lock);
@@ -542,10 +544,12 @@ xfs_zone_gc_steal_open(
 
 	trace_xfs_zone_gc_target_stolen(found->oz_rtg);
 	found->oz_is_gc = true;
-	list_del_init(&found->oz_entry);
 	zi->zi_nr_open_zones--;
-	zi->zi_open_gc_zone = found;
+	zi->zi_nr_open_gc_zones++;
 	spin_unlock(&zi->zi_open_zones_lock);
+
+	atomic_inc(&found->oz_ref);
+	data->oz = found;
 	return 0;
 }
 
@@ -554,39 +558,43 @@ xfs_zone_gc_steal_open(
  */
 static bool
 xfs_zone_gc_select_target(
-	struct xfs_mount	*mp)
+	struct xfs_zone_gc_data	*data)
 {
-	struct xfs_zone_info	*zi = mp->m_zone_info;
-	struct xfs_open_zone	*oz = zi->zi_open_gc_zone;
+	struct xfs_zone_info	*zi = data->mp->m_zone_info;
 
-	if (oz) {
+	if (data->oz) {
 		/*
 		 * If we have space available, just keep using the existing
 		 * zone.
 		 */
-		if (oz->oz_allocated < rtg_blocks(oz->oz_rtg))
+		if (data->oz->oz_allocated < rtg_blocks(data->oz->oz_rtg))
 			return true;
 
 		/*
 		 * Wait for all writes to the current zone to finish before
 		 * picking a new one.
 		 */
-		if (oz->oz_written < rtg_blocks(oz->oz_rtg))
+		if (data->oz->oz_written < rtg_blocks(data->oz->oz_rtg))
 			return false;
+
+		xfs_open_zone_put(data->oz);
 	}
 
 	/*
 	 * Open a new zone when there is none currently in use.
 	 */
 	ASSERT(zi->zi_nr_open_zones <=
-		mp->m_max_open_zones - XFS_OPEN_GC_ZONES);
-	oz = xfs_open_zone(mp, WRITE_LIFE_NOT_SET, true);
-	if (oz)
-		trace_xfs_zone_gc_target_opened(oz->oz_rtg);
+		data->mp->m_max_open_zones - XFS_OPEN_GC_ZONES);
+	data->oz = xfs_open_zone(data->mp, WRITE_LIFE_NOT_SET, true);
+	if (!data->oz)
+		return false;
+	trace_xfs_zone_gc_target_opened(data->oz->oz_rtg);
+	atomic_inc(&data->oz->oz_ref);
 	spin_lock(&zi->zi_open_zones_lock);
-	zi->zi_open_gc_zone = oz;
+	zi->zi_nr_open_gc_zones++;
+	list_add_tail(&data->oz->oz_entry, &zi->zi_open_zones);
 	spin_unlock(&zi->zi_open_zones_lock);
-	return !!oz;
+	return true;
 }
 
 static void
@@ -609,7 +617,7 @@ xfs_zone_gc_alloc_blocks(
 	bool			*is_seq)
 {
 	struct xfs_mount	*mp = data->mp;
-	struct xfs_open_zone	*oz = mp->m_zone_info->zi_open_gc_zone;
+	struct xfs_open_zone	*oz = data->oz;
 
 	*count_fsb = min(*count_fsb, XFS_B_TO_FSB(mp, data->scratch_available));
 
@@ -683,7 +691,7 @@ xfs_zone_gc_can_start_chunk(
 			return false;
 	}
 
-	return xfs_zone_gc_select_target(data->mp);
+	return xfs_zone_gc_select_target(data);
 }
 
 static bool
@@ -728,7 +736,7 @@ xfs_zone_gc_start_chunk(
 	chunk->new_daddr = daddr;
 	chunk->is_seq = is_seq;
 	chunk->data = data;
-	chunk->oz = mp->m_zone_info->zi_open_gc_zone;
+	chunk->oz = data->oz;
 	chunk->victim_rtg = iter->victim_rtg;
 	atomic_inc(&rtg_group(chunk->victim_rtg)->xg_active_ref);
 	atomic_inc(&chunk->victim_rtg->rtg_gccount);
@@ -1134,6 +1142,8 @@ xfs_zoned_gcd(
 	}
 	xfs_clear_zonegc_running(mp);
 
+	if (data->oz)
+		xfs_open_zone_put(data->oz);
 	if (data->iter.victim_rtg)
 		xfs_rtgroup_rele(data->iter.victim_rtg);
 
@@ -1183,6 +1193,10 @@ xfs_zone_gc_mount(
 	struct xfs_zone_gc_data	*data;
 	int			error;
 
+	data = xfs_zone_gc_data_alloc(mp);
+	if (!data)
+		return -ENOMEM;
+
 	/*
 	 * If there are no free zones available for GC, or the number of open
 	 * zones has reached the open zone limit, pick the open zone with
@@ -1192,35 +1206,30 @@ xfs_zone_gc_mount(
 	 */
 	if (!xfs_group_marked(mp, XG_TYPE_RTG, XFS_RTG_FREE) ||
 	    zi->zi_nr_open_zones >= mp->m_max_open_zones) {
-		error = xfs_zone_gc_steal_open(zi);
+		error = xfs_zone_gc_steal_open_zone(data);
 		if (error) {
 			xfs_warn(mp, "unable to steal an open zone for gc");
-			return error;
+			goto out_free_gc_data;
 		}
 	}
 
-	data = xfs_zone_gc_data_alloc(mp);
-	if (!data) {
-		error = -ENOMEM;
-		goto out_put_gc_zone;
-	}
-
 	zi->zi_gc_thread = kthread_create(xfs_zoned_gcd, data,
 			"xfs-zone-gc/%s", mp->m_super->s_id);
 	if (IS_ERR(zi->zi_gc_thread)) {
 		xfs_warn(mp, "unable to create zone gc thread");
 		error = PTR_ERR(zi->zi_gc_thread);
-		goto out_free_gc_data;
+		goto out_put_oz;
 	}
 
 	/* xfs_zone_gc_start will unpark for rw mounts */
 	kthread_park(zi->zi_gc_thread);
 	return 0;
 
+out_put_oz:
+	if (data->oz)
+		xfs_open_zone_put(data->oz);
 out_free_gc_data:
 	kfree(data);
-out_put_gc_zone:
-	xfs_open_zone_put(zi->zi_open_gc_zone);
 	return error;
 }
 
@@ -1231,6 +1240,4 @@ xfs_zone_gc_unmount(
 	struct xfs_zone_info	*zi = mp->m_zone_info;
 
 	kthread_stop(zi->zi_gc_thread);
-	if (zi->zi_open_gc_zone)
-		xfs_open_zone_put(zi->zi_open_gc_zone);
 }
diff --git a/fs/xfs/xfs_zone_info.c b/fs/xfs/xfs_zone_info.c
index a2af44011654..dcdc1dd206b2 100644
--- a/fs/xfs/xfs_zone_info.c
+++ b/fs/xfs/xfs_zone_info.c
@@ -30,11 +30,12 @@ xfs_show_open_zone(
 	struct seq_file		*m,
 	struct xfs_open_zone	*oz)
 {
-	seq_printf(m, "\t  zone %d, wp %u, written %u, used %u, hint %s\n",
+	seq_printf(m, "\t  zone %d, wp %u, written %u, used %u, hint %s %s\n",
 		rtg_rgno(oz->oz_rtg),
 		oz->oz_allocated, oz->oz_written,
 		rtg_rmap(oz->oz_rtg)->i_used_blocks,
-		xfs_write_hint_to_str(oz->oz_write_hint));
+		xfs_write_hint_to_str(oz->oz_write_hint),
+		oz->oz_is_gc ? "(GC)" : "");
 }
 
 static void
@@ -58,9 +59,8 @@ xfs_show_full_zone_used_distribution(
 	spin_unlock(&zi->zi_used_buckets_lock);
 
 	full = mp->m_sb.sb_rgcount;
-	if (zi->zi_open_gc_zone)
-		full--;
 	full -= zi->zi_nr_open_zones;
+	full -= zi->zi_nr_open_gc_zones;
 	full -= atomic_read(&zi->zi_nr_free_zones);
 	full -= reclaimable;
 
@@ -100,10 +100,6 @@ xfs_zoned_show_stats(
 	seq_puts(m, "\topen zones:\n");
 	list_for_each_entry(oz, &zi->zi_open_zones, oz_entry)
 		xfs_show_open_zone(m, oz);
-	if (zi->zi_open_gc_zone) {
-		seq_puts(m, "\topen gc zone:\n");
-		xfs_show_open_zone(m, zi->zi_open_gc_zone);
-	}
 	spin_unlock(&zi->zi_open_zones_lock);
 	seq_puts(m, "\tused blocks distribution (fully written zones):\n");
 	xfs_show_full_zone_used_distribution(m, mp);
diff --git a/fs/xfs/xfs_zone_priv.h b/fs/xfs/xfs_zone_priv.h
index 8fbf9a52964e..fcb57506d8e6 100644
--- a/fs/xfs/xfs_zone_priv.h
+++ b/fs/xfs/xfs_zone_priv.h
@@ -32,11 +32,7 @@ struct xfs_open_zone {
 	 */
 	enum rw_hint		oz_write_hint;
 
-	/*
-	 * Is this open zone used for garbage collection?  There can only be a
-	 * single open GC zone, which is pointed to by zi_open_gc_zone in
-	 * struct xfs_zone_info.  Constant over the life time of an open zone.
-	 */
+	/* Is this open zone used for garbage collection? */
 	bool			oz_is_gc;
 
 	/*
@@ -68,6 +64,7 @@ struct xfs_zone_info {
 	spinlock_t		zi_open_zones_lock;
 	struct list_head	zi_open_zones;
 	unsigned int		zi_nr_open_zones;
+	unsigned int		zi_nr_open_gc_zones;
 
 	/*
 	 * Free zone search cursor and number of free zones:
@@ -81,15 +78,9 @@ struct xfs_zone_info {
 	wait_queue_head_t	zi_zone_wait;
 
 	/*
-	 * Pointer to the GC thread, and the current open zone used by GC
-	 * (if any).
-	 *
-	 * zi_open_gc_zone is mostly private to the GC thread, but can be read
-	 * for debugging from other threads, in which case zi_open_zones_lock
-	 * must be taken to access it.
+	 * Pointer to the GC thread.
 	 */
 	struct task_struct      *zi_gc_thread;
-	struct xfs_open_zone	*zi_open_gc_zone;
 
 	/*
 	 * List of zones that need a reset:
-- 
2.47.3


  parent reply	other threads:[~2026-03-31 15:28 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-31 15:27 cleanup open GC zone handling v2 Christoph Hellwig
2026-03-31 15:27 ` [PATCH 1/9] xfs: delay initial open of the GC zone Christoph Hellwig
2026-03-31 15:27 ` [PATCH 2/9] xfs: add a separate tracepoint for stealing an open zone for GC Christoph Hellwig
2026-03-31 15:27 ` [PATCH 3/9] xfs: put the open zone later xfs_open_zone_put Christoph Hellwig
2026-03-31 15:27 ` [PATCH 4/9] xfs: rename xfs_zone_gc_iter_next to xfs_zone_gc_iter_irec Christoph Hellwig
2026-03-31 15:27 ` [PATCH 5/9] xfs: refactor GC zone selection helpers Christoph Hellwig
2026-03-31 15:27 ` [PATCH 6/9] xfs: streamline GC zone selection Christoph Hellwig
2026-03-31 15:27 ` Christoph Hellwig [this message]
2026-03-31 15:27 ` [PATCH 8/9] xfs: expose the number of open zones in sysfs Christoph Hellwig
2026-03-31 19:45   ` Damien Le Moal
2026-04-01 13:10   ` Hans Holmberg
2026-03-31 15:27 ` [PATCH 9/9] xfs: untangle the open zones reporting in mountinfo Christoph Hellwig
2026-04-07 13:38 ` cleanup open GC zone handling v2 Carlos Maiolino

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260331152751.4048218-8-hch@lst.de \
    --to=hch@lst.de \
    --cc=cem@kernel.org \
    --cc=cmaiolino@redhat.com \
    --cc=dlemoal@kernel.org \
    --cc=hans.holmberg@wdc.com \
    --cc=linux-xfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox