From: "Darrick J. Wong" <djwong@kernel.org>
To: Christoph Hellwig <hch@lst.de>
Cc: Carlos Maiolino <cem@kernel.org>,
Hans Holmberg <hans.holmberg@wdc.com>,
linux-xfs@vger.kernel.org
Subject: Re: [PATCH 03/45] xfs: support reserved blocks for the rt extent counter
Date: Tue, 25 Feb 2025 10:05:56 -0800 [thread overview]
Message-ID: <20250225180556.GI6242@frogsfrogsfrogs> (raw)
In-Reply-To: <20250218081153.3889537-4-hch@lst.de>
On Tue, Feb 18, 2025 at 09:10:06AM +0100, Christoph Hellwig wrote:
> The zoned space allocator will need reserved RT extents for garbage
> collection and zeroing of partial blocks. Move the resblks related
> fields into the freecounter array so that they can be used for all
> counters.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
> fs/xfs/scrub/fscounters.c | 2 +-
> fs/xfs/xfs_fsops.c | 25 ++++++++++++----------
> fs/xfs/xfs_fsops.h | 3 ++-
> fs/xfs/xfs_ioctl.c | 6 +++---
> fs/xfs/xfs_mount.c | 44 ++++++++++++++++++---------------------
> fs/xfs/xfs_mount.h | 12 ++++++++---
> fs/xfs/xfs_super.c | 32 +++++++++++++++++-----------
> 7 files changed, 69 insertions(+), 55 deletions(-)
>
> diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c
> index 207a238de429..9dd893ece188 100644
> --- a/fs/xfs/scrub/fscounters.c
> +++ b/fs/xfs/scrub/fscounters.c
> @@ -350,7 +350,7 @@ xchk_fscount_aggregate_agcounts(
> * The global incore space reservation is taken from the incore
> * counters, so leave that out of the computation.
> */
> - fsc->fdblocks -= mp->m_resblks_avail;
> + fsc->fdblocks -= mp->m_free[XC_FREE_BLOCKS].res_avail;
>
> /*
> * Delayed allocation reservations are taken out of the incore counters
> diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
> index 58249f37a7ad..f055aebe4c7a 100644
> --- a/fs/xfs/xfs_fsops.c
> +++ b/fs/xfs/xfs_fsops.c
> @@ -366,6 +366,7 @@ xfs_growfs_log(
> int
> xfs_reserve_blocks(
> struct xfs_mount *mp,
> + enum xfs_free_counter ctr,
> uint64_t request)
> {
> int64_t lcounter, delta;
> @@ -373,6 +374,8 @@ xfs_reserve_blocks(
> int64_t free;
> int error = 0;
>
> + ASSERT(ctr < XC_FREE_NR);
> +
> /*
> * With per-cpu counters, this becomes an interesting problem. we need
> * to work out if we are freeing or allocation blocks first, then we can
> @@ -391,16 +394,16 @@ xfs_reserve_blocks(
> * counters directly since we shouldn't have any problems unreserving
> * space.
> */
> - if (mp->m_resblks > request) {
> - lcounter = mp->m_resblks_avail - request;
> + if (mp->m_free[ctr].res_total > request) {
> + lcounter = mp->m_free[ctr].res_avail - request;
> if (lcounter > 0) { /* release unused blocks */
> fdblks_delta = lcounter;
> - mp->m_resblks_avail -= lcounter;
> + mp->m_free[ctr].res_avail -= lcounter;
> }
> - mp->m_resblks = request;
> + mp->m_free[ctr].res_total = request;
> if (fdblks_delta) {
> spin_unlock(&mp->m_sb_lock);
> - xfs_add_fdblocks(mp, fdblks_delta);
> + xfs_add_freecounter(mp, ctr, fdblks_delta);
> spin_lock(&mp->m_sb_lock);
> }
>
> @@ -419,10 +422,10 @@ xfs_reserve_blocks(
> * space to fill it because mod_fdblocks will refill an undersized
> * reserve when it can.
> */
> - free = xfs_sum_freecounter_raw(mp, XC_FREE_BLOCKS) -
> - xfs_freecounter_unavailable(mp, XC_FREE_BLOCKS);
> - delta = request - mp->m_resblks;
> - mp->m_resblks = request;
> + free = xfs_sum_freecounter_raw(mp, ctr) -
> + xfs_freecounter_unavailable(mp, ctr);
> + delta = request - mp->m_free[ctr].res_total;
> + mp->m_free[ctr].res_total = request;
> if (delta > 0 && free > 0) {
> /*
> * We'll either succeed in getting space from the free block
> @@ -436,9 +439,9 @@ xfs_reserve_blocks(
> */
> fdblks_delta = min(free, delta);
> spin_unlock(&mp->m_sb_lock);
> - error = xfs_dec_fdblocks(mp, fdblks_delta, 0);
> + error = xfs_dec_freecounter(mp, ctr, fdblks_delta, 0);
> if (!error)
> - xfs_add_fdblocks(mp, fdblks_delta);
> + xfs_add_freecounter(mp, ctr, fdblks_delta);
> spin_lock(&mp->m_sb_lock);
> }
> out:
> diff --git a/fs/xfs/xfs_fsops.h b/fs/xfs/xfs_fsops.h
> index 3e2f73bcf831..9d23c361ef56 100644
> --- a/fs/xfs/xfs_fsops.h
> +++ b/fs/xfs/xfs_fsops.h
> @@ -8,7 +8,8 @@
>
> int xfs_growfs_data(struct xfs_mount *mp, struct xfs_growfs_data *in);
> int xfs_growfs_log(struct xfs_mount *mp, struct xfs_growfs_log *in);
> -int xfs_reserve_blocks(struct xfs_mount *mp, uint64_t request);
> +int xfs_reserve_blocks(struct xfs_mount *mp, enum xfs_free_counter cnt,
> + uint64_t request);
> int xfs_fs_goingdown(struct xfs_mount *mp, uint32_t inflags);
>
> int xfs_fs_reserve_ag_blocks(struct xfs_mount *mp);
> diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
> index 0418aad2db91..d250f7f74e3b 100644
> --- a/fs/xfs/xfs_ioctl.c
> +++ b/fs/xfs/xfs_ioctl.c
> @@ -1131,15 +1131,15 @@ xfs_ioctl_getset_resblocks(
> error = mnt_want_write_file(filp);
> if (error)
> return error;
> - error = xfs_reserve_blocks(mp, fsop.resblks);
> + error = xfs_reserve_blocks(mp, XC_FREE_BLOCKS, fsop.resblks);
> mnt_drop_write_file(filp);
> if (error)
> return error;
> }
>
> spin_lock(&mp->m_sb_lock);
> - fsop.resblks = mp->m_resblks;
> - fsop.resblks_avail = mp->m_resblks_avail;
> + fsop.resblks = mp->m_free[XC_FREE_BLOCKS].res_total;
> + fsop.resblks_avail = mp->m_free[XC_FREE_BLOCKS].res_avail;
> spin_unlock(&mp->m_sb_lock);
>
> if (copy_to_user(arg, &fsop, sizeof(fsop)))
> diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
> index ee97a927bc3b..c401fd47c763 100644
> --- a/fs/xfs/xfs_mount.c
> +++ b/fs/xfs/xfs_mount.c
> @@ -1056,7 +1056,8 @@ xfs_mountfs(
> * we were already there on the last unmount. Warn if this occurs.
> */
> if (!xfs_is_readonly(mp)) {
> - error = xfs_reserve_blocks(mp, xfs_default_resblks(mp));
> + error = xfs_reserve_blocks(mp, XC_FREE_BLOCKS,
> + xfs_default_resblks(mp));
> if (error)
> xfs_warn(mp,
> "Unable to allocate reserve blocks. Continuing without reserve pool.");
> @@ -1176,7 +1177,7 @@ xfs_unmountfs(
> * we only every apply deltas to the superblock and hence the incore
> * value does not matter....
> */
> - error = xfs_reserve_blocks(mp, 0);
> + error = xfs_reserve_blocks(mp, XC_FREE_BLOCKS, 0);
> if (error)
> xfs_warn(mp, "Unable to free reserved block pool. "
> "Freespace may not be correct on next mount.");
> @@ -1247,26 +1248,26 @@ xfs_add_freecounter(
> enum xfs_free_counter ctr,
> uint64_t delta)
> {
> - bool has_resv_pool = (ctr == XC_FREE_BLOCKS);
> + struct xfs_freecounter *counter = &mp->m_free[ctr];
> uint64_t res_used;
>
> /*
> * If the reserve pool is depleted, put blocks back into it first.
> * Most of the time the pool is full.
> */
> - if (!has_resv_pool || mp->m_resblks == mp->m_resblks_avail) {
> - percpu_counter_add(&mp->m_free[ctr].count, delta);
> + if (likely(counter->res_avail == counter->res_total)) {
> + percpu_counter_add(&counter->count, delta);
> return;
> }
>
> spin_lock(&mp->m_sb_lock);
> - res_used = mp->m_resblks - mp->m_resblks_avail;
> + res_used = counter->res_total - counter->res_avail;
> if (res_used > delta) {
> - mp->m_resblks_avail += delta;
> + counter->res_avail += delta;
> } else {
> delta -= res_used;
> - mp->m_resblks_avail = mp->m_resblks;
> - percpu_counter_add(&mp->m_free[ctr].count, delta);
> + counter->res_avail = counter->res_total;
> + percpu_counter_add(&counter->count, delta);
> }
> spin_unlock(&mp->m_sb_lock);
> }
> @@ -1280,15 +1281,10 @@ xfs_dec_freecounter(
> uint64_t delta,
> bool rsvd)
> {
> - struct percpu_counter *counter = &mp->m_free[ctr].count;
> - uint64_t set_aside = 0;
> + struct xfs_freecounter *counter = &mp->m_free[ctr];
> s32 batch;
> - bool has_resv_pool;
>
> ASSERT(ctr < XC_FREE_NR);
> - has_resv_pool = (ctr == XC_FREE_BLOCKS);
> - if (rsvd)
> - ASSERT(has_resv_pool);
>
> /*
> * Taking blocks away, need to be more accurate the closer we
> @@ -1298,7 +1294,7 @@ xfs_dec_freecounter(
> * then make everything serialise as we are real close to
> * ENOSPC.
> */
> - if (__percpu_counter_compare(counter, 2 * XFS_FDBLOCKS_BATCH,
> + if (__percpu_counter_compare(&counter->count, 2 * XFS_FDBLOCKS_BATCH,
> XFS_FDBLOCKS_BATCH) < 0)
> batch = 1;
> else
> @@ -1315,25 +1311,25 @@ xfs_dec_freecounter(
> * problems (i.e. transaction abort, pagecache discards, etc.) than
> * slightly premature -ENOSPC.
> */
> - if (has_resv_pool)
> - set_aside = xfs_freecounter_unavailable(mp, ctr);
> - percpu_counter_add_batch(counter, -((int64_t)delta), batch);
> - if (__percpu_counter_compare(counter, set_aside,
> + percpu_counter_add_batch(&counter->count, -((int64_t)delta), batch);
> + if (__percpu_counter_compare(&counter->count,
> + xfs_freecounter_unavailable(mp, ctr),
> XFS_FDBLOCKS_BATCH) < 0) {
> /*
> * Lock up the sb for dipping into reserves before releasing the
> * space that took us to ENOSPC.
> */
> spin_lock(&mp->m_sb_lock);
> - percpu_counter_add(counter, delta);
> + percpu_counter_add(&counter->count, delta);
> if (!rsvd)
> goto fdblocks_enospc;
> - if (delta > mp->m_resblks_avail) {
> - xfs_warn_once(mp,
> + if (delta > counter->res_avail) {
> + if (ctr == XC_FREE_BLOCKS)
> + xfs_warn_once(mp,
> "Reserve blocks depleted! Consider increasing reserve pool size.");
> goto fdblocks_enospc;
> }
> - mp->m_resblks_avail -= delta;
> + counter->res_avail -= delta;
> spin_unlock(&mp->m_sb_lock);
> }
>
> diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
> index 7f3265d669bc..f63410acc8fd 100644
> --- a/fs/xfs/xfs_mount.h
> +++ b/fs/xfs/xfs_mount.h
> @@ -108,6 +108,15 @@ struct xfs_groups {
> struct xfs_freecounter {
> /* free blocks for general use: */
> struct percpu_counter count;
> +
> + /* total reserved blocks: */
> + uint64_t res_total;
> +
> + /* available reserved blocks: */
> + uint64_t res_avail;
> +
> + /* reserved blks @ remount,ro: */
> + uint64_t res_saved;
> };
>
> /*
> @@ -250,9 +259,6 @@ typedef struct xfs_mount {
> atomic64_t m_allocbt_blks;
>
> struct xfs_groups m_groups[XG_TYPE_MAX];
> - uint64_t m_resblks; /* total reserved blocks */
> - uint64_t m_resblks_avail;/* available reserved blocks */
> - uint64_t m_resblks_save; /* reserved blks @ remount,ro */
> struct delayed_work m_reclaim_work; /* background inode reclaim */
> struct dentry *m_debugfs; /* debugfs parent */
> struct xfs_kobj m_kobj;
> diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
> index b08d28a895cb..1e61283efdfe 100644
> --- a/fs/xfs/xfs_super.c
> +++ b/fs/xfs/xfs_super.c
> @@ -924,24 +924,32 @@ xfs_fs_statfs(
> }
>
> STATIC void
> -xfs_save_resvblks(struct xfs_mount *mp)
> +xfs_save_resvblks(
> + struct xfs_mount *mp)
> {
> - mp->m_resblks_save = mp->m_resblks;
> - xfs_reserve_blocks(mp, 0);
> + enum xfs_free_counter i;
> +
> + for (i = 0; i < XC_FREE_NR; i++) {
> + mp->m_free[i].res_saved = mp->m_free[i].res_total;
> + xfs_reserve_blocks(mp, i, 0);
> + }
> }
>
> STATIC void
> -xfs_restore_resvblks(struct xfs_mount *mp)
> +xfs_restore_resvblks(
> + struct xfs_mount *mp)
> {
> - uint64_t resblks;
> -
> - if (mp->m_resblks_save) {
> - resblks = mp->m_resblks_save;
> - mp->m_resblks_save = 0;
> - } else
> - resblks = xfs_default_resblks(mp);
> + uint64_t resblks;
> + enum xfs_free_counter i;
>
> - xfs_reserve_blocks(mp, resblks);
> + for (i = 0; i < XC_FREE_NR; i++) {
> + if (mp->m_free[i].res_saved) {
> + resblks = mp->m_free[i].res_saved;
> + mp->m_free[i].res_saved = 0;
> + } else
> + resblks = xfs_default_resblks(mp);
Until the later patch in this series, "xfs: preserve RT reservations
across remounts" (05/45), is applied, this should be:
if (mp->m_free[i].res_saved) {
resblks = mp->m_free[i].res_saved;
mp->m_free[i].res_saved = 0;
} else if (i == XC_FREE_BLOCKS) {
resblks = xfs_default_resblks(mp);
} else {
resblks = 0;
}
Otherwise the loop falls back to xfs_default_resblks(mp) for every
counter, so we can end up "restoring" 8192 extents into
XC_FREE_RTEXTENTS even though we don't actually have reserved free
rtextents yet. I /think/ this fixes the frextents accounting errors
that I saw while trying to bisect the fdblocks accounting errors.
--D
> + xfs_reserve_blocks(mp, i, resblks);
> + }
> }
>
> /*
> --
> 2.45.2
>
>
next prev parent reply other threads:[~2025-02-25 18:05 UTC|newest]
Thread overview: 61+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-02-18 8:10 support for zoned devices v3 Christoph Hellwig
2025-02-18 8:10 ` [PATCH 01/45] xfs: reflow xfs_dec_freecounter Christoph Hellwig
2025-02-18 8:10 ` [PATCH 02/45] xfs: generalize the freespace and reserved blocks handling Christoph Hellwig
2025-02-19 22:11 ` Darrick J. Wong
2025-02-18 8:10 ` [PATCH 03/45] xfs: support reserved blocks for the rt extent counter Christoph Hellwig
2025-02-25 18:05 ` Darrick J. Wong [this message]
2025-02-26 1:20 ` Christoph Hellwig
2025-02-18 8:10 ` [PATCH 04/45] xfs: trace in-memory freecounter reservations Christoph Hellwig
2025-02-18 8:10 ` [PATCH 05/45] xfs: preserve RT reservations across remounts Christoph Hellwig
2025-02-18 8:10 ` [PATCH 06/45] xfs: fixup the metabtree reservation in xrep_reap_metadir_fsblocks Christoph Hellwig
2025-02-19 18:41 ` Darrick J. Wong
2025-02-18 8:10 ` [PATCH 07/45] xfs: make metabtree reservations global Christoph Hellwig
2025-02-19 18:44 ` Darrick J. Wong
2025-02-18 8:10 ` [PATCH 08/45] xfs: reduce metafile reservations Christoph Hellwig
2025-02-18 8:10 ` [PATCH 09/45] xfs: factor out a xfs_rt_check_size helper Christoph Hellwig
2025-02-18 8:10 ` [PATCH 10/45] xfs: add a rtg_blocks helper Christoph Hellwig
2025-02-18 8:10 ` [PATCH 11/45] xfs: move xfs_bmapi_reserve_delalloc to xfs_iomap.c Christoph Hellwig
2025-02-19 21:47 ` Darrick J. Wong
2025-02-18 8:10 ` [PATCH 12/45] xfs: skip always_cow inodes in xfs_reflink_trim_around_shared Christoph Hellwig
2025-02-18 8:10 ` [PATCH 13/45] xfs: refine the unaligned check for always COW inodes in xfs_file_dio_write Christoph Hellwig
2025-02-18 8:10 ` [PATCH 14/45] xfs: support XFS_BMAPI_REMAP in xfs_bmap_del_extent_delay Christoph Hellwig
2025-02-18 8:10 ` [PATCH 15/45] xfs: add a xfs_rtrmap_highest_rgbno helper Christoph Hellwig
2025-02-18 8:10 ` [PATCH 16/45] xfs: define the zoned on-disk format Christoph Hellwig
2025-02-18 8:10 ` [PATCH 17/45] xfs: allow internal RT devices for zoned mode Christoph Hellwig
2025-02-18 8:10 ` [PATCH 18/45] xfs: export zoned geometry via XFS_FSOP_GEOM Christoph Hellwig
2025-02-18 8:10 ` [PATCH 19/45] xfs: disable sb_frextents for zoned file systems Christoph Hellwig
2025-02-18 8:10 ` [PATCH 20/45] xfs: disable FITRIM for zoned RT devices Christoph Hellwig
2025-02-18 8:10 ` [PATCH 21/45] xfs: don't call xfs_can_free_eofblocks from ->release for zoned inodes Christoph Hellwig
2025-02-18 8:10 ` [PATCH 22/45] xfs: skip zoned RT inodes in xfs_inodegc_want_queue_rt_file Christoph Hellwig
2025-02-18 8:10 ` [PATCH 23/45] xfs: parse and validate hardware zone information Christoph Hellwig
2025-02-18 8:10 ` [PATCH 24/45] xfs: add the zoned space allocator Christoph Hellwig
2025-02-19 21:58 ` Darrick J. Wong
2025-02-20 6:17 ` Christoph Hellwig
2025-02-18 8:10 ` [PATCH 25/45] xfs: add support for zoned space reservations Christoph Hellwig
2025-02-19 22:00 ` Darrick J. Wong
2025-02-18 8:10 ` [PATCH 26/45] xfs: implement zoned garbage collection Christoph Hellwig
2025-02-19 22:02 ` Darrick J. Wong
2025-02-18 8:10 ` [PATCH 27/45] xfs: implement buffered writes to zoned RT devices Christoph Hellwig
2025-02-19 21:47 ` Darrick J. Wong
2025-02-20 6:16 ` Christoph Hellwig
2025-02-20 16:57 ` Darrick J. Wong
2025-02-25 17:59 ` Darrick J. Wong
2025-02-18 8:10 ` [PATCH 28/45] xfs: implement direct " Christoph Hellwig
2025-02-18 8:10 ` [PATCH 29/45] xfs: wire up zoned block freeing in xfs_rtextent_free_finish_item Christoph Hellwig
2025-02-18 8:10 ` [PATCH 30/45] xfs: hide reserved RT blocks from statfs Christoph Hellwig
2025-02-18 8:10 ` [PATCH 31/45] xfs: support growfs on zoned file systems Christoph Hellwig
2025-02-18 8:10 ` [PATCH 32/45] xfs: allow COW forks on zoned file systems in xchk_bmap Christoph Hellwig
2025-02-18 8:10 ` [PATCH 33/45] xfs: support xchk_xref_is_used_rt_space on zoned file systems Christoph Hellwig
2025-02-18 8:10 ` [PATCH 34/45] xfs: support xrep_require_rtext_inuse " Christoph Hellwig
2025-02-18 8:10 ` [PATCH 35/45] xfs: enable fsmap reporting for internal RT devices Christoph Hellwig
2025-02-18 8:10 ` [PATCH 36/45] xfs: disable reflink for zoned file systems Christoph Hellwig
2025-02-18 8:10 ` [PATCH 37/45] xfs: disable rt quotas " Christoph Hellwig
2025-02-18 8:10 ` [PATCH 38/45] xfs: enable the zoned RT device feature Christoph Hellwig
2025-02-18 8:10 ` [PATCH 39/45] xfs: support zone gaps Christoph Hellwig
2025-02-18 8:10 ` [PATCH 40/45] xfs: add a max_open_zones mount option Christoph Hellwig
2025-02-18 8:10 ` [PATCH 41/45] xfs: support write life time based data placement Christoph Hellwig
2025-02-19 18:49 ` Darrick J. Wong
2025-02-18 8:10 ` [PATCH 42/45] xfs: wire up the show_stats super operation Christoph Hellwig
2025-02-18 8:10 ` [PATCH 43/45] xfs: export zone stats in /proc/*/mountstats Christoph Hellwig
2025-02-18 8:10 ` [PATCH 44/45] xfs: contain more sysfs code in xfs_sysfs.c Christoph Hellwig
2025-02-18 8:10 ` [PATCH 45/45] xfs: export max_open_zones in sysfs Christoph Hellwig
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250225180556.GI6242@frogsfrogsfrogs \
--to=djwong@kernel.org \
--cc=cem@kernel.org \
--cc=hans.holmberg@wdc.com \
--cc=hch@lst.de \
--cc=linux-xfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox