* [PATCH 1/5] xfs: use kfree_rcu_mightsleep to free the perag structures
2024-08-21 6:38 convert XFS perag lookup to xarrays v2 Christoph Hellwig
@ 2024-08-21 6:38 ` Christoph Hellwig
2024-08-21 16:19 ` Darrick J. Wong
2024-08-21 6:38 ` [PATCH 2/5] xfs: move the tagged perag lookup helpers to xfs_icache.c Christoph Hellwig
` (3 subsequent siblings)
4 siblings, 1 reply; 18+ messages in thread
From: Christoph Hellwig @ 2024-08-21 6:38 UTC (permalink / raw)
To: Chandan Babu R, Matthew Wilcox
Cc: Darrick J. Wong, Andrew Morton, linux-xfs, linux-kernel,
linux-fsdevel
Using the kfree_rcu_mightsleep is simpler and removes the need for a
rcu_head in the perag structure.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
fs/xfs/libxfs/xfs_ag.c | 12 +-----------
fs/xfs/libxfs/xfs_ag.h | 3 ---
2 files changed, 1 insertion(+), 14 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
index 7e80732cb54708..4b5a39a83f7aed 100644
--- a/fs/xfs/libxfs/xfs_ag.c
+++ b/fs/xfs/libxfs/xfs_ag.c
@@ -235,16 +235,6 @@ xfs_initialize_perag_data(
return error;
}
-STATIC void
-__xfs_free_perag(
- struct rcu_head *head)
-{
- struct xfs_perag *pag = container_of(head, struct xfs_perag, rcu_head);
-
- ASSERT(!delayed_work_pending(&pag->pag_blockgc_work));
- kfree(pag);
-}
-
/*
* Free up the per-ag resources associated with the mount structure.
*/
@@ -270,7 +260,7 @@ xfs_free_perag(
xfs_perag_rele(pag);
XFS_IS_CORRUPT(pag->pag_mount,
atomic_read(&pag->pag_active_ref) != 0);
- call_rcu(&pag->rcu_head, __xfs_free_perag);
+ kfree_rcu_mightsleep(pag);
}
}
diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h
index 35de09a2516c70..d62c266c0b44d5 100644
--- a/fs/xfs/libxfs/xfs_ag.h
+++ b/fs/xfs/libxfs/xfs_ag.h
@@ -63,9 +63,6 @@ struct xfs_perag {
/* Blocks reserved for the reverse mapping btree. */
struct xfs_ag_resv pag_rmapbt_resv;
- /* for rcu-safe freeing */
- struct rcu_head rcu_head;
-
/* Precalculated geometry info */
xfs_agblock_t block_count;
xfs_agblock_t min_block;
--
2.43.0
^ permalink raw reply related [flat|nested] 18+ messages in thread* Re: [PATCH 1/5] xfs: use kfree_rcu_mightsleep to free the perag structures
2024-08-21 6:38 ` [PATCH 1/5] xfs: use kfree_rcu_mightsleep to free the perag structures Christoph Hellwig
@ 2024-08-21 16:19 ` Darrick J. Wong
2024-08-22 3:42 ` Christoph Hellwig
0 siblings, 1 reply; 18+ messages in thread
From: Darrick J. Wong @ 2024-08-21 16:19 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Chandan Babu R, Matthew Wilcox, Andrew Morton, linux-xfs,
linux-kernel, linux-fsdevel
On Wed, Aug 21, 2024 at 08:38:28AM +0200, Christoph Hellwig wrote:
> Using the kfree_rcu_mightsleep is simpler and removes the need for a
> rcu_head in the perag structure.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
> fs/xfs/libxfs/xfs_ag.c | 12 +-----------
> fs/xfs/libxfs/xfs_ag.h | 3 ---
> 2 files changed, 1 insertion(+), 14 deletions(-)
>
> diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
> index 7e80732cb54708..4b5a39a83f7aed 100644
> --- a/fs/xfs/libxfs/xfs_ag.c
> +++ b/fs/xfs/libxfs/xfs_ag.c
> @@ -235,16 +235,6 @@ xfs_initialize_perag_data(
> return error;
> }
>
> -STATIC void
> -__xfs_free_perag(
> - struct rcu_head *head)
> -{
> - struct xfs_perag *pag = container_of(head, struct xfs_perag, rcu_head);
> -
> - ASSERT(!delayed_work_pending(&pag->pag_blockgc_work));
> - kfree(pag);
> -}
> -
> /*
> * Free up the per-ag resources associated with the mount structure.
> */
> @@ -270,7 +260,7 @@ xfs_free_perag(
> xfs_perag_rele(pag);
> XFS_IS_CORRUPT(pag->pag_mount,
> atomic_read(&pag->pag_active_ref) != 0);
> - call_rcu(&pag->rcu_head, __xfs_free_perag);
> + kfree_rcu_mightsleep(pag);
I started wondering, have you seen any complaints from might_sleep when
freeing pags after a failed growfs? Then I wondered if growfs_data
could actually take any locks that would prevent sleeping, which led me
to another question: why do growfs_{data,log} hold m_growlock but
growfs_rt doesn't? Is that actually safe?
I think the kfree_rcu_mightsleep conversion is ok, but I want to see if
anything blows up with the rtgroups variant.
--D
> }
> }
>
> diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h
> index 35de09a2516c70..d62c266c0b44d5 100644
> --- a/fs/xfs/libxfs/xfs_ag.h
> +++ b/fs/xfs/libxfs/xfs_ag.h
> @@ -63,9 +63,6 @@ struct xfs_perag {
> /* Blocks reserved for the reverse mapping btree. */
> struct xfs_ag_resv pag_rmapbt_resv;
>
> - /* for rcu-safe freeing */
> - struct rcu_head rcu_head;
> -
> /* Precalculated geometry info */
> xfs_agblock_t block_count;
> xfs_agblock_t min_block;
> --
> 2.43.0
>
>
^ permalink raw reply [flat|nested] 18+ messages in thread* Re: [PATCH 1/5] xfs: use kfree_rcu_mightsleep to free the perag structures
2024-08-21 16:19 ` Darrick J. Wong
@ 2024-08-22 3:42 ` Christoph Hellwig
0 siblings, 0 replies; 18+ messages in thread
From: Christoph Hellwig @ 2024-08-22 3:42 UTC (permalink / raw)
To: Darrick J. Wong
Cc: Christoph Hellwig, Chandan Babu R, Matthew Wilcox, Andrew Morton,
linux-xfs, linux-kernel, linux-fsdevel
On Wed, Aug 21, 2024 at 09:19:39AM -0700, Darrick J. Wong wrote:
> I started wondering, have you seen any complaints from might_sleep when
> freeing pags after a failed growfs?
No, why would I? We're not freeing perags with a spinlock held there.
> Then I wondered if growfs_data
> could actually take any locks that would prevent sleeping, which led me
> to another question: why do growfs_{data,log} hold m_growlock but
> growfs_rt doesn't? Is that actually safe?
As far as I can tell growfs_rt is missing a m_growlock critical section
and right now we allow parallel calls to growfs_rt, which could lead
to unexpected results.
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH 2/5] xfs: move the tagged perag lookup helpers to xfs_icache.c
2024-08-21 6:38 convert XFS perag lookup to xarrays v2 Christoph Hellwig
2024-08-21 6:38 ` [PATCH 1/5] xfs: use kfree_rcu_mightsleep to free the perag structures Christoph Hellwig
@ 2024-08-21 6:38 ` Christoph Hellwig
2024-08-21 16:34 ` Darrick J. Wong
2024-08-21 6:38 ` [PATCH 3/5] xfs: simplify tagged perag iteration Christoph Hellwig
` (2 subsequent siblings)
4 siblings, 1 reply; 18+ messages in thread
From: Christoph Hellwig @ 2024-08-21 6:38 UTC (permalink / raw)
To: Chandan Babu R, Matthew Wilcox
Cc: Darrick J. Wong, Andrew Morton, linux-xfs, linux-kernel,
linux-fsdevel
The tagged perag helpers are only used in xfs_icache.c in the kernel code
and not at all in xfsprogs. Move them to xfs_icache.c in preparation for
switching to an xarray, for which I have no plan to implement the tagged
lookup functions for userspace.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
fs/xfs/libxfs/xfs_ag.c | 51 -------------------------------------
fs/xfs/libxfs/xfs_ag.h | 11 --------
fs/xfs/xfs_icache.c | 58 ++++++++++++++++++++++++++++++++++++++++++
3 files changed, 58 insertions(+), 62 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
index 4b5a39a83f7aed..87f00f0180846f 100644
--- a/fs/xfs/libxfs/xfs_ag.c
+++ b/fs/xfs/libxfs/xfs_ag.c
@@ -56,31 +56,6 @@ xfs_perag_get(
return pag;
}
-/*
- * search from @first to find the next perag with the given tag set.
- */
-struct xfs_perag *
-xfs_perag_get_tag(
- struct xfs_mount *mp,
- xfs_agnumber_t first,
- unsigned int tag)
-{
- struct xfs_perag *pag;
- int found;
-
- rcu_read_lock();
- found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
- (void **)&pag, first, 1, tag);
- if (found <= 0) {
- rcu_read_unlock();
- return NULL;
- }
- trace_xfs_perag_get_tag(pag, _RET_IP_);
- atomic_inc(&pag->pag_ref);
- rcu_read_unlock();
- return pag;
-}
-
/* Get a passive reference to the given perag. */
struct xfs_perag *
xfs_perag_hold(
@@ -127,32 +102,6 @@ xfs_perag_grab(
return pag;
}
-/*
- * search from @first to find the next perag with the given tag set.
- */
-struct xfs_perag *
-xfs_perag_grab_tag(
- struct xfs_mount *mp,
- xfs_agnumber_t first,
- int tag)
-{
- struct xfs_perag *pag;
- int found;
-
- rcu_read_lock();
- found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
- (void **)&pag, first, 1, tag);
- if (found <= 0) {
- rcu_read_unlock();
- return NULL;
- }
- trace_xfs_perag_grab_tag(pag, _RET_IP_);
- if (!atomic_inc_not_zero(&pag->pag_active_ref))
- pag = NULL;
- rcu_read_unlock();
- return pag;
-}
-
void
xfs_perag_rele(
struct xfs_perag *pag)
diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h
index d62c266c0b44d5..d9cccd093b60e0 100644
--- a/fs/xfs/libxfs/xfs_ag.h
+++ b/fs/xfs/libxfs/xfs_ag.h
@@ -153,15 +153,11 @@ void xfs_free_perag(struct xfs_mount *mp);
/* Passive AG references */
struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno);
-struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *mp, xfs_agnumber_t agno,
- unsigned int tag);
struct xfs_perag *xfs_perag_hold(struct xfs_perag *pag);
void xfs_perag_put(struct xfs_perag *pag);
/* Active AG references */
struct xfs_perag *xfs_perag_grab(struct xfs_mount *, xfs_agnumber_t);
-struct xfs_perag *xfs_perag_grab_tag(struct xfs_mount *, xfs_agnumber_t,
- int tag);
void xfs_perag_rele(struct xfs_perag *pag);
/*
@@ -263,13 +259,6 @@ xfs_perag_next(
(agno) = 0; \
for_each_perag_from((mp), (agno), (pag))
-#define for_each_perag_tag(mp, agno, pag, tag) \
- for ((agno) = 0, (pag) = xfs_perag_grab_tag((mp), 0, (tag)); \
- (pag) != NULL; \
- (agno) = (pag)->pag_agno + 1, \
- xfs_perag_rele(pag), \
- (pag) = xfs_perag_grab_tag((mp), (agno), (tag)))
-
static inline struct xfs_perag *
xfs_perag_next_wrap(
struct xfs_perag *pag,
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index cf629302d48e74..ac604640d36229 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -292,6 +292,64 @@ xfs_perag_clear_inode_tag(
trace_xfs_perag_clear_inode_tag(pag, _RET_IP_);
}
+/*
+ * Search from @first to find the next perag with the given tag set.
+ */
+static struct xfs_perag *
+xfs_perag_get_tag(
+ struct xfs_mount *mp,
+ xfs_agnumber_t first,
+ unsigned int tag)
+{
+ struct xfs_perag *pag;
+ int found;
+
+ rcu_read_lock();
+ found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
+ (void **)&pag, first, 1, tag);
+ if (found <= 0) {
+ rcu_read_unlock();
+ return NULL;
+ }
+ trace_xfs_perag_get_tag(pag, _RET_IP_);
+ atomic_inc(&pag->pag_ref);
+ rcu_read_unlock();
+ return pag;
+}
+
+/*
+ * Search from @first to find the next perag with the given tag set.
+ */
+static struct xfs_perag *
+xfs_perag_grab_tag(
+ struct xfs_mount *mp,
+ xfs_agnumber_t first,
+ int tag)
+{
+ struct xfs_perag *pag;
+ int found;
+
+ rcu_read_lock();
+ found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
+ (void **)&pag, first, 1, tag);
+ if (found <= 0) {
+ rcu_read_unlock();
+ return NULL;
+ }
+ trace_xfs_perag_grab_tag(pag, _RET_IP_);
+ if (!atomic_inc_not_zero(&pag->pag_active_ref))
+ pag = NULL;
+ rcu_read_unlock();
+ return pag;
+}
+
+#define for_each_perag_tag(mp, agno, pag, tag) \
+ for ((agno) = 0, (pag) = xfs_perag_grab_tag((mp), 0, (tag)); \
+ (pag) != NULL; \
+ (agno) = (pag)->pag_agno + 1, \
+ xfs_perag_rele(pag), \
+ (pag) = xfs_perag_grab_tag((mp), (agno), (tag)))
+
/*
* When we recycle a reclaimable inode, we need to re-initialise the VFS inode
* part of the structure. This is made more complex by the fact we store
--
2.43.0
^ permalink raw reply related [flat|nested] 18+ messages in thread* Re: [PATCH 2/5] xfs: move the tagged perag lookup helpers to xfs_icache.c
2024-08-21 6:38 ` [PATCH 2/5] xfs: move the tagged perag lookup helpers to xfs_icache.c Christoph Hellwig
@ 2024-08-21 16:34 ` Darrick J. Wong
2024-08-22 3:47 ` Christoph Hellwig
2024-08-28 6:28 ` Christoph Hellwig
0 siblings, 2 replies; 18+ messages in thread
From: Darrick J. Wong @ 2024-08-21 16:34 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Chandan Babu R, Matthew Wilcox, Andrew Morton, linux-xfs,
linux-kernel, linux-fsdevel
On Wed, Aug 21, 2024 at 08:38:29AM +0200, Christoph Hellwig wrote:
> The tagged perag helpers are only used in xfs_icache.c in the kernel code
> and not at all in xfsprogs. Move them to xfs_icache.c in preparation for
> switching to an xarray, for which I have no plan to implement the tagged
> lookup functions for userspace.
I don't particularly like moving these functions to another file, but I
suppose the icache is the only user of these tags. How hard is it to
make userspace stubs that assert if anyone ever tries to use it?
--D
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
> fs/xfs/libxfs/xfs_ag.c | 51 -------------------------------------
> fs/xfs/libxfs/xfs_ag.h | 11 --------
> fs/xfs/xfs_icache.c | 58 ++++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 58 insertions(+), 62 deletions(-)
>
> diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
> index 4b5a39a83f7aed..87f00f0180846f 100644
> --- a/fs/xfs/libxfs/xfs_ag.c
> +++ b/fs/xfs/libxfs/xfs_ag.c
> @@ -56,31 +56,6 @@ xfs_perag_get(
> return pag;
> }
>
> -/*
> - * search from @first to find the next perag with the given tag set.
> - */
> -struct xfs_perag *
> -xfs_perag_get_tag(
> - struct xfs_mount *mp,
> - xfs_agnumber_t first,
> - unsigned int tag)
> -{
> - struct xfs_perag *pag;
> - int found;
> -
> - rcu_read_lock();
> - found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
> - (void **)&pag, first, 1, tag);
> - if (found <= 0) {
> - rcu_read_unlock();
> - return NULL;
> - }
> - trace_xfs_perag_get_tag(pag, _RET_IP_);
> - atomic_inc(&pag->pag_ref);
> - rcu_read_unlock();
> - return pag;
> -}
> -
> /* Get a passive reference to the given perag. */
> struct xfs_perag *
> xfs_perag_hold(
> @@ -127,32 +102,6 @@ xfs_perag_grab(
> return pag;
> }
>
> -/*
> - * search from @first to find the next perag with the given tag set.
> - */
> -struct xfs_perag *
> -xfs_perag_grab_tag(
> - struct xfs_mount *mp,
> - xfs_agnumber_t first,
> - int tag)
> -{
> - struct xfs_perag *pag;
> - int found;
> -
> - rcu_read_lock();
> - found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
> - (void **)&pag, first, 1, tag);
> - if (found <= 0) {
> - rcu_read_unlock();
> - return NULL;
> - }
> - trace_xfs_perag_grab_tag(pag, _RET_IP_);
> - if (!atomic_inc_not_zero(&pag->pag_active_ref))
> - pag = NULL;
> - rcu_read_unlock();
> - return pag;
> -}
> -
> void
> xfs_perag_rele(
> struct xfs_perag *pag)
> diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h
> index d62c266c0b44d5..d9cccd093b60e0 100644
> --- a/fs/xfs/libxfs/xfs_ag.h
> +++ b/fs/xfs/libxfs/xfs_ag.h
> @@ -153,15 +153,11 @@ void xfs_free_perag(struct xfs_mount *mp);
>
> /* Passive AG references */
> struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno);
> -struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *mp, xfs_agnumber_t agno,
> - unsigned int tag);
> struct xfs_perag *xfs_perag_hold(struct xfs_perag *pag);
> void xfs_perag_put(struct xfs_perag *pag);
>
> /* Active AG references */
> struct xfs_perag *xfs_perag_grab(struct xfs_mount *, xfs_agnumber_t);
> -struct xfs_perag *xfs_perag_grab_tag(struct xfs_mount *, xfs_agnumber_t,
> - int tag);
> void xfs_perag_rele(struct xfs_perag *pag);
>
> /*
> @@ -263,13 +259,6 @@ xfs_perag_next(
> (agno) = 0; \
> for_each_perag_from((mp), (agno), (pag))
>
> -#define for_each_perag_tag(mp, agno, pag, tag) \
> - for ((agno) = 0, (pag) = xfs_perag_grab_tag((mp), 0, (tag)); \
> - (pag) != NULL; \
> - (agno) = (pag)->pag_agno + 1, \
> - xfs_perag_rele(pag), \
> - (pag) = xfs_perag_grab_tag((mp), (agno), (tag)))
> -
> static inline struct xfs_perag *
> xfs_perag_next_wrap(
> struct xfs_perag *pag,
> diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
> index cf629302d48e74..ac604640d36229 100644
> --- a/fs/xfs/xfs_icache.c
> +++ b/fs/xfs/xfs_icache.c
> @@ -292,6 +292,64 @@ xfs_perag_clear_inode_tag(
> trace_xfs_perag_clear_inode_tag(pag, _RET_IP_);
> }
>
> +/*
> + * Search from @first to find the next perag with the given tag set.
> + */
> +static struct xfs_perag *
> +xfs_perag_get_tag(
> + struct xfs_mount *mp,
> + xfs_agnumber_t first,
> + unsigned int tag)
> +{
> + struct xfs_perag *pag;
> + int found;
> +
> + rcu_read_lock();
> + found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
> + (void **)&pag, first, 1, tag);
> + if (found <= 0) {
> + rcu_read_unlock();
> + return NULL;
> + }
> + trace_xfs_perag_get_tag(pag, _RET_IP_);
> + atomic_inc(&pag->pag_ref);
> + rcu_read_unlock();
> + return pag;
> +}
> +
> +/*
> + * Search from @first to find the next perag with the given tag set.
> + */
> +static struct xfs_perag *
> +xfs_perag_grab_tag(
> + struct xfs_mount *mp,
> + xfs_agnumber_t first,
> + int tag)
> +{
> + struct xfs_perag *pag;
> + int found;
> +
> + rcu_read_lock();
> + found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
> + (void **)&pag, first, 1, tag);
> + if (found <= 0) {
> + rcu_read_unlock();
> + return NULL;
> + }
> + trace_xfs_perag_grab_tag(pag, _RET_IP_);
> + if (!atomic_inc_not_zero(&pag->pag_active_ref))
> + pag = NULL;
> + rcu_read_unlock();
> + return pag;
> +}
> +
> +#define for_each_perag_tag(mp, agno, pag, tag) \
> + for ((agno) = 0, (pag) = xfs_perag_grab_tag((mp), 0, (tag)); \
> + (pag) != NULL; \
> + (agno) = (pag)->pag_agno + 1, \
> + xfs_perag_rele(pag), \
> + (pag) = xfs_perag_grab_tag((mp), (agno), (tag)))
> +
> /*
> * When we recycle a reclaimable inode, we need to re-initialise the VFS inode
> * part of the structure. This is made more complex by the fact we store
> --
> 2.43.0
>
>
^ permalink raw reply [flat|nested] 18+ messages in thread* Re: [PATCH 2/5] xfs: move the tagged perag lookup helpers to xfs_icache.c
2024-08-21 16:34 ` Darrick J. Wong
@ 2024-08-22 3:47 ` Christoph Hellwig
2024-08-28 6:28 ` Christoph Hellwig
1 sibling, 0 replies; 18+ messages in thread
From: Christoph Hellwig @ 2024-08-22 3:47 UTC (permalink / raw)
To: Darrick J. Wong
Cc: Christoph Hellwig, Chandan Babu R, Matthew Wilcox, Andrew Morton,
linux-xfs, linux-kernel, linux-fsdevel
On Wed, Aug 21, 2024 at 09:34:07AM -0700, Darrick J. Wong wrote:
> On Wed, Aug 21, 2024 at 08:38:29AM +0200, Christoph Hellwig wrote:
> > The tagged perag helpers are only used in xfs_icache.c in the kernel code
> > and not at all in xfsprogs. Move them to xfs_icache.c in preparation for
> > switching to an xarray, for which I have no plan to implement the tagged
> > lookup functions for userspace.
>
> I don't particularly like moving these functions to another file, but I
> suppose the icache is the only user of these tags. How hard is it to
> make userspace stubs that assert if anyone ever tries to use it?
It might be easier to just implement them in that case like the underlying
radix tree ones. But given that they are unused I'd feel rather
uncomfortable about it. And more importantly I like to have the
function (only one is left by the end) close to the callers as that makes
reading and understanding the code easier.
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 2/5] xfs: move the tagged perag lookup helpers to xfs_icache.c
2024-08-21 16:34 ` Darrick J. Wong
2024-08-22 3:47 ` Christoph Hellwig
@ 2024-08-28 6:28 ` Christoph Hellwig
2024-08-28 16:10 ` Darrick J. Wong
1 sibling, 1 reply; 18+ messages in thread
From: Christoph Hellwig @ 2024-08-28 6:28 UTC (permalink / raw)
To: Darrick J. Wong
Cc: Christoph Hellwig, Chandan Babu R, Matthew Wilcox, Andrew Morton,
linux-xfs, linux-kernel, linux-fsdevel
On Wed, Aug 21, 2024 at 09:34:07AM -0700, Darrick J. Wong wrote:
> I don't particularly like moving these functions to another file, but I
> suppose the icache is the only user of these tags. How hard is it to
> make userspace stubs that assert if anyone ever tries to use it?
I looked into not moving them, but the annoying thing is that we then
need to make the ici_tag_to_mark helper added later and the marks
global. Unless this is a blocker for you I'd much prefer to just
keep all the tag/mark logic contained in icache.c for now. Things
might change a bit if/when we do the generic xfs_group and also use
tags for garbage collection of zoned rtgs, but I'd rather build the
right abstraction when we get to that. That will probably also
include sorting out the current mess with the ICI vs IWALK flags.
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 2/5] xfs: move the tagged perag lookup helpers to xfs_icache.c
2024-08-28 6:28 ` Christoph Hellwig
@ 2024-08-28 16:10 ` Darrick J. Wong
2024-08-29 3:48 ` Christoph Hellwig
0 siblings, 1 reply; 18+ messages in thread
From: Darrick J. Wong @ 2024-08-28 16:10 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Christoph Hellwig, Chandan Babu R, Matthew Wilcox, Andrew Morton,
linux-xfs, linux-kernel, linux-fsdevel
On Tue, Aug 27, 2024 at 11:28:48PM -0700, Christoph Hellwig wrote:
> On Wed, Aug 21, 2024 at 09:34:07AM -0700, Darrick J. Wong wrote:
> > I don't particularly like moving these functions to another file, but I
> > suppose the icache is the only user of these tags. How hard is it to
> > make userspace stubs that assert if anyone ever tries to use it?
>
> I looked into not moving them, but the annoying thing is that we then
> need to make the ici_tag_to_mark helper added later and the marks
> global. Unless this is a blocker for you I'd much prefer to just
> keep all the tag/mark logic contained in icache.c for now. Things
> might change a bit if/when we do the generic xfs_group and also use
> tags for garbage collection of zoned rtgs, but I'd rather build the
> right abstraction when we get to that. That will probably also
> include sorting out the current mess with the ICI vs IWALK flags.
Or converting pag_ici_root to an xarray, and then we can make all of
them use the same mark symbols. <shrug>
--D
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 2/5] xfs: move the tagged perag lookup helpers to xfs_icache.c
2024-08-28 16:10 ` Darrick J. Wong
@ 2024-08-29 3:48 ` Christoph Hellwig
0 siblings, 0 replies; 18+ messages in thread
From: Christoph Hellwig @ 2024-08-29 3:48 UTC (permalink / raw)
To: Darrick J. Wong
Cc: Christoph Hellwig, Christoph Hellwig, Chandan Babu R,
Matthew Wilcox, Andrew Morton, linux-xfs, linux-kernel,
linux-fsdevel
On Wed, Aug 28, 2024 at 09:10:04AM -0700, Darrick J. Wong wrote:
> > tags for garbage collection of zoned rtgs, but I'd rather build the
> > right abstraction when we get to that. That will probably also
> > include sorting out the current mess with the ICI vs IWALK flags.
>
> Or converting pag_ici_root to an xarray, and then we can make all of
> them use the same mark symbols. <shrug>
Maybe we could, but someone intentionally separated them out (and than
someone, not sure if the same persons were involved, was very sloppy
about the separation) so I'll need to look a bit more at the history.
And maybe think about a better way of doing this.
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH 3/5] xfs: simplify tagged perag iteration
2024-08-21 6:38 convert XFS perag lookup to xarrays v2 Christoph Hellwig
2024-08-21 6:38 ` [PATCH 1/5] xfs: use kfree_rcu_mightsleep to free the perag structures Christoph Hellwig
2024-08-21 6:38 ` [PATCH 2/5] xfs: move the tagged perag lookup helpers to xfs_icache.c Christoph Hellwig
@ 2024-08-21 6:38 ` Christoph Hellwig
2024-08-21 16:30 ` Darrick J. Wong
2024-08-21 6:38 ` [PATCH 4/5] xfs: convert perag lookup to xarray Christoph Hellwig
2024-08-21 6:38 ` [PATCH 5/5] xfs: use xas_for_each_marked in xfs_reclaim_inodes_count Christoph Hellwig
4 siblings, 1 reply; 18+ messages in thread
From: Christoph Hellwig @ 2024-08-21 6:38 UTC (permalink / raw)
To: Chandan Babu R, Matthew Wilcox
Cc: Darrick J. Wong, Andrew Morton, linux-xfs, linux-kernel,
linux-fsdevel
Pass the old perag structure to the tagged loop helpers so that they can
grab the old agno before releasing the reference. This removes the need
to separately track the agno and the iterator macro, and thus also
obsoletes the for_each_perag_tag syntactic sugar.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
fs/xfs/xfs_icache.c | 69 +++++++++++++++++++++------------------------
fs/xfs/xfs_trace.h | 4 +--
2 files changed, 34 insertions(+), 39 deletions(-)
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index ac604640d36229..4d71fbfe71299a 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -296,60 +296,63 @@ xfs_perag_clear_inode_tag(
* Search from @first to find the next perag with the given tag set.
*/
static struct xfs_perag *
-xfs_perag_get_tag(
+xfs_perag_get_next_tag(
struct xfs_mount *mp,
- xfs_agnumber_t first,
+ struct xfs_perag *pag,
unsigned int tag)
{
- struct xfs_perag *pag;
+ unsigned long index = 0;
int found;
+ if (pag) {
+ index = pag->pag_agno + 1;
+ xfs_perag_rele(pag);
+ }
+
rcu_read_lock();
found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
- (void **)&pag, first, 1, tag);
+ (void **)&pag, index, 1, tag);
if (found <= 0) {
rcu_read_unlock();
return NULL;
}
- trace_xfs_perag_get_tag(pag, _RET_IP_);
+ trace_xfs_perag_get_next_tag(pag, _RET_IP_);
atomic_inc(&pag->pag_ref);
rcu_read_unlock();
return pag;
}
/*
- * Search from @first to find the next perag with the given tag set.
+ * Find the next AG after @pag, or the first AG if @pag is NULL.
*/
static struct xfs_perag *
-xfs_perag_grab_tag(
+xfs_perag_grab_next_tag(
struct xfs_mount *mp,
- xfs_agnumber_t first,
+ struct xfs_perag *pag,
int tag)
{
- struct xfs_perag *pag;
+ unsigned long index = 0;
int found;
+ if (pag) {
+ index = pag->pag_agno + 1;
+ xfs_perag_rele(pag);
+ }
+
rcu_read_lock();
found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
- (void **)&pag, first, 1, tag);
+ (void **)&pag, index, 1, tag);
if (found <= 0) {
rcu_read_unlock();
return NULL;
}
- trace_xfs_perag_grab_tag(pag, _RET_IP_);
+ trace_xfs_perag_grab_next_tag(pag, _RET_IP_);
if (!atomic_inc_not_zero(&pag->pag_active_ref))
pag = NULL;
rcu_read_unlock();
return pag;
}
-#define for_each_perag_tag(mp, agno, pag, tag) \
- for ((agno) = 0, (pag) = xfs_perag_grab_tag((mp), 0, (tag)); \
- (pag) != NULL; \
- (agno) = (pag)->pag_agno + 1, \
- xfs_perag_rele(pag), \
- (pag) = xfs_perag_grab_tag((mp), (agno), (tag)))
-
/*
* When we recycle a reclaimable inode, we need to re-initialise the VFS inode
* part of the structure. This is made more complex by the fact we store
@@ -1077,15 +1080,11 @@ long
xfs_reclaim_inodes_count(
struct xfs_mount *mp)
{
- struct xfs_perag *pag;
- xfs_agnumber_t ag = 0;
+ struct xfs_perag *pag = NULL;
long reclaimable = 0;
- while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
- ag = pag->pag_agno + 1;
+ while ((pag = xfs_perag_get_next_tag(mp, pag, XFS_ICI_RECLAIM_TAG)))
reclaimable += pag->pag_ici_reclaimable;
- xfs_perag_put(pag);
- }
return reclaimable;
}
@@ -1427,14 +1426,13 @@ void
xfs_blockgc_start(
struct xfs_mount *mp)
{
- struct xfs_perag *pag;
- xfs_agnumber_t agno;
+ struct xfs_perag *pag = NULL;
if (xfs_set_blockgc_enabled(mp))
return;
trace_xfs_blockgc_start(mp, __return_address);
- for_each_perag_tag(mp, agno, pag, XFS_ICI_BLOCKGC_TAG)
+ while ((pag = xfs_perag_grab_next_tag(mp, pag, XFS_ICI_BLOCKGC_TAG)))
xfs_blockgc_queue(pag);
}
@@ -1550,21 +1548,19 @@ int
xfs_blockgc_flush_all(
struct xfs_mount *mp)
{
- struct xfs_perag *pag;
- xfs_agnumber_t agno;
+ struct xfs_perag *pag = NULL;
trace_xfs_blockgc_flush_all(mp, __return_address);
/*
- * For each blockgc worker, move its queue time up to now. If it
- * wasn't queued, it will not be requeued. Then flush whatever's
- * left.
+ * For each blockgc worker, move its queue time up to now. If it wasn't
+ * queued, it will not be requeued. Then flush whatever is left.
*/
- for_each_perag_tag(mp, agno, pag, XFS_ICI_BLOCKGC_TAG)
+ while ((pag = xfs_perag_grab_next_tag(mp, pag, XFS_ICI_BLOCKGC_TAG)))
mod_delayed_work(pag->pag_mount->m_blockgc_wq,
&pag->pag_blockgc_work, 0);
- for_each_perag_tag(mp, agno, pag, XFS_ICI_BLOCKGC_TAG)
+ while ((pag = xfs_perag_grab_next_tag(mp, pag, XFS_ICI_BLOCKGC_TAG)))
flush_delayed_work(&pag->pag_blockgc_work);
return xfs_inodegc_flush(mp);
@@ -1810,12 +1806,11 @@ xfs_icwalk(
enum xfs_icwalk_goal goal,
struct xfs_icwalk *icw)
{
- struct xfs_perag *pag;
+ struct xfs_perag *pag = NULL;
int error = 0;
int last_error = 0;
- xfs_agnumber_t agno;
- for_each_perag_tag(mp, agno, pag, goal) {
+ while ((pag = xfs_perag_grab_next_tag(mp, pag, goal))) {
error = xfs_icwalk_ag(pag, goal, icw);
if (error) {
last_error = error;
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 180ce697305a92..002d012ebd83cb 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -210,11 +210,11 @@ DEFINE_EVENT(xfs_perag_class, name, \
TP_PROTO(struct xfs_perag *pag, unsigned long caller_ip), \
TP_ARGS(pag, caller_ip))
DEFINE_PERAG_REF_EVENT(xfs_perag_get);
-DEFINE_PERAG_REF_EVENT(xfs_perag_get_tag);
+DEFINE_PERAG_REF_EVENT(xfs_perag_get_next_tag);
DEFINE_PERAG_REF_EVENT(xfs_perag_hold);
DEFINE_PERAG_REF_EVENT(xfs_perag_put);
DEFINE_PERAG_REF_EVENT(xfs_perag_grab);
-DEFINE_PERAG_REF_EVENT(xfs_perag_grab_tag);
+DEFINE_PERAG_REF_EVENT(xfs_perag_grab_next_tag);
DEFINE_PERAG_REF_EVENT(xfs_perag_rele);
DEFINE_PERAG_REF_EVENT(xfs_perag_set_inode_tag);
DEFINE_PERAG_REF_EVENT(xfs_perag_clear_inode_tag);
--
2.43.0
^ permalink raw reply related [flat|nested] 18+ messages in thread* Re: [PATCH 3/5] xfs: simplify tagged perag iteration
2024-08-21 6:38 ` [PATCH 3/5] xfs: simplify tagged perag iteration Christoph Hellwig
@ 2024-08-21 16:30 ` Darrick J. Wong
0 siblings, 0 replies; 18+ messages in thread
From: Darrick J. Wong @ 2024-08-21 16:30 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Chandan Babu R, Matthew Wilcox, Andrew Morton, linux-xfs,
linux-kernel, linux-fsdevel
On Wed, Aug 21, 2024 at 08:38:30AM +0200, Christoph Hellwig wrote:
> Pass the old perag structure to the tagged loop helpers so that they can
> grab the old agno before releasing the reference. This removes the need
> to separately track the agno and the iterator macro, and thus also
> obsoletes the for_each_perag_tag syntactic sugar.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
> fs/xfs/xfs_icache.c | 69 +++++++++++++++++++++------------------------
> fs/xfs/xfs_trace.h | 4 +--
> 2 files changed, 34 insertions(+), 39 deletions(-)
>
> diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
> index ac604640d36229..4d71fbfe71299a 100644
> --- a/fs/xfs/xfs_icache.c
> +++ b/fs/xfs/xfs_icache.c
> @@ -296,60 +296,63 @@ xfs_perag_clear_inode_tag(
> * Search from @first to find the next perag with the given tag set.
> */
> static struct xfs_perag *
> -xfs_perag_get_tag(
> +xfs_perag_get_next_tag(
> struct xfs_mount *mp,
> - xfs_agnumber_t first,
> + struct xfs_perag *pag,
> unsigned int tag)
> {
> - struct xfs_perag *pag;
> + unsigned long index = 0;
> int found;
>
> + if (pag) {
> + index = pag->pag_agno + 1;
> + xfs_perag_rele(pag);
> + }
Please update the comment to reflect the replacement of @first with
@pag, and be sure to note that one starts the iteration by passing in
@pag == null, like you did below.
--D
> +
> rcu_read_lock();
> found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
> - (void **)&pag, first, 1, tag);
> + (void **)&pag, index, 1, tag);
> if (found <= 0) {
> rcu_read_unlock();
> return NULL;
> }
> - trace_xfs_perag_get_tag(pag, _RET_IP_);
> + trace_xfs_perag_get_next_tag(pag, _RET_IP_);
> atomic_inc(&pag->pag_ref);
> rcu_read_unlock();
> return pag;
> }
>
> /*
> - * Search from @first to find the next perag with the given tag set.
> + * Find the next AG after @pag, or the first AG if @pag is NULL.
> */
> static struct xfs_perag *
> -xfs_perag_grab_tag(
> +xfs_perag_grab_next_tag(
> struct xfs_mount *mp,
> - xfs_agnumber_t first,
> + struct xfs_perag *pag,
> int tag)
> {
> - struct xfs_perag *pag;
> + unsigned long index = 0;
> int found;
>
> + if (pag) {
> + index = pag->pag_agno + 1;
> + xfs_perag_rele(pag);
> + }
> +
> rcu_read_lock();
> found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
> - (void **)&pag, first, 1, tag);
> + (void **)&pag, index, 1, tag);
> if (found <= 0) {
> rcu_read_unlock();
> return NULL;
> }
> - trace_xfs_perag_grab_tag(pag, _RET_IP_);
> + trace_xfs_perag_grab_next_tag(pag, _RET_IP_);
> if (!atomic_inc_not_zero(&pag->pag_active_ref))
> pag = NULL;
> rcu_read_unlock();
> return pag;
> }
>
> -#define for_each_perag_tag(mp, agno, pag, tag) \
> - for ((agno) = 0, (pag) = xfs_perag_grab_tag((mp), 0, (tag)); \
> - (pag) != NULL; \
> - (agno) = (pag)->pag_agno + 1, \
> - xfs_perag_rele(pag), \
> - (pag) = xfs_perag_grab_tag((mp), (agno), (tag)))
> -
> /*
> * When we recycle a reclaimable inode, we need to re-initialise the VFS inode
> * part of the structure. This is made more complex by the fact we store
> @@ -1077,15 +1080,11 @@ long
> xfs_reclaim_inodes_count(
> struct xfs_mount *mp)
> {
> - struct xfs_perag *pag;
> - xfs_agnumber_t ag = 0;
> + struct xfs_perag *pag = NULL;
> long reclaimable = 0;
>
> - while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
> - ag = pag->pag_agno + 1;
> + while ((pag = xfs_perag_get_next_tag(mp, pag, XFS_ICI_RECLAIM_TAG)))
> reclaimable += pag->pag_ici_reclaimable;
> - xfs_perag_put(pag);
> - }
> return reclaimable;
> }
>
> @@ -1427,14 +1426,13 @@ void
> xfs_blockgc_start(
> struct xfs_mount *mp)
> {
> - struct xfs_perag *pag;
> - xfs_agnumber_t agno;
> + struct xfs_perag *pag = NULL;
>
> if (xfs_set_blockgc_enabled(mp))
> return;
>
> trace_xfs_blockgc_start(mp, __return_address);
> - for_each_perag_tag(mp, agno, pag, XFS_ICI_BLOCKGC_TAG)
> + while ((pag = xfs_perag_grab_next_tag(mp, pag, XFS_ICI_BLOCKGC_TAG)))
> xfs_blockgc_queue(pag);
> }
>
> @@ -1550,21 +1548,19 @@ int
> xfs_blockgc_flush_all(
> struct xfs_mount *mp)
> {
> - struct xfs_perag *pag;
> - xfs_agnumber_t agno;
> + struct xfs_perag *pag = NULL;
>
> trace_xfs_blockgc_flush_all(mp, __return_address);
>
> /*
> - * For each blockgc worker, move its queue time up to now. If it
> - * wasn't queued, it will not be requeued. Then flush whatever's
> - * left.
> + * For each blockgc worker, move its queue time up to now. If it wasn't
> + * queued, it will not be requeued. Then flush whatever is left.
> */
> - for_each_perag_tag(mp, agno, pag, XFS_ICI_BLOCKGC_TAG)
> + while ((pag = xfs_perag_grab_next_tag(mp, pag, XFS_ICI_BLOCKGC_TAG)))
> mod_delayed_work(pag->pag_mount->m_blockgc_wq,
> &pag->pag_blockgc_work, 0);
>
> - for_each_perag_tag(mp, agno, pag, XFS_ICI_BLOCKGC_TAG)
> + while ((pag = xfs_perag_grab_next_tag(mp, pag, XFS_ICI_BLOCKGC_TAG)))
> flush_delayed_work(&pag->pag_blockgc_work);
>
> return xfs_inodegc_flush(mp);
> @@ -1810,12 +1806,11 @@ xfs_icwalk(
> enum xfs_icwalk_goal goal,
> struct xfs_icwalk *icw)
> {
> - struct xfs_perag *pag;
> + struct xfs_perag *pag = NULL;
> int error = 0;
> int last_error = 0;
> - xfs_agnumber_t agno;
>
> - for_each_perag_tag(mp, agno, pag, goal) {
> + while ((pag = xfs_perag_grab_next_tag(mp, pag, goal))) {
> error = xfs_icwalk_ag(pag, goal, icw);
> if (error) {
> last_error = error;
> diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
> index 180ce697305a92..002d012ebd83cb 100644
> --- a/fs/xfs/xfs_trace.h
> +++ b/fs/xfs/xfs_trace.h
> @@ -210,11 +210,11 @@ DEFINE_EVENT(xfs_perag_class, name, \
> TP_PROTO(struct xfs_perag *pag, unsigned long caller_ip), \
> TP_ARGS(pag, caller_ip))
> DEFINE_PERAG_REF_EVENT(xfs_perag_get);
> -DEFINE_PERAG_REF_EVENT(xfs_perag_get_tag);
> +DEFINE_PERAG_REF_EVENT(xfs_perag_get_next_tag);
> DEFINE_PERAG_REF_EVENT(xfs_perag_hold);
> DEFINE_PERAG_REF_EVENT(xfs_perag_put);
> DEFINE_PERAG_REF_EVENT(xfs_perag_grab);
> -DEFINE_PERAG_REF_EVENT(xfs_perag_grab_tag);
> +DEFINE_PERAG_REF_EVENT(xfs_perag_grab_next_tag);
> DEFINE_PERAG_REF_EVENT(xfs_perag_rele);
> DEFINE_PERAG_REF_EVENT(xfs_perag_set_inode_tag);
> DEFINE_PERAG_REF_EVENT(xfs_perag_clear_inode_tag);
> --
> 2.43.0
>
>
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH 4/5] xfs: convert perag lookup to xarray
2024-08-21 6:38 convert XFS perag lookup to xarrays v2 Christoph Hellwig
` (2 preceding siblings ...)
2024-08-21 6:38 ` [PATCH 3/5] xfs: simplify tagged perag iteration Christoph Hellwig
@ 2024-08-21 6:38 ` Christoph Hellwig
2024-08-21 16:28 ` Darrick J. Wong
2024-08-21 6:38 ` [PATCH 5/5] xfs: use xas_for_each_marked in xfs_reclaim_inodes_count Christoph Hellwig
4 siblings, 1 reply; 18+ messages in thread
From: Christoph Hellwig @ 2024-08-21 6:38 UTC (permalink / raw)
To: Chandan Babu R, Matthew Wilcox
Cc: Darrick J. Wong, Andrew Morton, linux-xfs, linux-kernel,
linux-fsdevel
Convert the perag lookup from the legacy radix tree to the xarray,
which allows for much nicer iteration and bulk lookup semantics.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
fs/xfs/libxfs/xfs_ag.c | 31 +++++++-------------------
fs/xfs/xfs_icache.c | 50 +++++++++++++++++++++---------------------
fs/xfs/xfs_mount.h | 3 +--
fs/xfs/xfs_super.c | 3 +--
4 files changed, 35 insertions(+), 52 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
index 87f00f0180846f..5f0494702e0b55 100644
--- a/fs/xfs/libxfs/xfs_ag.c
+++ b/fs/xfs/libxfs/xfs_ag.c
@@ -46,7 +46,7 @@ xfs_perag_get(
struct xfs_perag *pag;
rcu_read_lock();
- pag = radix_tree_lookup(&mp->m_perag_tree, agno);
+ pag = xa_load(&mp->m_perags, agno);
if (pag) {
trace_xfs_perag_get(pag, _RET_IP_);
ASSERT(atomic_read(&pag->pag_ref) >= 0);
@@ -92,7 +92,7 @@ xfs_perag_grab(
struct xfs_perag *pag;
rcu_read_lock();
- pag = radix_tree_lookup(&mp->m_perag_tree, agno);
+ pag = xa_load(&mp->m_perags, agno);
if (pag) {
trace_xfs_perag_grab(pag, _RET_IP_);
if (!atomic_inc_not_zero(&pag->pag_active_ref))
@@ -195,9 +195,7 @@ xfs_free_perag(
xfs_agnumber_t agno;
for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
- spin_lock(&mp->m_perag_lock);
- pag = radix_tree_delete(&mp->m_perag_tree, agno);
- spin_unlock(&mp->m_perag_lock);
+ pag = xa_erase(&mp->m_perags, agno);
ASSERT(pag);
XFS_IS_CORRUPT(pag->pag_mount, atomic_read(&pag->pag_ref) != 0);
xfs_defer_drain_free(&pag->pag_intents_drain);
@@ -286,9 +284,7 @@ xfs_free_unused_perag_range(
xfs_agnumber_t index;
for (index = agstart; index < agend; index++) {
- spin_lock(&mp->m_perag_lock);
- pag = radix_tree_delete(&mp->m_perag_tree, index);
- spin_unlock(&mp->m_perag_lock);
+ pag = xa_erase(&mp->m_perags, index);
if (!pag)
break;
xfs_buf_cache_destroy(&pag->pag_bcache);
@@ -329,20 +325,11 @@ xfs_initialize_perag(
pag->pag_agno = index;
pag->pag_mount = mp;
- error = radix_tree_preload(GFP_KERNEL | __GFP_RETRY_MAYFAIL);
- if (error)
- goto out_free_pag;
-
- spin_lock(&mp->m_perag_lock);
- if (radix_tree_insert(&mp->m_perag_tree, index, pag)) {
- WARN_ON_ONCE(1);
- spin_unlock(&mp->m_perag_lock);
- radix_tree_preload_end();
- error = -EEXIST;
+ error = xa_insert(&mp->m_perags, index, pag, GFP_KERNEL);
+ if (error) {
+ WARN_ON_ONCE(error == -EBUSY);
goto out_free_pag;
}
- spin_unlock(&mp->m_perag_lock);
- radix_tree_preload_end();
#ifdef __KERNEL__
/* Place kernel structure only init below this point. */
@@ -390,9 +377,7 @@ xfs_initialize_perag(
out_remove_pag:
xfs_defer_drain_free(&pag->pag_intents_drain);
- spin_lock(&mp->m_perag_lock);
- radix_tree_delete(&mp->m_perag_tree, index);
- spin_unlock(&mp->m_perag_lock);
+ pag = xa_erase(&mp->m_perags, index);
out_free_pag:
kfree(pag);
out_unwind_new_pags:
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 4d71fbfe71299a..5bca845e702f1d 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -65,6 +65,18 @@ static int xfs_icwalk_ag(struct xfs_perag *pag,
XFS_ICWALK_FLAG_RECLAIM_SICK | \
XFS_ICWALK_FLAG_UNION)
+/* Marks for the perag xarray */
+#define XFS_PERAG_RECLAIM_MARK XA_MARK_0
+#define XFS_PERAG_BLOCKGC_MARK XA_MARK_1
+
+static inline xa_mark_t ici_tag_to_mark(unsigned int tag)
+{
+ if (tag == XFS_ICI_RECLAIM_TAG)
+ return XFS_PERAG_RECLAIM_MARK;
+ ASSERT(tag == XFS_ICI_BLOCKGC_TAG);
+ return XFS_PERAG_BLOCKGC_MARK;
+}
+
/*
* Allocate and initialise an xfs_inode.
*/
@@ -191,7 +203,7 @@ xfs_reclaim_work_queue(
{
rcu_read_lock();
- if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
+ if (xa_marked(&mp->m_perags, XFS_PERAG_RECLAIM_MARK)) {
queue_delayed_work(mp->m_reclaim_workqueue, &mp->m_reclaim_work,
msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
}
@@ -241,9 +253,7 @@ xfs_perag_set_inode_tag(
return;
/* propagate the tag up into the perag radix tree */
- spin_lock(&mp->m_perag_lock);
- radix_tree_tag_set(&mp->m_perag_tree, pag->pag_agno, tag);
- spin_unlock(&mp->m_perag_lock);
+ xa_set_mark(&mp->m_perags, pag->pag_agno, ici_tag_to_mark(tag));
/* start background work */
switch (tag) {
@@ -285,9 +295,7 @@ xfs_perag_clear_inode_tag(
return;
/* clear the tag from the perag radix tree */
- spin_lock(&mp->m_perag_lock);
- radix_tree_tag_clear(&mp->m_perag_tree, pag->pag_agno, tag);
- spin_unlock(&mp->m_perag_lock);
+ xa_clear_mark(&mp->m_perags, pag->pag_agno, ici_tag_to_mark(tag));
trace_xfs_perag_clear_inode_tag(pag, _RET_IP_);
}
@@ -302,7 +310,6 @@ xfs_perag_get_next_tag(
unsigned int tag)
{
unsigned long index = 0;
- int found;
if (pag) {
index = pag->pag_agno + 1;
@@ -310,14 +317,11 @@ xfs_perag_get_next_tag(
}
rcu_read_lock();
- found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
- (void **)&pag, index, 1, tag);
- if (found <= 0) {
- rcu_read_unlock();
- return NULL;
+ pag = xa_find(&mp->m_perags, &index, ULONG_MAX, ici_tag_to_mark(tag));
+ if (pag) {
+ trace_xfs_perag_get_next_tag(pag, _RET_IP_);
+ atomic_inc(&pag->pag_ref);
}
- trace_xfs_perag_get_next_tag(pag, _RET_IP_);
- atomic_inc(&pag->pag_ref);
rcu_read_unlock();
return pag;
}
@@ -332,7 +336,6 @@ xfs_perag_grab_next_tag(
int tag)
{
unsigned long index = 0;
- int found;
if (pag) {
index = pag->pag_agno + 1;
@@ -340,15 +343,12 @@ xfs_perag_grab_next_tag(
}
rcu_read_lock();
- found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
- (void **)&pag, index, 1, tag);
- if (found <= 0) {
- rcu_read_unlock();
- return NULL;
+ pag = xa_find(&mp->m_perags, &index, ULONG_MAX, ici_tag_to_mark(tag));
+ if (pag) {
+ trace_xfs_perag_grab_next_tag(pag, _RET_IP_);
+ if (!atomic_inc_not_zero(&pag->pag_active_ref))
+ pag = NULL;
}
- trace_xfs_perag_grab_next_tag(pag, _RET_IP_);
- if (!atomic_inc_not_zero(&pag->pag_active_ref))
- pag = NULL;
rcu_read_unlock();
return pag;
}
@@ -1038,7 +1038,7 @@ xfs_reclaim_inodes(
if (xfs_want_reclaim_sick(mp))
icw.icw_flags |= XFS_ICWALK_FLAG_RECLAIM_SICK;
- while (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
+ while (xa_marked(&mp->m_perags, XFS_PERAG_RECLAIM_MARK)) {
xfs_ail_push_all_sync(mp->m_ail);
xfs_icwalk(mp, XFS_ICWALK_RECLAIM, &icw);
}
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index d0567dfbc0368d..dce2d832e1e6d1 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -208,8 +208,7 @@ typedef struct xfs_mount {
*/
atomic64_t m_allocbt_blks;
- struct radix_tree_root m_perag_tree; /* per-ag accounting info */
- spinlock_t m_perag_lock; /* lock for m_perag_tree */
+ struct xarray m_perags; /* per-ag accounting info */
uint64_t m_resblks; /* total reserved blocks */
uint64_t m_resblks_avail;/* available reserved blocks */
uint64_t m_resblks_save; /* reserved blks @ remount,ro */
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 27e9f749c4c7fc..c41b543f2e9121 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -2009,8 +2009,7 @@ static int xfs_init_fs_context(
return -ENOMEM;
spin_lock_init(&mp->m_sb_lock);
- INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC);
- spin_lock_init(&mp->m_perag_lock);
+ xa_init(&mp->m_perags);
mutex_init(&mp->m_growlock);
INIT_WORK(&mp->m_flush_inodes_work, xfs_flush_inodes_worker);
INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
--
2.43.0
^ permalink raw reply related [flat|nested] 18+ messages in thread* Re: [PATCH 4/5] xfs: convert perag lookup to xarray
2024-08-21 6:38 ` [PATCH 4/5] xfs: convert perag lookup to xarray Christoph Hellwig
@ 2024-08-21 16:28 ` Darrick J. Wong
2024-08-22 3:45 ` Christoph Hellwig
0 siblings, 1 reply; 18+ messages in thread
From: Darrick J. Wong @ 2024-08-21 16:28 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Chandan Babu R, Matthew Wilcox, Andrew Morton, linux-xfs,
linux-kernel, linux-fsdevel
On Wed, Aug 21, 2024 at 08:38:31AM +0200, Christoph Hellwig wrote:
> Convert the perag lookup from the legacy radix tree to the xarray,
> which allows for much nicer iteration and bulk lookup semantics.
Looks like a pretty straightforward covnersion. Is there a good
justification for converting the ici radix tree too? Or is it too
sparse to be worth doing?
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
--D
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
> fs/xfs/libxfs/xfs_ag.c | 31 +++++++-------------------
> fs/xfs/xfs_icache.c | 50 +++++++++++++++++++++---------------------
> fs/xfs/xfs_mount.h | 3 +--
> fs/xfs/xfs_super.c | 3 +--
> 4 files changed, 35 insertions(+), 52 deletions(-)
>
> diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
> index 87f00f0180846f..5f0494702e0b55 100644
> --- a/fs/xfs/libxfs/xfs_ag.c
> +++ b/fs/xfs/libxfs/xfs_ag.c
> @@ -46,7 +46,7 @@ xfs_perag_get(
> struct xfs_perag *pag;
>
> rcu_read_lock();
> - pag = radix_tree_lookup(&mp->m_perag_tree, agno);
> + pag = xa_load(&mp->m_perags, agno);
> if (pag) {
> trace_xfs_perag_get(pag, _RET_IP_);
> ASSERT(atomic_read(&pag->pag_ref) >= 0);
> @@ -92,7 +92,7 @@ xfs_perag_grab(
> struct xfs_perag *pag;
>
> rcu_read_lock();
> - pag = radix_tree_lookup(&mp->m_perag_tree, agno);
> + pag = xa_load(&mp->m_perags, agno);
> if (pag) {
> trace_xfs_perag_grab(pag, _RET_IP_);
> if (!atomic_inc_not_zero(&pag->pag_active_ref))
> @@ -195,9 +195,7 @@ xfs_free_perag(
> xfs_agnumber_t agno;
>
> for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
> - spin_lock(&mp->m_perag_lock);
> - pag = radix_tree_delete(&mp->m_perag_tree, agno);
> - spin_unlock(&mp->m_perag_lock);
> + pag = xa_erase(&mp->m_perags, agno);
> ASSERT(pag);
> XFS_IS_CORRUPT(pag->pag_mount, atomic_read(&pag->pag_ref) != 0);
> xfs_defer_drain_free(&pag->pag_intents_drain);
> @@ -286,9 +284,7 @@ xfs_free_unused_perag_range(
> xfs_agnumber_t index;
>
> for (index = agstart; index < agend; index++) {
> - spin_lock(&mp->m_perag_lock);
> - pag = radix_tree_delete(&mp->m_perag_tree, index);
> - spin_unlock(&mp->m_perag_lock);
> + pag = xa_erase(&mp->m_perags, index);
> if (!pag)
> break;
> xfs_buf_cache_destroy(&pag->pag_bcache);
> @@ -329,20 +325,11 @@ xfs_initialize_perag(
> pag->pag_agno = index;
> pag->pag_mount = mp;
>
> - error = radix_tree_preload(GFP_KERNEL | __GFP_RETRY_MAYFAIL);
> - if (error)
> - goto out_free_pag;
> -
> - spin_lock(&mp->m_perag_lock);
> - if (radix_tree_insert(&mp->m_perag_tree, index, pag)) {
> - WARN_ON_ONCE(1);
> - spin_unlock(&mp->m_perag_lock);
> - radix_tree_preload_end();
> - error = -EEXIST;
> + error = xa_insert(&mp->m_perags, index, pag, GFP_KERNEL);
> + if (error) {
> + WARN_ON_ONCE(error == -EBUSY);
> goto out_free_pag;
> }
> - spin_unlock(&mp->m_perag_lock);
> - radix_tree_preload_end();
>
> #ifdef __KERNEL__
> /* Place kernel structure only init below this point. */
> @@ -390,9 +377,7 @@ xfs_initialize_perag(
>
> out_remove_pag:
> xfs_defer_drain_free(&pag->pag_intents_drain);
> - spin_lock(&mp->m_perag_lock);
> - radix_tree_delete(&mp->m_perag_tree, index);
> - spin_unlock(&mp->m_perag_lock);
> + pag = xa_erase(&mp->m_perags, index);
> out_free_pag:
> kfree(pag);
> out_unwind_new_pags:
> diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
> index 4d71fbfe71299a..5bca845e702f1d 100644
> --- a/fs/xfs/xfs_icache.c
> +++ b/fs/xfs/xfs_icache.c
> @@ -65,6 +65,18 @@ static int xfs_icwalk_ag(struct xfs_perag *pag,
> XFS_ICWALK_FLAG_RECLAIM_SICK | \
> XFS_ICWALK_FLAG_UNION)
>
> +/* Marks for the perag xarray */
> +#define XFS_PERAG_RECLAIM_MARK XA_MARK_0
> +#define XFS_PERAG_BLOCKGC_MARK XA_MARK_1
> +
> +static inline xa_mark_t ici_tag_to_mark(unsigned int tag)
> +{
> + if (tag == XFS_ICI_RECLAIM_TAG)
> + return XFS_PERAG_RECLAIM_MARK;
> + ASSERT(tag == XFS_ICI_BLOCKGC_TAG);
> + return XFS_PERAG_BLOCKGC_MARK;
> +}
> +
> /*
> * Allocate and initialise an xfs_inode.
> */
> @@ -191,7 +203,7 @@ xfs_reclaim_work_queue(
> {
>
> rcu_read_lock();
> - if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
> + if (xa_marked(&mp->m_perags, XFS_PERAG_RECLAIM_MARK)) {
> queue_delayed_work(mp->m_reclaim_workqueue, &mp->m_reclaim_work,
> msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
> }
> @@ -241,9 +253,7 @@ xfs_perag_set_inode_tag(
> return;
>
> /* propagate the tag up into the perag radix tree */
> - spin_lock(&mp->m_perag_lock);
> - radix_tree_tag_set(&mp->m_perag_tree, pag->pag_agno, tag);
> - spin_unlock(&mp->m_perag_lock);
> + xa_set_mark(&mp->m_perags, pag->pag_agno, ici_tag_to_mark(tag));
>
> /* start background work */
> switch (tag) {
> @@ -285,9 +295,7 @@ xfs_perag_clear_inode_tag(
> return;
>
> /* clear the tag from the perag radix tree */
> - spin_lock(&mp->m_perag_lock);
> - radix_tree_tag_clear(&mp->m_perag_tree, pag->pag_agno, tag);
> - spin_unlock(&mp->m_perag_lock);
> + xa_clear_mark(&mp->m_perags, pag->pag_agno, ici_tag_to_mark(tag));
>
> trace_xfs_perag_clear_inode_tag(pag, _RET_IP_);
> }
> @@ -302,7 +310,6 @@ xfs_perag_get_next_tag(
> unsigned int tag)
> {
> unsigned long index = 0;
> - int found;
>
> if (pag) {
> index = pag->pag_agno + 1;
> @@ -310,14 +317,11 @@ xfs_perag_get_next_tag(
> }
>
> rcu_read_lock();
> - found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
> - (void **)&pag, index, 1, tag);
> - if (found <= 0) {
> - rcu_read_unlock();
> - return NULL;
> + pag = xa_find(&mp->m_perags, &index, ULONG_MAX, ici_tag_to_mark(tag));
> + if (pag) {
> + trace_xfs_perag_get_next_tag(pag, _RET_IP_);
> + atomic_inc(&pag->pag_ref);
> }
> - trace_xfs_perag_get_next_tag(pag, _RET_IP_);
> - atomic_inc(&pag->pag_ref);
> rcu_read_unlock();
> return pag;
> }
> @@ -332,7 +336,6 @@ xfs_perag_grab_next_tag(
> int tag)
> {
> unsigned long index = 0;
> - int found;
>
> if (pag) {
> index = pag->pag_agno + 1;
> @@ -340,15 +343,12 @@ xfs_perag_grab_next_tag(
> }
>
> rcu_read_lock();
> - found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
> - (void **)&pag, index, 1, tag);
> - if (found <= 0) {
> - rcu_read_unlock();
> - return NULL;
> + pag = xa_find(&mp->m_perags, &index, ULONG_MAX, ici_tag_to_mark(tag));
> + if (pag) {
> + trace_xfs_perag_grab_next_tag(pag, _RET_IP_);
> + if (!atomic_inc_not_zero(&pag->pag_active_ref))
> + pag = NULL;
> }
> - trace_xfs_perag_grab_next_tag(pag, _RET_IP_);
> - if (!atomic_inc_not_zero(&pag->pag_active_ref))
> - pag = NULL;
> rcu_read_unlock();
> return pag;
> }
> @@ -1038,7 +1038,7 @@ xfs_reclaim_inodes(
> if (xfs_want_reclaim_sick(mp))
> icw.icw_flags |= XFS_ICWALK_FLAG_RECLAIM_SICK;
>
> - while (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
> + while (xa_marked(&mp->m_perags, XFS_PERAG_RECLAIM_MARK)) {
> xfs_ail_push_all_sync(mp->m_ail);
> xfs_icwalk(mp, XFS_ICWALK_RECLAIM, &icw);
> }
> diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
> index d0567dfbc0368d..dce2d832e1e6d1 100644
> --- a/fs/xfs/xfs_mount.h
> +++ b/fs/xfs/xfs_mount.h
> @@ -208,8 +208,7 @@ typedef struct xfs_mount {
> */
> atomic64_t m_allocbt_blks;
>
> - struct radix_tree_root m_perag_tree; /* per-ag accounting info */
> - spinlock_t m_perag_lock; /* lock for m_perag_tree */
> + struct xarray m_perags; /* per-ag accounting info */
> uint64_t m_resblks; /* total reserved blocks */
> uint64_t m_resblks_avail;/* available reserved blocks */
> uint64_t m_resblks_save; /* reserved blks @ remount,ro */
> diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
> index 27e9f749c4c7fc..c41b543f2e9121 100644
> --- a/fs/xfs/xfs_super.c
> +++ b/fs/xfs/xfs_super.c
> @@ -2009,8 +2009,7 @@ static int xfs_init_fs_context(
> return -ENOMEM;
>
> spin_lock_init(&mp->m_sb_lock);
> - INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC);
> - spin_lock_init(&mp->m_perag_lock);
> + xa_init(&mp->m_perags);
> mutex_init(&mp->m_growlock);
> INIT_WORK(&mp->m_flush_inodes_work, xfs_flush_inodes_worker);
> INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
> --
> 2.43.0
>
>
^ permalink raw reply [flat|nested] 18+ messages in thread* Re: [PATCH 4/5] xfs: convert perag lookup to xarray
2024-08-21 16:28 ` Darrick J. Wong
@ 2024-08-22 3:45 ` Christoph Hellwig
2024-08-27 22:51 ` Dave Chinner
0 siblings, 1 reply; 18+ messages in thread
From: Christoph Hellwig @ 2024-08-22 3:45 UTC (permalink / raw)
To: Darrick J. Wong
Cc: Christoph Hellwig, Chandan Babu R, Matthew Wilcox, Andrew Morton,
linux-xfs, linux-kernel, linux-fsdevel
On Wed, Aug 21, 2024 at 09:28:10AM -0700, Darrick J. Wong wrote:
> On Wed, Aug 21, 2024 at 08:38:31AM +0200, Christoph Hellwig wrote:
> > Convert the perag lookup from the legacy radix tree to the xarray,
> > which allows for much nicer iteration and bulk lookup semantics.
>
> Looks like a pretty straightforward covnersion. Is there a good
> justification for converting the ici radix tree too? Or is it too
> sparse to be worth doing?
radix trees and xarrays have pretty similar behavior related to
sparseness or waste of interior nodes due to it. So unless we find a
better data structure for it, it would be worthwhile.
But the ici radix tree does pretty funny things in terms of also
protecting other fields with the lock synchronizing it, so the conversion
is fairly complicated and I don't feel like doing it right now, at least
no without evaluating if for example a rthashtable might actually be
the better data structure here. The downside of the rthashtable is
that it doens't support tags/masks and isn't great for iteration, so it
might very much not be very suitable.
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 4/5] xfs: convert perag lookup to xarray
2024-08-22 3:45 ` Christoph Hellwig
@ 2024-08-27 22:51 ` Dave Chinner
0 siblings, 0 replies; 18+ messages in thread
From: Dave Chinner @ 2024-08-27 22:51 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Darrick J. Wong, Chandan Babu R, Matthew Wilcox, Andrew Morton,
linux-xfs, linux-kernel, linux-fsdevel
On Thu, Aug 22, 2024 at 05:45:48AM +0200, Christoph Hellwig wrote:
> On Wed, Aug 21, 2024 at 09:28:10AM -0700, Darrick J. Wong wrote:
> > On Wed, Aug 21, 2024 at 08:38:31AM +0200, Christoph Hellwig wrote:
> > > Convert the perag lookup from the legacy radix tree to the xarray,
> > > which allows for much nicer iteration and bulk lookup semantics.
> >
> > Looks like a pretty straightforward covnersion. Is there a good
> > justification for converting the ici radix tree too? Or is it too
> > sparse to be worth doing?
>
> radix trees and xarrays have pretty similar behavior related to
> sparseness or waste of interior nodes due to it.
And the node size is still 64 entries, which matches up with inode
chunk size. Hence a fully populated and cached inode chunk fills
xarray nodes completely, just like the radix tree. Hence if our
inode allocation locality decisions work, we end up with good
population characteristics in the in-memory cache index, too.
> So unless we find a
> better data structure for it, it would be worthwhile.
I have prototype patches to convert the ici radix tree to an xarray.
When I wrote it a few months ago I never got the time to actually
test it because other stuff happened....
> But the ici radix tree does pretty funny things in terms of also
> protecting other fields with the lock synchronizing it, so the conversion
> is fairly complicated
The locking isn't a big deal - I just used xa_lock() and xa_unlock()
to use the internal xarray lock to replace the perag->pag_ici_lock.
This gives the same semantics of making external state and tree
state updates atomic.
e.g. this code in xfs_reclaim_inode():
spin_lock(&pag->pag_ici_lock);
if (!radix_tree_delete(&pag->pag_ici_root,
XFS_INO_TO_AGINO(ip->i_mount, ino)))
ASSERT(0);
xfs_perag_clear_inode_tag(pag, NULLAGINO, XFS_ICI_RECLAIM_TAG);
spin_unlock(&pag->pag_ici_lock);
becomes:
xa_lock(&pag->pag_icache);
if (__xa_erase(&pag->pag_icache,
XFS_INO_TO_AGINO(ip->i_mount, ino)) != ip)
ASSERT(0);
xfs_perag_clear_inode_tag(pag, NULLAGINO, XFS_ICI_RECLAIM_TAG);
xa_unlock(&pag->pag_icache);
so the clearing of the XFS_ICI_RECLAIM_TAG in the mp->m_perag tree
is still atomic w.r.t. the removal of the inode from the icache
xarray.
> and I don't feel like doing it right now, at least
> no without evaluating if for example a rthashtable might actually be
> the better data structure here. The downside of the rthashtable is
> that it doens't support tags/masks and isn't great for iteration, so it
> might very much not be very suitable.
The rhashtable is not suited to the inode cache at all. A very
common access pattern is iterating all the inodes in an inode
cluster (e.g. in xfs_iflush_cluster() or during an icwalk) and with
a radix tree or xarray, these lookups all hit the same node and
cachelines. We've optimised this into gang lookups, which means
all the inodes in a cluster are fetched at the same time via
sequential memory access.
Move to a rhashtable makes this iteration mechanism impossible
because the rhashtable is unordered. Every inode we look up now
takes at least one cacheline miss because it's in some other
completely random index in the rhashtable and not adjacent to
the last inode we lookuped up. Worse, we have to dereference each
object we find on the chain to do key matching, so it's at least two
cacheline accesses per inode lookup.
So instead of a cluster lookup of 32 inodes only requiring a few
cacheline accesses to walk down the tree and then 4 sequential
cacheline accesses to retreive all the inode pointers, we have at
least 64 individual random cacheline accesses to get the pointers to
the same number of inodes.
IOWs, a hashtable of any kind is way more inefficient than using the
radix tree or xarray when it comes to the sorts of lockless
sequential access patterns we use internally with the XFS inode
cache.
Keep in mind that I went through all this "scalable structure
analysis" back in 2007 before I replaced the hash table based inode
cache implementation with radix trees. Radix trees were a far better
choice than a hash table way back then, and nothing in our inode
cache access patterns and algorithms has really changed since
then....
-Dave.
--
Dave Chinner
david@fromorbit.com
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH 5/5] xfs: use xas_for_each_marked in xfs_reclaim_inodes_count
2024-08-21 6:38 convert XFS perag lookup to xarrays v2 Christoph Hellwig
` (3 preceding siblings ...)
2024-08-21 6:38 ` [PATCH 4/5] xfs: convert perag lookup to xarray Christoph Hellwig
@ 2024-08-21 6:38 ` Christoph Hellwig
2024-08-21 16:22 ` Darrick J. Wong
4 siblings, 1 reply; 18+ messages in thread
From: Christoph Hellwig @ 2024-08-21 6:38 UTC (permalink / raw)
To: Chandan Babu R, Matthew Wilcox
Cc: Darrick J. Wong, Andrew Morton, linux-xfs, linux-kernel,
linux-fsdevel
xfs_reclaim_inodes_count iterates over all AGs to sum up the reclaimable
inodes counts. There is no point in grabbing a reference to the them or
unlock the RCU critical section for each iteration, so switch to the
more efficient xas_for_each_marked iterator.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
fs/xfs/xfs_icache.c | 36 ++++++++----------------------------
fs/xfs/xfs_trace.h | 2 +-
2 files changed, 9 insertions(+), 29 deletions(-)
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 5bca845e702f1d..d36dbaba660013 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -300,32 +300,6 @@ xfs_perag_clear_inode_tag(
trace_xfs_perag_clear_inode_tag(pag, _RET_IP_);
}
-/*
- * Search from @first to find the next perag with the given tag set.
- */
-static struct xfs_perag *
-xfs_perag_get_next_tag(
- struct xfs_mount *mp,
- struct xfs_perag *pag,
- unsigned int tag)
-{
- unsigned long index = 0;
-
- if (pag) {
- index = pag->pag_agno + 1;
- xfs_perag_rele(pag);
- }
-
- rcu_read_lock();
- pag = xa_find(&mp->m_perags, &index, ULONG_MAX, ici_tag_to_mark(tag));
- if (pag) {
- trace_xfs_perag_get_next_tag(pag, _RET_IP_);
- atomic_inc(&pag->pag_ref);
- }
- rcu_read_unlock();
- return pag;
-}
-
/*
* Find the next AG after @pag, or the first AG if @pag is NULL.
*/
@@ -1080,11 +1054,17 @@ long
xfs_reclaim_inodes_count(
struct xfs_mount *mp)
{
- struct xfs_perag *pag = NULL;
+ XA_STATE (xas, &mp->m_perags, 0);
long reclaimable = 0;
+ struct xfs_perag *pag;
- while ((pag = xfs_perag_get_next_tag(mp, pag, XFS_ICI_RECLAIM_TAG)))
+ rcu_read_lock();
+ xas_for_each_marked(&xas, pag, ULONG_MAX, XFS_PERAG_RECLAIM_MARK) {
+ trace_xfs_reclaim_inodes_count(pag, _THIS_IP_);
reclaimable += pag->pag_ici_reclaimable;
+ }
+ rcu_read_unlock();
+
return reclaimable;
}
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 002d012ebd83cb..d73c0a49d9dc29 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -210,7 +210,6 @@ DEFINE_EVENT(xfs_perag_class, name, \
TP_PROTO(struct xfs_perag *pag, unsigned long caller_ip), \
TP_ARGS(pag, caller_ip))
DEFINE_PERAG_REF_EVENT(xfs_perag_get);
-DEFINE_PERAG_REF_EVENT(xfs_perag_get_next_tag);
DEFINE_PERAG_REF_EVENT(xfs_perag_hold);
DEFINE_PERAG_REF_EVENT(xfs_perag_put);
DEFINE_PERAG_REF_EVENT(xfs_perag_grab);
@@ -218,6 +217,7 @@ DEFINE_PERAG_REF_EVENT(xfs_perag_grab_next_tag);
DEFINE_PERAG_REF_EVENT(xfs_perag_rele);
DEFINE_PERAG_REF_EVENT(xfs_perag_set_inode_tag);
DEFINE_PERAG_REF_EVENT(xfs_perag_clear_inode_tag);
+DEFINE_PERAG_REF_EVENT(xfs_reclaim_inodes_count);
TRACE_EVENT(xfs_inodegc_worker,
TP_PROTO(struct xfs_mount *mp, unsigned int shrinker_hits),
--
2.43.0
^ permalink raw reply related [flat|nested] 18+ messages in thread* Re: [PATCH 5/5] xfs: use xas_for_each_marked in xfs_reclaim_inodes_count
2024-08-21 6:38 ` [PATCH 5/5] xfs: use xas_for_each_marked in xfs_reclaim_inodes_count Christoph Hellwig
@ 2024-08-21 16:22 ` Darrick J. Wong
0 siblings, 0 replies; 18+ messages in thread
From: Darrick J. Wong @ 2024-08-21 16:22 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Chandan Babu R, Matthew Wilcox, Andrew Morton, linux-xfs,
linux-kernel, linux-fsdevel
On Wed, Aug 21, 2024 at 08:38:32AM +0200, Christoph Hellwig wrote:
> xfs_reclaim_inodes_count iterates over all AGs to sum up the reclaimable
> inodes counts. There is no point in grabbing a reference to the them or
> unlock the RCU critical section for each iteration, so switch to the
> more efficient xas_for_each_marked iterator.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
Clever!
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
--D
> ---
> fs/xfs/xfs_icache.c | 36 ++++++++----------------------------
> fs/xfs/xfs_trace.h | 2 +-
> 2 files changed, 9 insertions(+), 29 deletions(-)
>
> diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
> index 5bca845e702f1d..d36dbaba660013 100644
> --- a/fs/xfs/xfs_icache.c
> +++ b/fs/xfs/xfs_icache.c
> @@ -300,32 +300,6 @@ xfs_perag_clear_inode_tag(
> trace_xfs_perag_clear_inode_tag(pag, _RET_IP_);
> }
>
> -/*
> - * Search from @first to find the next perag with the given tag set.
> - */
> -static struct xfs_perag *
> -xfs_perag_get_next_tag(
> - struct xfs_mount *mp,
> - struct xfs_perag *pag,
> - unsigned int tag)
> -{
> - unsigned long index = 0;
> -
> - if (pag) {
> - index = pag->pag_agno + 1;
> - xfs_perag_rele(pag);
> - }
> -
> - rcu_read_lock();
> - pag = xa_find(&mp->m_perags, &index, ULONG_MAX, ici_tag_to_mark(tag));
> - if (pag) {
> - trace_xfs_perag_get_next_tag(pag, _RET_IP_);
> - atomic_inc(&pag->pag_ref);
> - }
> - rcu_read_unlock();
> - return pag;
> -}
> -
> /*
> * Find the next AG after @pag, or the first AG if @pag is NULL.
> */
> @@ -1080,11 +1054,17 @@ long
> xfs_reclaim_inodes_count(
> struct xfs_mount *mp)
> {
> - struct xfs_perag *pag = NULL;
> + XA_STATE (xas, &mp->m_perags, 0);
> long reclaimable = 0;
> + struct xfs_perag *pag;
>
> - while ((pag = xfs_perag_get_next_tag(mp, pag, XFS_ICI_RECLAIM_TAG)))
> + rcu_read_lock();
> + xas_for_each_marked(&xas, pag, ULONG_MAX, XFS_PERAG_RECLAIM_MARK) {
> + trace_xfs_reclaim_inodes_count(pag, _THIS_IP_);
> reclaimable += pag->pag_ici_reclaimable;
> + }
> + rcu_read_unlock();
> +
> return reclaimable;
> }
>
> diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
> index 002d012ebd83cb..d73c0a49d9dc29 100644
> --- a/fs/xfs/xfs_trace.h
> +++ b/fs/xfs/xfs_trace.h
> @@ -210,7 +210,6 @@ DEFINE_EVENT(xfs_perag_class, name, \
> TP_PROTO(struct xfs_perag *pag, unsigned long caller_ip), \
> TP_ARGS(pag, caller_ip))
> DEFINE_PERAG_REF_EVENT(xfs_perag_get);
> -DEFINE_PERAG_REF_EVENT(xfs_perag_get_next_tag);
> DEFINE_PERAG_REF_EVENT(xfs_perag_hold);
> DEFINE_PERAG_REF_EVENT(xfs_perag_put);
> DEFINE_PERAG_REF_EVENT(xfs_perag_grab);
> @@ -218,6 +217,7 @@ DEFINE_PERAG_REF_EVENT(xfs_perag_grab_next_tag);
> DEFINE_PERAG_REF_EVENT(xfs_perag_rele);
> DEFINE_PERAG_REF_EVENT(xfs_perag_set_inode_tag);
> DEFINE_PERAG_REF_EVENT(xfs_perag_clear_inode_tag);
> +DEFINE_PERAG_REF_EVENT(xfs_reclaim_inodes_count);
>
> TRACE_EVENT(xfs_inodegc_worker,
> TP_PROTO(struct xfs_mount *mp, unsigned int shrinker_hits),
> --
> 2.43.0
>
>
^ permalink raw reply [flat|nested] 18+ messages in thread