* [PATCH 6/6] [XFS] Reference count per-ag structures
2009-12-02 6:11 [PATCH 0/6] [XFS] Fix growfs deadlocks and per-AG use after free Dave Chinner
@ 2009-12-02 6:11 ` Dave Chinner
2009-12-10 23:48 ` Christoph Hellwig
0 siblings, 1 reply; 23+ messages in thread
From: Dave Chinner @ 2009-12-02 6:11 UTC (permalink / raw)
To: xfs
Reference count the per-ag structures to ensure that we keep get/put
pairs balanced. Assert that the reference counts are zero at unmount
time to catch leaks. In future, reference counts will enable us to
safely remove perag structures by allowing us to detect when they
are no longer in use.
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
fs/xfs/xfs_ag.h | 4 ++--
fs/xfs/xfs_alloc.c | 2 +-
fs/xfs/xfs_inode.c | 5 ++++-
fs/xfs/xfs_mount.c | 1 +
fs/xfs/xfs_mount.h | 11 +++++++++--
5 files changed, 17 insertions(+), 6 deletions(-)
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index a5d54bf..b384a3c 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -182,8 +182,8 @@ typedef struct xfs_perag_busy {
#define XFS_PAGB_NUM_SLOTS 128
#endif
-typedef struct xfs_perag
-{
+typedef struct xfs_perag {
+ atomic_t pag_ref; /* perag reference count */
char pagf_init; /* this agf's entry is initialized */
char pagi_init; /* this agi's entry is initialized */
char pagf_metadata; /* the agf is preferred to be metadata */
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 2d076e2..d1349b1 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -2545,7 +2545,6 @@ xfs_alloc_vextent(
}
xfs_perag_put(args->pag);
}
- xfs_perag_put(args->pag);
if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) {
if (args->agno == sagno)
mp->m_agfrotor = (mp->m_agfrotor + 1) %
@@ -2571,6 +2570,7 @@ xfs_alloc_vextent(
args->len);
#endif
}
+ xfs_perag_put(args->pag);
return 0;
error0:
xfs_perag_put(args->pag);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 44a1168..e6c178e 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2729,7 +2729,7 @@ xfs_iflush_cluster(
ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *);
ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS);
if (!ilist)
- return 0;
+ goto out_put;
mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask;
@@ -2798,6 +2798,8 @@ xfs_iflush_cluster(
out_free:
read_unlock(&pag->pag_ici_lock);
kmem_free(ilist);
+out_put:
+ xfs_perag_put(pag);
return 0;
@@ -2841,6 +2843,7 @@ cluster_corrupt_out:
*/
xfs_iflush_abort(iq);
kmem_free(ilist);
+ xfs_perag_put(pag);
return XFS_ERROR(EFSCORRUPTED);
}
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index d6de63d..21f1b29 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -213,6 +213,7 @@ xfs_free_perag(
for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
spin_lock(&mp->m_perag_lock);
pag = radix_tree_delete(&mp->m_perag_tree, agno);
+ ASSERT(atomic_read(&pag->pag_ref) == 0);
spin_unlock(&mp->m_perag_lock);
if (!pag)
continue;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index b2aa726..225eb29 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -384,7 +384,7 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
}
/*
- * perag get/put wrappers for eventual ref counting
+ * perag get/put wrappers for ref counting
*/
static inline xfs_perag_t *
xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
@@ -393,6 +393,12 @@ xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
spin_lock(&mp->m_perag_lock);
pag = radix_tree_lookup(&mp->m_perag_tree, agno);
+ if (pag) {
+ ASSERT(atomic_read(&pag->pag_ref) >= 0);
+ /* catch leaks in the positive direction during testing */
+ ASSERT(atomic_read(&pag->pag_ref) < 1000);
+ atomic_inc(&pag->pag_ref);
+ }
spin_unlock(&mp->m_perag_lock);
return pag;
}
@@ -400,7 +406,8 @@ xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
static inline void
xfs_perag_put(xfs_perag_t *pag)
{
- /* nothing to see here, move along */
+ ASSERT(atomic_read(&pag->pag_ref) > 0);
+ atomic_dec(&pag->pag_ref);
}
/*
--
1.6.5
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 23+ messages in thread
* Re: [PATCH 6/6] [XFS] Reference count per-ag structures
2009-12-02 6:11 ` [PATCH 6/6] [XFS] Reference count per-ag structures Dave Chinner
@ 2009-12-10 23:48 ` Christoph Hellwig
2009-12-11 0:50 ` Dave Chinner
0 siblings, 1 reply; 23+ messages in thread
From: Christoph Hellwig @ 2009-12-10 23:48 UTC (permalink / raw)
To: Dave Chinner; +Cc: xfs
On Wed, Dec 02, 2009 at 05:11:39PM +1100, Dave Chinner wrote:
> Reference count the per-ag structures to ensure that we keep get/put
> pairs balanced. Assert that the reference counts are zero at unmount
> time to catch leaks. In future, reference counts will enable us to
> safely remove perag structures by allowing us to detect when they
> are no longer in use.
>
> Signed-off-by: Dave Chinner <david@fromorbit.com>
> ---
> fs/xfs/xfs_ag.h | 4 ++--
> fs/xfs/xfs_alloc.c | 2 +-
> fs/xfs/xfs_inode.c | 5 ++++-
> fs/xfs/xfs_mount.c | 1 +
> fs/xfs/xfs_mount.h | 11 +++++++++--
> 5 files changed, 17 insertions(+), 6 deletions(-)
>
> diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
> index a5d54bf..b384a3c 100644
> --- a/fs/xfs/xfs_ag.h
> +++ b/fs/xfs/xfs_ag.h
> index 2d076e2..d1349b1 100644
> --- a/fs/xfs/xfs_alloc.c
> +++ b/fs/xfs/xfs_alloc.c
> @@ -2545,7 +2545,6 @@ xfs_alloc_vextent(
> }
> xfs_perag_put(args->pag);
> }
> - xfs_perag_put(args->pag);
> if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) {
> if (args->agno == sagno)
> mp->m_agfrotor = (mp->m_agfrotor + 1) %
> @@ -2571,6 +2570,7 @@ xfs_alloc_vextent(
> args->len);
> #endif
> }
> + xfs_perag_put(args->pag);
> return 0;
> error0:
> xfs_perag_put(args->pag);
Should be folded into the earlier patches.
>
> diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
> index 44a1168..e6c178e 100644
> --- a/fs/xfs/xfs_inode.c
> +++ b/fs/xfs/xfs_inode.c
> @@ -2729,7 +2729,7 @@ xfs_iflush_cluster(
> ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *);
> ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS);
> if (!ilist)
> - return 0;
> + goto out_put;
>
> mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
> first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask;
> @@ -2798,6 +2798,8 @@ xfs_iflush_cluster(
> out_free:
> read_unlock(&pag->pag_ici_lock);
> kmem_free(ilist);
> +out_put:
> + xfs_perag_put(pag);
> return 0;
>
>
> @@ -2841,6 +2843,7 @@ cluster_corrupt_out:
> */
> xfs_iflush_abort(iq);
> kmem_free(ilist);
> + xfs_perag_put(pag);
> return XFS_ERROR(EFSCORRUPTED);
Same here.
> static inline xfs_perag_t *
> xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
> @@ -393,6 +393,12 @@ xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
>
> spin_lock(&mp->m_perag_lock);
> pag = radix_tree_lookup(&mp->m_perag_tree, agno);
> + if (pag) {
> + ASSERT(atomic_read(&pag->pag_ref) >= 0);
> + /* catch leaks in the positive direction during testing */
> + ASSERT(atomic_read(&pag->pag_ref) < 1000);
Is there any good reason why we should not be able to hit this number?
Patch looks good, although I'm a bit worried about the performance
overhead.
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH 6/6] [XFS] Reference count per-ag structures
2009-12-10 23:48 ` Christoph Hellwig
@ 2009-12-11 0:50 ` Dave Chinner
0 siblings, 0 replies; 23+ messages in thread
From: Dave Chinner @ 2009-12-11 0:50 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: xfs
On Thu, Dec 10, 2009 at 06:48:36PM -0500, Christoph Hellwig wrote:
> On Wed, Dec 02, 2009 at 05:11:39PM +1100, Dave Chinner wrote:
> > - xfs_perag_put(args->pag);
> > if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) {
> > if (args->agno == sagno)
> > mp->m_agfrotor = (mp->m_agfrotor + 1) %
> > @@ -2571,6 +2570,7 @@ xfs_alloc_vextent(
> > args->len);
> > #endif
> > }
> > + xfs_perag_put(args->pag);
> > return 0;
> > error0:
> > xfs_perag_put(args->pag);
>
> Should be folded into the earlier patches.
Will do.
> > static inline xfs_perag_t *
> > xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
> > @@ -393,6 +393,12 @@ xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
> >
> > spin_lock(&mp->m_perag_lock);
> > pag = radix_tree_lookup(&mp->m_perag_tree, agno);
> > + if (pag) {
> > + ASSERT(atomic_read(&pag->pag_ref) >= 0);
> > + /* catch leaks in the positive direction during testing */
> > + ASSERT(atomic_read(&pag->pag_ref) < 1000);
>
> Is there any good reason why we should not be able to hit this number?
that would require 1000 parallel accesses to the perag structure.
Most accesses are serialised by the AGF/AGI buffer locks, and there
are only a handful of traversal references that can be made, so I
don't think that a reference count of more than 10 is going to be
common in real life.
> Patch looks good, although I'm a bit worried about the performance
> overhead.
None that I've been able to see so far - effectively we are
replacing a rwsem with a spin lock and an atomic op, so I think
the overhead is comparable, perhaps even lower than before.
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH 6/6] XFS: Reference count per-ag structures
2009-12-14 23:11 [PATCH 0/6] XFS: Fix growfs deadlocks and per-AG use after free V2 Dave Chinner
@ 2009-12-14 23:11 ` Dave Chinner
0 siblings, 0 replies; 23+ messages in thread
From: Dave Chinner @ 2009-12-14 23:11 UTC (permalink / raw)
To: xfs
Reference count the per-ag structures to ensure that we keep get/put
pairs balanced. Assert that the reference counts are zero at unmount
time to catch leaks. In future, reference counts will enable us to
safely remove perag structures by allowing us to detect when they
are no longer in use.
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
fs/xfs/xfs_ag.h | 4 ++--
fs/xfs/xfs_mount.c | 1 +
fs/xfs/xfs_mount.h | 11 +++++++++--
3 files changed, 12 insertions(+), 4 deletions(-)
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index a5d54bf..b384a3c 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -182,8 +182,8 @@ typedef struct xfs_perag_busy {
#define XFS_PAGB_NUM_SLOTS 128
#endif
-typedef struct xfs_perag
-{
+typedef struct xfs_perag {
+ atomic_t pag_ref; /* perag reference count */
char pagf_init; /* this agf's entry is initialized */
char pagi_init; /* this agi's entry is initialized */
char pagf_metadata; /* the agf is preferred to be metadata */
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 1ab9495..0203a6a 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -213,6 +213,7 @@ xfs_free_perag(
for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
spin_lock(&mp->m_perag_lock);
pag = radix_tree_delete(&mp->m_perag_tree, agno);
+ ASSERT(atomic_read(&pag->pag_ref) == 0);
spin_unlock(&mp->m_perag_lock);
ASSERT(pag);
kmem_free(pag->pagb_list);
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index cfa7a5d..16b2212 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -384,7 +384,7 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
}
/*
- * perag get/put wrappers for eventual ref counting
+ * perag get/put wrappers for ref counting
*/
static inline struct xfs_perag *
xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
@@ -393,6 +393,12 @@ xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
spin_lock(&mp->m_perag_lock);
pag = radix_tree_lookup(&mp->m_perag_tree, agno);
+ if (pag) {
+ ASSERT(atomic_read(&pag->pag_ref) >= 0);
+ /* catch leaks in the positive direction during testing */
+ ASSERT(atomic_read(&pag->pag_ref) < 1000);
+ atomic_inc(&pag->pag_ref);
+ }
spin_unlock(&mp->m_perag_lock);
return pag;
}
@@ -400,7 +406,8 @@ xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
static inline void
xfs_perag_put(struct xfs_perag *pag)
{
- /* nothing to see here, move along */
+ ASSERT(atomic_read(&pag->pag_ref) > 0);
+ atomic_dec(&pag->pag_ref);
}
/*
--
1.6.5
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH 0/6] XFS: Fix growfs deadlocks and per-AG use after free V3
@ 2009-12-15 6:11 Dave Chinner
2009-12-15 6:11 ` [PATCH 1/6] XFS: rename xfs_get_perag Dave Chinner
` (6 more replies)
0 siblings, 7 replies; 23+ messages in thread
From: Dave Chinner @ 2009-12-15 6:11 UTC (permalink / raw)
To: xfs
The use of an array for the xfs_perag structures results in growfs having
to realocate the array. This requires exclusion to prevent use-after-free
situations. The current locking is prone to deadlocks when growing under
load, and the xfssyncd currently has no protection against the array being
reallocated that can lead to panics.
This series abstracts the per-ag structure access and then removes the array
to replace it with individual xfs_perag structures indexed by a radix tree.
The only locking required is for the radix tree, hence the deadlocks go away
as the tree lock is always the innermost lock. The use after frees go away
as well as growfs does not need to reallocate structures fo pre-existing
allocation groups - it only needs to allocate the structures for the new
AGs and insert them into the tree.
This series also adds reference counting to the xfs-perag structure to
ensure that we balance get/put accesses to the structures and provide the
infrastructure to determine if the structure is in use or not.
Version 3: Rebase for current upstream tree.
Version 2: Fix review comments from Christoph.
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH 1/6] XFS: rename xfs_get_perag
2009-12-15 6:11 [PATCH 0/6] XFS: Fix growfs deadlocks and per-AG use after free V3 Dave Chinner
@ 2009-12-15 6:11 ` Dave Chinner
2009-12-21 22:21 ` Alex Elder
2009-12-15 6:11 ` [PATCH 2/6] XFS: Don't directly reference m_perag in allocation code Dave Chinner
` (5 subsequent siblings)
6 siblings, 1 reply; 23+ messages in thread
From: Dave Chinner @ 2009-12-15 6:11 UTC (permalink / raw)
To: xfs
xfs_get_perag is really getting the perag that an inode
belongs to based on it's inode number. Convert the use of this
function to just get the perag from a provided ag number.
Use this new function to obtain the per-ag structure when
traversing the per AG inode trees for sync and reclaim.
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
fs/xfs/linux-2.6/xfs_sync.c | 33 +++++++++++++++++++++------------
fs/xfs/xfs_iget.c | 10 +++++-----
fs/xfs/xfs_inode.c | 8 +++++---
fs/xfs/xfs_mount.h | 8 ++++----
4 files changed, 35 insertions(+), 24 deletions(-)
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 6fed97a..0e17683 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -96,13 +96,12 @@ unlock:
STATIC int
xfs_inode_ag_walk(
struct xfs_mount *mp,
- xfs_agnumber_t ag,
+ struct xfs_perag *pag,
int (*execute)(struct xfs_inode *ip,
struct xfs_perag *pag, int flags),
int flags,
int tag)
{
- struct xfs_perag *pag = &mp->m_perag[ag];
uint32_t first_index;
int last_error = 0;
int skipped;
@@ -137,8 +136,6 @@ restart:
delay(1);
goto restart;
}
-
- xfs_put_perag(mp, pag);
return last_error;
}
@@ -155,9 +152,15 @@ xfs_inode_ag_iterator(
xfs_agnumber_t ag;
for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
- if (!mp->m_perag[ag].pag_ici_init)
+ struct xfs_perag *pag;
+
+ pag = xfs_perag_get(mp, ag);
+ if (!pag->pag_ici_init) {
+ xfs_perag_put(pag);
continue;
- error = xfs_inode_ag_walk(mp, ag, execute, flags, tag);
+ }
+ error = xfs_inode_ag_walk(mp, pag, execute, flags, tag);
+ xfs_perag_put(pag);
if (error) {
last_error = error;
if (error == EFSCORRUPTED)
@@ -669,25 +672,30 @@ xfs_reclaim_inode(
xfs_inode_t *ip,
int sync_mode)
{
- xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino);
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_perag *pag;
+
- /* The hash lock here protects a thread in xfs_iget_core from
+ /*
+ * The radix tree lock here protects a thread in xfs_iget_core from
* racing with us on linking the inode back with a vnode.
* Once we have the XFS_IRECLAIM flag set it will not touch
* us.
*/
+ pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
write_lock(&pag->pag_ici_lock);
spin_lock(&ip->i_flags_lock);
if (__xfs_iflags_test(ip, XFS_IRECLAIM) ||
!__xfs_iflags_test(ip, XFS_IRECLAIMABLE)) {
spin_unlock(&ip->i_flags_lock);
write_unlock(&pag->pag_ici_lock);
+ xfs_perag_put(pag);
return -EAGAIN;
}
__xfs_iflags_set(ip, XFS_IRECLAIM);
spin_unlock(&ip->i_flags_lock);
write_unlock(&pag->pag_ici_lock);
- xfs_put_perag(ip->i_mount, pag);
+ xfs_perag_put(pag);
/*
* If the inode is still dirty, then flush it out. If the inode
@@ -737,16 +745,17 @@ void
xfs_inode_set_reclaim_tag(
xfs_inode_t *ip)
{
- xfs_mount_t *mp = ip->i_mount;
- xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino);
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_perag *pag;
+ pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
read_lock(&pag->pag_ici_lock);
spin_lock(&ip->i_flags_lock);
__xfs_inode_set_reclaim_tag(pag, ip);
__xfs_iflags_set(ip, XFS_IRECLAIMABLE);
spin_unlock(&ip->i_flags_lock);
read_unlock(&pag->pag_ici_lock);
- xfs_put_perag(mp, pag);
+ xfs_perag_put(pag);
}
void
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index f5c904a..d1e7250 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -375,7 +375,7 @@ xfs_iget(
return EINVAL;
/* get the perag structure and ensure that it's inode capable */
- pag = xfs_get_perag(mp, ino);
+ pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino));
if (!pag->pagi_inodeok)
return EINVAL;
ASSERT(pag->pag_ici_init);
@@ -399,7 +399,7 @@ again:
if (error)
goto out_error_or_again;
}
- xfs_put_perag(mp, pag);
+ xfs_perag_put(pag);
*ipp = ip;
@@ -418,7 +418,7 @@ out_error_or_again:
delay(1);
goto again;
}
- xfs_put_perag(mp, pag);
+ xfs_perag_put(pag);
return error;
}
@@ -486,11 +486,11 @@ xfs_ireclaim(
* if it was never added to it because radix_tree_delete can deal
* with that case just fine.
*/
- pag = xfs_get_perag(mp, ip->i_ino);
+ pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
write_lock(&pag->pag_ici_lock);
radix_tree_delete(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino));
write_unlock(&pag->pag_ici_lock);
- xfs_put_perag(mp, pag);
+ xfs_perag_put(pag);
/*
* Here we do an (almost) spurious inode lock in order to coordinate
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index ce278b3..1f69dda 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1946,8 +1946,9 @@ xfs_ifree_cluster(
xfs_inode_t *ip, **ip_found;
xfs_inode_log_item_t *iip;
xfs_log_item_t *lip;
- xfs_perag_t *pag = xfs_get_perag(mp, inum);
+ struct xfs_perag *pag;
+ pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum));
if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) {
blks_per_cluster = 1;
ninodes = mp->m_sb.sb_inopblock;
@@ -2088,7 +2089,7 @@ xfs_ifree_cluster(
}
kmem_free(ip_found);
- xfs_put_perag(mp, pag);
+ xfs_perag_put(pag);
}
/*
@@ -2675,7 +2676,7 @@ xfs_iflush_cluster(
xfs_buf_t *bp)
{
xfs_mount_t *mp = ip->i_mount;
- xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino);
+ struct xfs_perag *pag;
unsigned long first_index, mask;
unsigned long inodes_per_cluster;
int ilist_size;
@@ -2686,6 +2687,7 @@ xfs_iflush_cluster(
int bufwasdelwri;
int i;
+ pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
ASSERT(pag->pagi_inodeok);
ASSERT(pag->pag_ici_init);
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 1df7e45..f8a68a2 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -386,14 +386,14 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
/*
* perag get/put wrappers for eventual ref counting
*/
-static inline xfs_perag_t *
-xfs_get_perag(struct xfs_mount *mp, xfs_ino_t ino)
+static inline struct xfs_perag *
+xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
{
- return &mp->m_perag[XFS_INO_TO_AGNO(mp, ino)];
+ return &mp->m_perag[agno];
}
static inline void
-xfs_put_perag(struct xfs_mount *mp, xfs_perag_t *pag)
+xfs_perag_put(struct xfs_perag *pag)
{
/* nothing to see here, move along */
}
--
1.6.5
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH 2/6] XFS: Don't directly reference m_perag in allocation code
2009-12-15 6:11 [PATCH 0/6] XFS: Fix growfs deadlocks and per-AG use after free V3 Dave Chinner
2009-12-15 6:11 ` [PATCH 1/6] XFS: rename xfs_get_perag Dave Chinner
@ 2009-12-15 6:11 ` Dave Chinner
2009-12-20 4:24 ` Dave Chinner
2009-12-22 14:18 ` Alex Elder
2009-12-15 6:11 ` [PATCH 3/6] XFS: Convert filestreams code to use per-ag get/put routines Dave Chinner
` (4 subsequent siblings)
6 siblings, 2 replies; 23+ messages in thread
From: Dave Chinner @ 2009-12-15 6:11 UTC (permalink / raw)
To: xfs
Start abstracting the perag references so that the indexing of the
structures is not directly coded into all the places that uses the
perag structures. This will allow us to separate the use of the
perag structure and the way it is indexed and hence avoid the known
deadlocks related to growing a busy filesystem.
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
fs/xfs/xfs_alloc.c | 81 +++++++++++++++++++++++++--------------------
fs/xfs/xfs_alloc_btree.c | 9 ++++-
2 files changed, 52 insertions(+), 38 deletions(-)
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index a1c65fc..3cb533c 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -1662,11 +1662,13 @@ xfs_free_ag_extent(
xfs_agf_t *agf;
xfs_perag_t *pag; /* per allocation group data */
+ pag = xfs_perag_get(mp, agno);
+ pag->pagf_freeblks += len;
+ xfs_perag_put(pag);
+
agf = XFS_BUF_TO_AGF(agbp);
- pag = &mp->m_perag[agno];
be32_add_cpu(&agf->agf_freeblks, len);
xfs_trans_agblocks_delta(tp, len);
- pag->pagf_freeblks += len;
XFS_WANT_CORRUPTED_GOTO(
be32_to_cpu(agf->agf_freeblks) <=
be32_to_cpu(agf->agf_length),
@@ -1969,7 +1971,8 @@ xfs_alloc_get_freelist(
xfs_trans_brelse(tp, agflbp);
if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
agf->agf_flfirst = 0;
- pag = &mp->m_perag[be32_to_cpu(agf->agf_seqno)];
+
+ pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
be32_add_cpu(&agf->agf_flcount, -1);
xfs_trans_agflist_delta(tp, -1);
pag->pagf_flcount--;
@@ -2078,7 +2081,8 @@ xfs_alloc_put_freelist(
be32_add_cpu(&agf->agf_fllast, 1);
if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp))
agf->agf_fllast = 0;
- pag = &mp->m_perag[be32_to_cpu(agf->agf_seqno)];
+
+ pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
be32_add_cpu(&agf->agf_flcount, 1);
xfs_trans_agflist_delta(tp, 1);
pag->pagf_flcount++;
@@ -2089,6 +2093,7 @@ xfs_alloc_put_freelist(
pag->pagf_btreeblks--;
logflags |= XFS_AGF_BTREEBLKS;
}
+ xfs_perag_put(pag);
xfs_alloc_log_agf(tp, agbp, logflags);
@@ -2152,7 +2157,6 @@ xfs_read_agf(
xfs_trans_brelse(tp, *bpp);
return XFS_ERROR(EFSCORRUPTED);
}
-
XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_AGF, XFS_AGF_REF);
return 0;
}
@@ -2184,7 +2188,7 @@ xfs_alloc_read_agf(
ASSERT(!XFS_BUF_GETERROR(*bpp));
agf = XFS_BUF_TO_AGF(*bpp);
- pag = &mp->m_perag[agno];
+ pag = xfs_perag_get(mp, agno);
if (!pag->pagf_init) {
pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
@@ -2211,6 +2215,7 @@ xfs_alloc_read_agf(
be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
}
#endif
+ xfs_perag_put(pag);
return 0;
}
@@ -2271,7 +2276,7 @@ xfs_alloc_vextent(
*/
args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
down_read(&mp->m_peraglock);
- args->pag = &mp->m_perag[args->agno];
+ args->pag = xfs_perag_get(mp, args->agno);
args->minleft = 0;
error = xfs_alloc_fix_freelist(args, 0);
args->minleft = minleft;
@@ -2341,7 +2346,7 @@ xfs_alloc_vextent(
*/
down_read(&mp->m_peraglock);
for (;;) {
- args->pag = &mp->m_perag[args->agno];
+ args->pag = xfs_perag_get(mp, args->agno);
if (no_min) args->minleft = 0;
error = xfs_alloc_fix_freelist(args, flags);
args->minleft = minleft;
@@ -2400,6 +2405,7 @@ xfs_alloc_vextent(
}
}
}
+ xfs_perag_put(args->pag);
}
up_read(&mp->m_peraglock);
if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) {
@@ -2427,8 +2433,10 @@ xfs_alloc_vextent(
args->len);
#endif
}
+ xfs_perag_put(args->pag);
return 0;
error0:
+ xfs_perag_put(args->pag);
up_read(&mp->m_peraglock);
return error;
}
@@ -2455,7 +2463,7 @@ xfs_free_extent(
ASSERT(args.agno < args.mp->m_sb.sb_agcount);
args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno);
down_read(&args.mp->m_peraglock);
- args.pag = &args.mp->m_perag[args.agno];
+ args.pag = xfs_perag_get(args.mp, args.agno);
if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING)))
goto error0;
#ifdef DEBUG
@@ -2465,6 +2473,7 @@ xfs_free_extent(
#endif
error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
error0:
+ xfs_perag_put(args.pag);
up_read(&args.mp->m_peraglock);
return error;
}
@@ -2486,15 +2495,15 @@ xfs_alloc_mark_busy(xfs_trans_t *tp,
xfs_agblock_t bno,
xfs_extlen_t len)
{
- xfs_mount_t *mp;
xfs_perag_busy_t *bsy;
+ struct xfs_perag *pag;
int n;
- mp = tp->t_mountp;
- spin_lock(&mp->m_perag[agno].pagb_lock);
+ pag = xfs_perag_get(tp->t_mountp, agno);
+ spin_lock(&pag->pagb_lock);
/* search pagb_list for an open slot */
- for (bsy = mp->m_perag[agno].pagb_list, n = 0;
+ for (bsy = pag->pagb_list, n = 0;
n < XFS_PAGB_NUM_SLOTS;
bsy++, n++) {
if (bsy->busy_tp == NULL) {
@@ -2502,11 +2511,11 @@ xfs_alloc_mark_busy(xfs_trans_t *tp,
}
}
- trace_xfs_alloc_busy(mp, agno, bno, len, n);
+ trace_xfs_alloc_busy(tp->t_mountp, agno, bno, len, n);
if (n < XFS_PAGB_NUM_SLOTS) {
- bsy = &mp->m_perag[agno].pagb_list[n];
- mp->m_perag[agno].pagb_count++;
+ bsy = &pag->pagb_list[n];
+ pag->pagb_count++;
bsy->busy_start = bno;
bsy->busy_length = len;
bsy->busy_tp = tp;
@@ -2521,7 +2530,8 @@ xfs_alloc_mark_busy(xfs_trans_t *tp,
xfs_trans_set_sync(tp);
}
- spin_unlock(&mp->m_perag[agno].pagb_lock);
+ spin_unlock(&pag->pagb_lock);
+ xfs_perag_put(pag);
}
void
@@ -2529,24 +2539,23 @@ xfs_alloc_clear_busy(xfs_trans_t *tp,
xfs_agnumber_t agno,
int idx)
{
- xfs_mount_t *mp;
+ struct xfs_perag *pag;
xfs_perag_busy_t *list;
- mp = tp->t_mountp;
-
- spin_lock(&mp->m_perag[agno].pagb_lock);
- list = mp->m_perag[agno].pagb_list;
-
ASSERT(idx < XFS_PAGB_NUM_SLOTS);
+ pag = xfs_perag_get(tp->t_mountp, agno);
+ spin_lock(&pag->pagb_lock);
+ list = pag->pagb_list;
- trace_xfs_alloc_unbusy(mp, agno, idx, list[idx].busy_tp == tp);
+ trace_xfs_alloc_unbusy(tp->t_mountp, agno, idx, list[idx].busy_tp == tp);
if (list[idx].busy_tp == tp) {
list[idx].busy_tp = NULL;
- mp->m_perag[agno].pagb_count--;
+ pag->pagb_count--;
}
- spin_unlock(&mp->m_perag[agno].pagb_lock);
+ spin_unlock(&pag->pagb_lock);
+ xfs_perag_put(pag);
}
@@ -2560,21 +2569,20 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
xfs_agblock_t bno,
xfs_extlen_t len)
{
- xfs_mount_t *mp;
+ struct xfs_perag *pag;
xfs_perag_busy_t *bsy;
xfs_agblock_t uend, bend;
xfs_lsn_t lsn;
int cnt;
- mp = tp->t_mountp;
-
- spin_lock(&mp->m_perag[agno].pagb_lock);
- cnt = mp->m_perag[agno].pagb_count;
+ pag = xfs_perag_get(tp->t_mountp, agno);
+ spin_lock(&pag->pagb_lock);
+ cnt = pag->pagb_count;
uend = bno + len - 1;
/* search pagb_list for this slot, skipping open slots */
- for (bsy = mp->m_perag[agno].pagb_list; cnt; bsy++) {
+ for (bsy = pag->pagb_list; cnt; bsy++) {
/*
* (start1,length1) within (start2, length2)
@@ -2589,7 +2597,7 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
}
}
- trace_xfs_alloc_busysearch(mp, agno, bno, len, !!cnt);
+ trace_xfs_alloc_busysearch(tp->t_mountp, agno, bno, len, !!cnt);
/*
* If a block was found, force the log through the LSN of the
@@ -2597,9 +2605,10 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
*/
if (cnt) {
lsn = bsy->busy_tp->t_commit_lsn;
- spin_unlock(&mp->m_perag[agno].pagb_lock);
- xfs_log_force(mp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC);
+ spin_unlock(&pag->pagb_lock);
+ xfs_log_force(tp->t_mountp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC);
} else {
- spin_unlock(&mp->m_perag[agno].pagb_lock);
+ spin_unlock(&pag->pagb_lock);
}
+ xfs_perag_put(pag);
}
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index adbd914..b726e10 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -61,12 +61,14 @@ xfs_allocbt_set_root(
struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno);
int btnum = cur->bc_btnum;
+ struct xfs_perag *pag = xfs_perag_get(cur->bc_mp, seqno);
ASSERT(ptr->s != 0);
agf->agf_roots[btnum] = ptr->s;
be32_add_cpu(&agf->agf_levels[btnum], inc);
- cur->bc_mp->m_perag[seqno].pagf_levels[btnum] += inc;
+ pag->pagf_levels[btnum] += inc;
+ xfs_perag_put(pag);
xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
}
@@ -150,6 +152,7 @@ xfs_allocbt_update_lastrec(
{
struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno);
+ struct xfs_perag *pag;
__be32 len;
int numrecs;
@@ -193,7 +196,9 @@ xfs_allocbt_update_lastrec(
}
agf->agf_longest = len;
- cur->bc_mp->m_perag[seqno].pagf_longest = be32_to_cpu(len);
+ pag = xfs_perag_get(cur->bc_mp, seqno);
+ pag->pagf_longest = be32_to_cpu(len);
+ xfs_perag_put(pag);
xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, XFS_AGF_LONGEST);
}
--
1.6.5
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH 3/6] XFS: Convert filestreams code to use per-ag get/put routines
2009-12-15 6:11 [PATCH 0/6] XFS: Fix growfs deadlocks and per-AG use after free V3 Dave Chinner
2009-12-15 6:11 ` [PATCH 1/6] XFS: rename xfs_get_perag Dave Chinner
2009-12-15 6:11 ` [PATCH 2/6] XFS: Don't directly reference m_perag in allocation code Dave Chinner
@ 2009-12-15 6:11 ` Dave Chinner
2009-12-22 15:08 ` [PATCH 3/6] XFS: Convert filestreams code to use per-ag get/putroutines Alex Elder
2009-12-15 6:11 ` [PATCH 4/6] XFS: convert remaining direct references to m_perag Dave Chinner
` (3 subsequent siblings)
6 siblings, 1 reply; 23+ messages in thread
From: Dave Chinner @ 2009-12-15 6:11 UTC (permalink / raw)
To: xfs
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
fs/xfs/xfs_filestream.c | 19 ++++++++++++-------
fs/xfs/xfs_filestream.h | 27 ++++++++++++++++++++++++---
2 files changed, 36 insertions(+), 10 deletions(-)
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index a631e14..e61f2aa 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -140,6 +140,7 @@ _xfs_filestream_pick_ag(
int flags,
xfs_extlen_t minlen)
{
+ int streams, max_streams;
int err, trylock, nscan;
xfs_extlen_t longest, free, minfree, maxfree = 0;
xfs_agnumber_t ag, max_ag = NULLAGNUMBER;
@@ -155,15 +156,15 @@ _xfs_filestream_pick_ag(
trylock = XFS_ALLOC_FLAG_TRYLOCK;
for (nscan = 0; 1; nscan++) {
-
- TRACE_AG_SCAN(mp, ag, xfs_filestream_peek_ag(mp, ag));
-
- pag = mp->m_perag + ag;
+ pag = xfs_perag_get(mp, ag);
+ TRACE_AG_SCAN(mp, ag, atomic_read(&pag->pagf_fstrms));
if (!pag->pagf_init) {
err = xfs_alloc_pagf_init(mp, NULL, ag, trylock);
- if (err && !trylock)
+ if (err && !trylock) {
+ xfs_perag_put(pag);
return err;
+ }
}
/* Might fail sometimes during the 1st pass with trylock set. */
@@ -173,6 +174,7 @@ _xfs_filestream_pick_ag(
/* Keep track of the AG with the most free blocks. */
if (pag->pagf_freeblks > maxfree) {
maxfree = pag->pagf_freeblks;
+ max_streams = atomic_read(&pag->pagf_fstrms);
max_ag = ag;
}
@@ -195,6 +197,8 @@ _xfs_filestream_pick_ag(
/* Break out, retaining the reference on the AG. */
free = pag->pagf_freeblks;
+ streams = atomic_read(&pag->pagf_fstrms);
+ xfs_perag_put(pag);
*agp = ag;
break;
}
@@ -202,6 +206,7 @@ _xfs_filestream_pick_ag(
/* Drop the reference on this AG, it's not usable. */
xfs_filestream_put_ag(mp, ag);
next_ag:
+ xfs_perag_put(pag);
/* Move to the next AG, wrapping to AG 0 if necessary. */
if (++ag >= mp->m_sb.sb_agcount)
ag = 0;
@@ -229,6 +234,7 @@ next_ag:
if (max_ag != NULLAGNUMBER) {
xfs_filestream_get_ag(mp, max_ag);
TRACE_AG_PICK1(mp, max_ag, maxfree);
+ streams = max_streams;
free = maxfree;
*agp = max_ag;
break;
@@ -240,8 +246,7 @@ next_ag:
return 0;
}
- TRACE_AG_PICK2(mp, startag, *agp, xfs_filestream_peek_ag(mp, *agp),
- free, nscan, flags);
+ TRACE_AG_PICK2(mp, startag, *agp, streams, free, nscan, flags);
return 0;
}
diff --git a/fs/xfs/xfs_filestream.h b/fs/xfs/xfs_filestream.h
index 4aba67c..58378b2 100644
--- a/fs/xfs/xfs_filestream.h
+++ b/fs/xfs/xfs_filestream.h
@@ -79,12 +79,21 @@ extern ktrace_t *xfs_filestreams_trace_buf;
* the cache that reference per-ag array elements that have since been
* reallocated.
*/
+/*
+ * xfs_filestream_peek_ag is only used in tracing code
+ */
static inline int
xfs_filestream_peek_ag(
xfs_mount_t *mp,
xfs_agnumber_t agno)
{
- return atomic_read(&mp->m_perag[agno].pagf_fstrms);
+ struct xfs_perag *pag;
+ int ret;
+
+ pag = xfs_perag_get(mp, agno);
+ ret = atomic_read(&pag->pagf_fstrms);
+ xfs_perag_put(pag);
+ return ret;
}
static inline int
@@ -92,7 +101,13 @@ xfs_filestream_get_ag(
xfs_mount_t *mp,
xfs_agnumber_t agno)
{
- return atomic_inc_return(&mp->m_perag[agno].pagf_fstrms);
+ struct xfs_perag *pag;
+ int ret;
+
+ pag = xfs_perag_get(mp, agno);
+ ret = atomic_inc_return(&pag->pagf_fstrms);
+ xfs_perag_put(pag);
+ return ret;
}
static inline int
@@ -100,7 +115,13 @@ xfs_filestream_put_ag(
xfs_mount_t *mp,
xfs_agnumber_t agno)
{
- return atomic_dec_return(&mp->m_perag[agno].pagf_fstrms);
+ struct xfs_perag *pag;
+ int ret;
+
+ pag = xfs_perag_get(mp, agno);
+ ret = atomic_dec_return(&pag->pagf_fstrms);
+ xfs_perag_put(pag);
+ return ret;
}
/* allocation selection flags */
--
1.6.5
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH 4/6] XFS: convert remaining direct references to m_perag
2009-12-15 6:11 [PATCH 0/6] XFS: Fix growfs deadlocks and per-AG use after free V3 Dave Chinner
` (2 preceding siblings ...)
2009-12-15 6:11 ` [PATCH 3/6] XFS: Convert filestreams code to use per-ag get/put routines Dave Chinner
@ 2009-12-15 6:11 ` Dave Chinner
2009-12-23 20:49 ` Alex Elder
2009-12-15 6:11 ` [PATCH 5/6] XFS: Replace per-ag array with a radix tree Dave Chinner
` (2 subsequent siblings)
6 siblings, 1 reply; 23+ messages in thread
From: Dave Chinner @ 2009-12-15 6:11 UTC (permalink / raw)
To: xfs
Convert the remaining direct lookups of the per ag structures
to use get/put accesses. Ensure that the loops across AGs and
prior users of the interface balance gets and puts correctly.
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
fs/xfs/xfs_bmap.c | 8 +++++++-
fs/xfs/xfs_ialloc.c | 35 +++++++++++++++++++++++++----------
fs/xfs/xfs_inode.c | 5 ++++-
fs/xfs/xfs_mount.c | 9 ++++++---
4 files changed, 42 insertions(+), 15 deletions(-)
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 98251cd..a9b95d9 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -2630,11 +2630,12 @@ xfs_bmap_btalloc(
startag = ag = 0;
notinit = 0;
down_read(&mp->m_peraglock);
+ pag = xfs_perag_get(mp, ag);
while (blen < ap->alen) {
- pag = &mp->m_perag[ag];
if (!pag->pagf_init &&
(error = xfs_alloc_pagf_init(mp, args.tp,
ag, XFS_ALLOC_FLAG_TRYLOCK))) {
+ xfs_perag_put(pag);
up_read(&mp->m_peraglock);
return error;
}
@@ -2667,6 +2668,7 @@ xfs_bmap_btalloc(
break;
error = xfs_filestream_new_ag(ap, &ag);
+ xfs_perag_put(pag);
if (error) {
up_read(&mp->m_peraglock);
return error;
@@ -2674,6 +2676,7 @@ xfs_bmap_btalloc(
/* loop again to set 'blen'*/
startag = NULLAGNUMBER;
+ pag = xfs_perag_get(mp, ag);
continue;
}
}
@@ -2681,7 +2684,10 @@ xfs_bmap_btalloc(
ag = 0;
if (ag == startag)
break;
+ xfs_perag_put(pag);
+ pag = xfs_perag_get(mp, ag);
}
+ xfs_perag_put(pag);
up_read(&mp->m_peraglock);
/*
* Since the above loop did a BUF_TRYLOCK, it is
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index cb907ba..884ee13 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -253,6 +253,7 @@ xfs_ialloc_ag_alloc(
xfs_agino_t thisino; /* current inode number, for loop */
int isaligned = 0; /* inode allocation at stripe unit */
/* boundary */
+ struct xfs_perag *pag;
args.tp = tp;
args.mp = tp->t_mountp;
@@ -383,7 +384,9 @@ xfs_ialloc_ag_alloc(
be32_add_cpu(&agi->agi_count, newlen);
be32_add_cpu(&agi->agi_freecount, newlen);
down_read(&args.mp->m_peraglock);
- args.mp->m_perag[agno].pagi_freecount += newlen;
+ pag = xfs_perag_get(args.mp, agno);
+ pag->pagi_freecount += newlen;
+ xfs_perag_put(pag);
up_read(&args.mp->m_peraglock);
agi->agi_newino = cpu_to_be32(newino);
@@ -488,7 +491,7 @@ xfs_ialloc_ag_select(
flags = XFS_ALLOC_FLAG_TRYLOCK;
down_read(&mp->m_peraglock);
for (;;) {
- pag = &mp->m_perag[agno];
+ pag = xfs_perag_get(mp, agno);
if (!pag->pagi_init) {
if (xfs_ialloc_read_agi(mp, tp, agno, &agbp)) {
agbp = NULL;
@@ -527,6 +530,7 @@ xfs_ialloc_ag_select(
agbp = NULL;
goto nextag;
}
+ xfs_perag_put(pag);
up_read(&mp->m_peraglock);
return agbp;
}
@@ -535,6 +539,7 @@ unlock_nextag:
if (agbp)
xfs_trans_brelse(tp, agbp);
nextag:
+ xfs_perag_put(pag);
/*
* No point in iterating over the rest, if we're shutting
* down.
@@ -672,6 +677,7 @@ xfs_dialloc(
xfs_agnumber_t tagno; /* testing allocation group number */
xfs_btree_cur_t *tcur; /* temp cursor */
xfs_inobt_rec_incore_t trec; /* temp inode allocation record */
+ struct xfs_perag *pag;
if (*IO_agbp == NULL) {
@@ -772,11 +778,14 @@ nextag:
return noroom ? ENOSPC : 0;
}
down_read(&mp->m_peraglock);
- if (mp->m_perag[tagno].pagi_inodeok == 0) {
+ pag = xfs_perag_get(mp, tagno);
+ if (pag->pagi_inodeok == 0) {
+ xfs_perag_put(pag);
up_read(&mp->m_peraglock);
goto nextag;
}
error = xfs_ialloc_read_agi(mp, tp, tagno, &agbp);
+ xfs_perag_put(pag);
up_read(&mp->m_peraglock);
if (error)
goto nextag;
@@ -790,6 +799,7 @@ nextag:
*/
agno = tagno;
*IO_agbp = NULL;
+ pag = xfs_perag_get(mp, agno);
restart_pagno:
cur = xfs_inobt_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno));
@@ -808,7 +818,6 @@ nextag:
* If in the same AG as the parent, try to get near the parent.
*/
if (pagno == agno) {
- xfs_perag_t *pag = &mp->m_perag[agno];
int doneleft; /* done, to the left */
int doneright; /* done, to the right */
int searchdistance = 10;
@@ -1007,7 +1016,7 @@ alloc_inode:
be32_add_cpu(&agi->agi_freecount, -1);
xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
down_read(&mp->m_peraglock);
- mp->m_perag[tagno].pagi_freecount--;
+ pag->pagi_freecount--;
up_read(&mp->m_peraglock);
error = xfs_check_agi_freecount(cur, agi);
@@ -1016,12 +1025,14 @@ alloc_inode:
xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1);
+ xfs_perag_put(pag);
*inop = ino;
return 0;
error1:
xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
error0:
xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+ xfs_perag_put(pag);
return error;
}
@@ -1052,6 +1063,7 @@ xfs_difree(
xfs_mount_t *mp; /* mount structure for filesystem */
int off; /* offset of inode in inode chunk */
xfs_inobt_rec_incore_t rec; /* btree record */
+ struct xfs_perag *pag;
mp = tp->t_mountp;
@@ -1158,7 +1170,9 @@ xfs_difree(
be32_add_cpu(&agi->agi_freecount, -(ilen - 1));
xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT);
down_read(&mp->m_peraglock);
- mp->m_perag[agno].pagi_freecount -= ilen - 1;
+ pag = xfs_perag_get(mp, agno);
+ pag->pagi_freecount -= ilen - 1;
+ xfs_perag_put(pag);
up_read(&mp->m_peraglock);
xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen);
xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1));
@@ -1189,7 +1203,9 @@ xfs_difree(
be32_add_cpu(&agi->agi_freecount, 1);
xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
down_read(&mp->m_peraglock);
- mp->m_perag[agno].pagi_freecount++;
+ pag = xfs_perag_get(mp, agno);
+ pag->pagi_freecount++;
+ xfs_perag_put(pag);
up_read(&mp->m_peraglock);
xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1);
}
@@ -1379,7 +1395,6 @@ xfs_imap(
XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks));
return XFS_ERROR(EINVAL);
}
-
return 0;
}
@@ -1523,8 +1538,7 @@ xfs_ialloc_read_agi(
return error;
agi = XFS_BUF_TO_AGI(*bpp);
- pag = &mp->m_perag[agno];
-
+ pag = xfs_perag_get(mp, agno);
if (!pag->pagi_init) {
pag->pagi_freecount = be32_to_cpu(agi->agi_freecount);
pag->pagi_count = be32_to_cpu(agi->agi_count);
@@ -1537,6 +1551,7 @@ xfs_ialloc_read_agi(
*/
ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) ||
XFS_FORCED_SHUTDOWN(mp));
+ xfs_perag_put(pag);
return 0;
}
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 1f69dda..c2618db 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2695,7 +2695,7 @@ xfs_iflush_cluster(
ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *);
ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS);
if (!ilist)
- return 0;
+ goto out_put;
mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask;
@@ -2764,6 +2764,8 @@ xfs_iflush_cluster(
out_free:
read_unlock(&pag->pag_ici_lock);
kmem_free(ilist);
+out_put:
+ xfs_perag_put(pag);
return 0;
@@ -2807,6 +2809,7 @@ cluster_corrupt_out:
*/
xfs_iflush_abort(iq);
kmem_free(ilist);
+ xfs_perag_put(pag);
return XFS_ERROR(EFSCORRUPTED);
}
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index d652d59..4739c2c 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -438,18 +438,20 @@ xfs_initialize_perag(
}
/* This ag is preferred for inodes */
- pag = &mp->m_perag[index];
+ pag = xfs_perag_get(mp, index);
pag->pagi_inodeok = 1;
if (index < max_metadata)
pag->pagf_metadata = 1;
xfs_initialize_perag_icache(pag);
+ xfs_perag_put(pag);
}
} else {
/* Setup default behavior for smaller filesystems */
for (index = 0; index < agcount; index++) {
- pag = &mp->m_perag[index];
+ pag = xfs_perag_get(mp, index);
pag->pagi_inodeok = 1;
xfs_initialize_perag_icache(pag);
+ xfs_perag_put(pag);
}
}
return index;
@@ -731,12 +733,13 @@ xfs_initialize_perag_data(xfs_mount_t *mp, xfs_agnumber_t agcount)
error = xfs_ialloc_pagi_init(mp, NULL, index);
if (error)
return error;
- pag = &mp->m_perag[index];
+ pag = xfs_perag_get(mp, index);
ifree += pag->pagi_freecount;
ialloc += pag->pagi_count;
bfree += pag->pagf_freeblks;
bfreelst += pag->pagf_flcount;
btree += pag->pagf_btreeblks;
+ xfs_perag_put(pag);
}
/*
* Overwrite incore superblock counters with just-read data
--
1.6.5
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH 5/6] XFS: Replace per-ag array with a radix tree
2009-12-15 6:11 [PATCH 0/6] XFS: Fix growfs deadlocks and per-AG use after free V3 Dave Chinner
` (3 preceding siblings ...)
2009-12-15 6:11 ` [PATCH 4/6] XFS: convert remaining direct references to m_perag Dave Chinner
@ 2009-12-15 6:11 ` Dave Chinner
2009-12-23 16:02 ` Christoph Hellwig
2009-12-23 22:08 ` Alex Elder
2009-12-15 6:11 ` [PATCH 6/6] XFS: Reference count per-ag structures Dave Chinner
2009-12-20 4:28 ` [PATCH 7/6] XFS: Add trace points for per-ag refcount debugging Dave Chinner
6 siblings, 2 replies; 23+ messages in thread
From: Dave Chinner @ 2009-12-15 6:11 UTC (permalink / raw)
To: xfs
The use of an array for the per-ag structures requires reallocation of the
array when growing the filesystem. This requires locking access to the array to
avoid use after free situations, and the locking is difficult to get right. To
avoid needing to reallocate an array, change the per-ag structures to an
allocated object per ag and index them using a tree structure.
The AGs are always densely indexed (hence the use of an array), but the number
supported is 2^32 and lookups tend to be random and hence indexing needs to
scale. A simple choice is a radix tree - it works well with this sort of index.
This change also removes another large contiguous allocation from the
mount/growfs path in XFS.
The growing process now needs to change to only initialise the new AGs required
for the extra space, and as such only needs to exclusively lock the tree for
inserts. The rest of the code only needs to lock the tree while doing lookups,
and hence this will remove all the deadlocks that currently occur on the
m_perag_lock as it is now an innermost lock. The lock is also changed to a
spinlock from a read/write lock as the hold time is now extremely short.
To complete the picture, the per-ag structures will need to be reference
counted to ensure that we don't free/modify them while they are still in use.
This will be done in subsequent patch.
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
fs/xfs/xfs_alloc.c | 8 ------
fs/xfs/xfs_bmap.c | 7 +----
fs/xfs/xfs_filestream.c | 13 +++------
fs/xfs/xfs_fsops.c | 42 +++++++++++++++---------------
fs/xfs/xfs_ialloc.c | 25 +----------------
fs/xfs/xfs_itable.c | 4 ---
fs/xfs/xfs_mount.c | 65 +++++++++++++++++++++++++++++++++++-----------
fs/xfs/xfs_mount.h | 14 +++++++---
8 files changed, 87 insertions(+), 91 deletions(-)
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 3cb533c..e5679f3 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -2275,7 +2275,6 @@ xfs_alloc_vextent(
* These three force us into a single a.g.
*/
args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
- down_read(&mp->m_peraglock);
args->pag = xfs_perag_get(mp, args->agno);
args->minleft = 0;
error = xfs_alloc_fix_freelist(args, 0);
@@ -2285,14 +2284,12 @@ xfs_alloc_vextent(
goto error0;
}
if (!args->agbp) {
- up_read(&mp->m_peraglock);
trace_xfs_alloc_vextent_noagbp(args);
break;
}
args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
if ((error = xfs_alloc_ag_vextent(args)))
goto error0;
- up_read(&mp->m_peraglock);
break;
case XFS_ALLOCTYPE_START_BNO:
/*
@@ -2344,7 +2341,6 @@ xfs_alloc_vextent(
* Loop over allocation groups twice; first time with
* trylock set, second time without.
*/
- down_read(&mp->m_peraglock);
for (;;) {
args->pag = xfs_perag_get(mp, args->agno);
if (no_min) args->minleft = 0;
@@ -2407,7 +2403,6 @@ xfs_alloc_vextent(
}
xfs_perag_put(args->pag);
}
- up_read(&mp->m_peraglock);
if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) {
if (args->agno == sagno)
mp->m_agfrotor = (mp->m_agfrotor + 1) %
@@ -2437,7 +2432,6 @@ xfs_alloc_vextent(
return 0;
error0:
xfs_perag_put(args->pag);
- up_read(&mp->m_peraglock);
return error;
}
@@ -2462,7 +2456,6 @@ xfs_free_extent(
args.agno = XFS_FSB_TO_AGNO(args.mp, bno);
ASSERT(args.agno < args.mp->m_sb.sb_agcount);
args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno);
- down_read(&args.mp->m_peraglock);
args.pag = xfs_perag_get(args.mp, args.agno);
if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING)))
goto error0;
@@ -2474,7 +2467,6 @@ xfs_free_extent(
error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
error0:
xfs_perag_put(args.pag);
- up_read(&args.mp->m_peraglock);
return error;
}
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index a9b95d9..7c6d9ac 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -2629,14 +2629,12 @@ xfs_bmap_btalloc(
if (startag == NULLAGNUMBER)
startag = ag = 0;
notinit = 0;
- down_read(&mp->m_peraglock);
pag = xfs_perag_get(mp, ag);
while (blen < ap->alen) {
if (!pag->pagf_init &&
(error = xfs_alloc_pagf_init(mp, args.tp,
ag, XFS_ALLOC_FLAG_TRYLOCK))) {
xfs_perag_put(pag);
- up_read(&mp->m_peraglock);
return error;
}
/*
@@ -2669,10 +2667,8 @@ xfs_bmap_btalloc(
error = xfs_filestream_new_ag(ap, &ag);
xfs_perag_put(pag);
- if (error) {
- up_read(&mp->m_peraglock);
+ if (error)
return error;
- }
/* loop again to set 'blen'*/
startag = NULLAGNUMBER;
@@ -2688,7 +2684,6 @@ xfs_bmap_btalloc(
pag = xfs_perag_get(mp, ag);
}
xfs_perag_put(pag);
- up_read(&mp->m_peraglock);
/*
* Since the above loop did a BUF_TRYLOCK, it is
* possible that there is space for this request.
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index e61f2aa..914d00d 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -253,8 +253,7 @@ next_ag:
/*
* Set the allocation group number for a file or a directory, updating inode
- * references and per-AG references as appropriate. Must be called with the
- * m_peraglock held in read mode.
+ * references and per-AG references as appropriate.
*/
static int
_xfs_filestream_update_ag(
@@ -456,10 +455,10 @@ xfs_filestream_unmount(
}
/*
- * If the mount point's m_perag array is going to be reallocated, all
+ * If the mount point's m_perag tree is going to be modified, all
* outstanding cache entries must be flushed to avoid accessing reference count
* addresses that have been freed. The call to xfs_filestream_flush() must be
- * made inside the block that holds the m_peraglock in write mode to do the
+ * made inside the block that holds the m_perag_lock in write mode to do the
* reallocation.
*/
void
@@ -531,7 +530,6 @@ xfs_filestream_associate(
mp = pip->i_mount;
cache = mp->m_filestream;
- down_read(&mp->m_peraglock);
/*
* We have a problem, Houston.
@@ -548,10 +546,8 @@ xfs_filestream_associate(
*
* So, if we can't get the iolock without sleeping then just give up
*/
- if (!xfs_ilock_nowait(pip, XFS_IOLOCK_EXCL)) {
- up_read(&mp->m_peraglock);
+ if (!xfs_ilock_nowait(pip, XFS_IOLOCK_EXCL))
return 1;
- }
/* If the parent directory is already in the cache, use its AG. */
item = xfs_mru_cache_lookup(cache, pip->i_ino);
@@ -606,7 +602,6 @@ exit_did_pick:
exit:
xfs_iunlock(pip, XFS_IOLOCK_EXCL);
- up_read(&mp->m_peraglock);
return -err;
}
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index a13919a..37a6f62 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -167,27 +167,14 @@ xfs_growfs_data_private(
}
new = nb - mp->m_sb.sb_dblocks;
oagcount = mp->m_sb.sb_agcount;
- if (nagcount > oagcount) {
- void *new_perag, *old_perag;
-
- xfs_filestream_flush(mp);
-
- new_perag = kmem_zalloc(sizeof(xfs_perag_t) * nagcount,
- KM_MAYFAIL);
- if (!new_perag)
- return XFS_ERROR(ENOMEM);
-
- down_write(&mp->m_peraglock);
- memcpy(new_perag, mp->m_perag, sizeof(xfs_perag_t) * oagcount);
- old_perag = mp->m_perag;
- mp->m_perag = new_perag;
-
- mp->m_flags |= XFS_MOUNT_32BITINODES;
- nagimax = xfs_initialize_perag(mp, nagcount);
- up_write(&mp->m_peraglock);
- kmem_free(old_perag);
+ /* allocate the new per-ag structures */
+ if (nagcount > oagcount) {
+ error = xfs_initialize_perag(mp, nagcount, &nagimax);
+ if (error)
+ return error;
}
+
tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS);
tp->t_flags |= XFS_TRANS_RESERVE;
if ((error = xfs_trans_reserve(tp, XFS_GROWFS_SPACE_RES(mp),
@@ -196,6 +183,11 @@ xfs_growfs_data_private(
return error;
}
+ /*
+ * Write new AG headers to disk. Non-transactional, but written
+ * synchronously so they are completed prior to the growfs transaction
+ * being logged.
+ */
nfree = 0;
for (agno = nagcount - 1; agno >= oagcount; agno--, new -= agsize) {
/*
@@ -359,6 +351,12 @@ xfs_growfs_data_private(
goto error0;
}
}
+
+ /*
+ * Update changed superblock fields transactionally. These are not
+ * seen by the rest of the world until the transaction commit applies
+ * them atomically to the superblock.
+ */
if (nagcount > oagcount)
xfs_trans_mod_sb(tp, XFS_TRANS_SB_AGCOUNT, nagcount - oagcount);
if (nb > mp->m_sb.sb_dblocks)
@@ -369,9 +367,9 @@ xfs_growfs_data_private(
if (dpct)
xfs_trans_mod_sb(tp, XFS_TRANS_SB_IMAXPCT, dpct);
error = xfs_trans_commit(tp, 0);
- if (error) {
+ if (error)
return error;
- }
+
/* New allocation groups fully initialized, so update mount struct */
if (nagimax)
mp->m_maxagi = nagimax;
@@ -381,6 +379,8 @@ xfs_growfs_data_private(
mp->m_maxicount = icount << mp->m_sb.sb_inopblog;
} else
mp->m_maxicount = 0;
+
+ /* update secondary superblocks. */
for (agno = 1; agno < nagcount; agno++) {
error = xfs_read_buf(mp, mp->m_ddev_targp,
XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 884ee13..52c9d00 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -383,11 +383,9 @@ xfs_ialloc_ag_alloc(
newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0);
be32_add_cpu(&agi->agi_count, newlen);
be32_add_cpu(&agi->agi_freecount, newlen);
- down_read(&args.mp->m_peraglock);
pag = xfs_perag_get(args.mp, agno);
pag->pagi_freecount += newlen;
xfs_perag_put(pag);
- up_read(&args.mp->m_peraglock);
agi->agi_newino = cpu_to_be32(newino);
/*
@@ -489,7 +487,6 @@ xfs_ialloc_ag_select(
*/
agno = pagno;
flags = XFS_ALLOC_FLAG_TRYLOCK;
- down_read(&mp->m_peraglock);
for (;;) {
pag = xfs_perag_get(mp, agno);
if (!pag->pagi_init) {
@@ -531,7 +528,6 @@ xfs_ialloc_ag_select(
goto nextag;
}
xfs_perag_put(pag);
- up_read(&mp->m_peraglock);
return agbp;
}
}
@@ -544,18 +540,14 @@ nextag:
* No point in iterating over the rest, if we're shutting
* down.
*/
- if (XFS_FORCED_SHUTDOWN(mp)) {
- up_read(&mp->m_peraglock);
+ if (XFS_FORCED_SHUTDOWN(mp))
return NULL;
- }
agno++;
if (agno >= agcount)
agno = 0;
if (agno == pagno) {
- if (flags == 0) {
- up_read(&mp->m_peraglock);
+ if (flags == 0)
return NULL;
- }
flags = 0;
}
}
@@ -777,16 +769,13 @@ nextag:
*inop = NULLFSINO;
return noroom ? ENOSPC : 0;
}
- down_read(&mp->m_peraglock);
pag = xfs_perag_get(mp, tagno);
if (pag->pagi_inodeok == 0) {
xfs_perag_put(pag);
- up_read(&mp->m_peraglock);
goto nextag;
}
error = xfs_ialloc_read_agi(mp, tp, tagno, &agbp);
xfs_perag_put(pag);
- up_read(&mp->m_peraglock);
if (error)
goto nextag;
agi = XFS_BUF_TO_AGI(agbp);
@@ -1015,9 +1004,7 @@ alloc_inode:
goto error0;
be32_add_cpu(&agi->agi_freecount, -1);
xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
- down_read(&mp->m_peraglock);
pag->pagi_freecount--;
- up_read(&mp->m_peraglock);
error = xfs_check_agi_freecount(cur, agi);
if (error)
@@ -1100,9 +1087,7 @@ xfs_difree(
/*
* Get the allocation group header.
*/
- down_read(&mp->m_peraglock);
error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
- up_read(&mp->m_peraglock);
if (error) {
cmn_err(CE_WARN,
"xfs_difree: xfs_ialloc_read_agi() returned an error %d on %s. Returning error.",
@@ -1169,11 +1154,9 @@ xfs_difree(
be32_add_cpu(&agi->agi_count, -ilen);
be32_add_cpu(&agi->agi_freecount, -(ilen - 1));
xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT);
- down_read(&mp->m_peraglock);
pag = xfs_perag_get(mp, agno);
pag->pagi_freecount -= ilen - 1;
xfs_perag_put(pag);
- up_read(&mp->m_peraglock);
xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen);
xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1));
@@ -1202,11 +1185,9 @@ xfs_difree(
*/
be32_add_cpu(&agi->agi_freecount, 1);
xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
- down_read(&mp->m_peraglock);
pag = xfs_perag_get(mp, agno);
pag->pagi_freecount++;
xfs_perag_put(pag);
- up_read(&mp->m_peraglock);
xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1);
}
@@ -1328,9 +1309,7 @@ xfs_imap(
xfs_buf_t *agbp; /* agi buffer */
int i; /* temp state */
- down_read(&mp->m_peraglock);
error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
- up_read(&mp->m_peraglock);
if (error) {
xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
"xfs_ialloc_read_agi() returned "
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 62efab2..940307a 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -420,9 +420,7 @@ xfs_bulkstat(
while (XFS_BULKSTAT_UBLEFT(ubleft) && agno < mp->m_sb.sb_agcount) {
cond_resched();
bp = NULL;
- down_read(&mp->m_peraglock);
error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
- up_read(&mp->m_peraglock);
if (error) {
/*
* Skip this allocation group and go to the next one.
@@ -849,9 +847,7 @@ xfs_inumbers(
agbp = NULL;
while (left > 0 && agno < mp->m_sb.sb_agcount) {
if (agbp == NULL) {
- down_read(&mp->m_peraglock);
error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
- up_read(&mp->m_peraglock);
if (error) {
/*
* If we can't read the AGI of this ag,
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 4739c2c..73d61d4 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -209,13 +209,16 @@ STATIC void
xfs_free_perag(
xfs_mount_t *mp)
{
- if (mp->m_perag) {
- int agno;
+ xfs_agnumber_t agno;
+ struct xfs_perag *pag;
- for (agno = 0; agno < mp->m_maxagi; agno++)
- if (mp->m_perag[agno].pagb_list)
- kmem_free(mp->m_perag[agno].pagb_list);
- kmem_free(mp->m_perag);
+ for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
+ spin_lock(&mp->m_perag_lock);
+ pag = radix_tree_delete(&mp->m_perag_tree, agno);
+ spin_unlock(&mp->m_perag_lock);
+ ASSERT(pag);
+ kmem_free(pag->pagb_list);
+ kmem_free(pag);
}
}
@@ -389,10 +392,11 @@ xfs_initialize_perag_icache(
}
}
-xfs_agnumber_t
+int
xfs_initialize_perag(
xfs_mount_t *mp,
- xfs_agnumber_t agcount)
+ xfs_agnumber_t agcount,
+ xfs_agnumber_t *maxagi)
{
xfs_agnumber_t index, max_metadata;
xfs_perag_t *pag;
@@ -405,6 +409,33 @@ xfs_initialize_perag(
agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
+ /*
+ * Walk the current per-ag tree so we don't try to initialise AGs
+ * that already exist (growfs case). Allocate and insert all the
+ * AGs we don't find ready for initialisation.
+ */
+ for (index = 0; index < agcount; index++) {
+ pag = xfs_perag_get(mp, index);
+ if (pag) {
+ xfs_perag_put(pag);
+ continue;
+ }
+ pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL);
+ if (!pag)
+ return -ENOMEM;
+ if (radix_tree_preload(GFP_NOFS))
+ return -ENOMEM;
+ spin_lock(&mp->m_perag_lock);
+ if (radix_tree_insert(&mp->m_perag_tree, index, pag)) {
+ BUG();
+ spin_unlock(&mp->m_perag_lock);
+ kmem_free(pag);
+ return -EEXIST;
+ }
+ spin_unlock(&mp->m_perag_lock);
+ radix_tree_preload_end();
+ }
+
/* Clear the mount flag if no inode can overflow 32 bits
* on this filesystem, or if specifically requested..
*/
@@ -454,7 +485,9 @@ xfs_initialize_perag(
xfs_perag_put(pag);
}
}
- return index;
+ if (maxagi)
+ *maxagi = index;
+ return 0;
}
void
@@ -1155,13 +1188,13 @@ xfs_mountfs(
/*
* Allocate and initialize the per-ag data.
*/
- init_rwsem(&mp->m_peraglock);
- mp->m_perag = kmem_zalloc(sbp->sb_agcount * sizeof(xfs_perag_t),
- KM_MAYFAIL);
- if (!mp->m_perag)
+ spin_lock_init(&mp->m_perag_lock);
+ INIT_RADIX_TREE(&mp->m_perag_tree, GFP_NOFS);
+ error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi);
+ if (error) {
+ cmn_err(CE_WARN, "XFS: Failed per-ag init: %d", error);
goto out_remove_uuid;
-
- mp->m_maxagi = xfs_initialize_perag(mp, sbp->sb_agcount);
+ }
if (!sbp->sb_logblocks) {
cmn_err(CE_WARN, "XFS: no log defined");
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index f8a68a2..cfa7a5d 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -207,8 +207,8 @@ typedef struct xfs_mount {
uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */
uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */
uint m_in_maxlevels; /* max inobt btree levels. */
- struct xfs_perag *m_perag; /* per-ag accounting info */
- struct rw_semaphore m_peraglock; /* lock for m_perag (pointer) */
+ struct radix_tree_root m_perag_tree; /* per-ag accounting info */
+ spinlock_t m_perag_lock; /* lock for m_perag_tree */
struct mutex m_growlock; /* growfs mutex */
int m_fixedfsid[2]; /* unchanged for life of FS */
uint m_dmevmask; /* DMI events for this FS */
@@ -389,7 +389,12 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
static inline struct xfs_perag *
xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
{
- return &mp->m_perag[agno];
+ struct xfs_perag *pag;
+
+ spin_lock(&mp->m_perag_lock);
+ pag = radix_tree_lookup(&mp->m_perag_tree, agno);
+ spin_unlock(&mp->m_perag_lock);
+ return pag;
}
static inline void
@@ -450,7 +455,8 @@ extern struct xfs_dmops xfs_dmcore_xfs;
#endif /* __KERNEL__ */
extern void xfs_mod_sb(struct xfs_trans *, __int64_t);
-extern xfs_agnumber_t xfs_initialize_perag(struct xfs_mount *, xfs_agnumber_t);
+extern int xfs_initialize_perag(struct xfs_mount *, xfs_agnumber_t,
+ xfs_agnumber_t *);
extern void xfs_sb_from_disk(struct xfs_sb *, struct xfs_dsb *);
extern void xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t);
--
1.6.5
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH 6/6] XFS: Reference count per-ag structures
2009-12-15 6:11 [PATCH 0/6] XFS: Fix growfs deadlocks and per-AG use after free V3 Dave Chinner
` (4 preceding siblings ...)
2009-12-15 6:11 ` [PATCH 5/6] XFS: Replace per-ag array with a radix tree Dave Chinner
@ 2009-12-15 6:11 ` Dave Chinner
2009-12-23 22:12 ` Alex Elder
2009-12-20 4:28 ` [PATCH 7/6] XFS: Add trace points for per-ag refcount debugging Dave Chinner
6 siblings, 1 reply; 23+ messages in thread
From: Dave Chinner @ 2009-12-15 6:11 UTC (permalink / raw)
To: xfs
Reference count the per-ag structures to ensure that we keep get/put
pairs balanced. Assert that the reference counts are zero at unmount
time to catch leaks. In future, reference counts will enable us to
safely remove perag structures by allowing us to detect when they
are no longer in use.
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
fs/xfs/xfs_ag.h | 4 ++--
fs/xfs/xfs_mount.c | 1 +
fs/xfs/xfs_mount.h | 11 +++++++++--
3 files changed, 12 insertions(+), 4 deletions(-)
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 6702bd8..18ae43f 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -196,8 +196,8 @@ typedef struct xfs_perag_busy {
#define XFS_PAGB_NUM_SLOTS 128
#endif
-typedef struct xfs_perag
-{
+typedef struct xfs_perag {
+ atomic_t pag_ref; /* perag reference count */
char pagf_init; /* this agf's entry is initialized */
char pagi_init; /* this agi's entry is initialized */
char pagf_metadata; /* the agf is preferred to be metadata */
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 73d61d4..1d00f7f 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -215,6 +215,7 @@ xfs_free_perag(
for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
spin_lock(&mp->m_perag_lock);
pag = radix_tree_delete(&mp->m_perag_tree, agno);
+ ASSERT(atomic_read(&pag->pag_ref) == 0);
spin_unlock(&mp->m_perag_lock);
ASSERT(pag);
kmem_free(pag->pagb_list);
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index cfa7a5d..16b2212 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -384,7 +384,7 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
}
/*
- * perag get/put wrappers for eventual ref counting
+ * perag get/put wrappers for ref counting
*/
static inline struct xfs_perag *
xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
@@ -393,6 +393,12 @@ xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
spin_lock(&mp->m_perag_lock);
pag = radix_tree_lookup(&mp->m_perag_tree, agno);
+ if (pag) {
+ ASSERT(atomic_read(&pag->pag_ref) >= 0);
+ /* catch leaks in the positive direction during testing */
+ ASSERT(atomic_read(&pag->pag_ref) < 1000);
+ atomic_inc(&pag->pag_ref);
+ }
spin_unlock(&mp->m_perag_lock);
return pag;
}
@@ -400,7 +406,8 @@ xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
static inline void
xfs_perag_put(struct xfs_perag *pag)
{
- /* nothing to see here, move along */
+ ASSERT(atomic_read(&pag->pag_ref) > 0);
+ atomic_dec(&pag->pag_ref);
}
/*
--
1.6.5
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 23+ messages in thread
* Re: [PATCH 2/6] XFS: Don't directly reference m_perag in allocation code
2009-12-15 6:11 ` [PATCH 2/6] XFS: Don't directly reference m_perag in allocation code Dave Chinner
@ 2009-12-20 4:24 ` Dave Chinner
2009-12-22 14:18 ` Alex Elder
1 sibling, 0 replies; 23+ messages in thread
From: Dave Chinner @ 2009-12-20 4:24 UTC (permalink / raw)
To: xfs
On Tue, Dec 15, 2009 at 05:11:13PM +1100, Dave Chinner wrote:
> Start abstracting the perag references so that the indexing of the
> structures is not directly coded into all the places that uses the
> perag structures. This will allow us to separate the use of the
> perag structure and the way it is indexed and hence avoid the known
> deadlocks related to growing a busy filesystem.
>
> Signed-off-by: Dave Chinner <david@fromorbit.com>
I screwed up the rebase so this patch is missing a xfs_perag_put()
call in xfs_alloc_get_freelist(), so will assert fail on unmount in
test 042. Corrected patch below.
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
XFS: Don't directly reference m_perag in allocation code
Start abstracting the perag references so that the indexing of the
structures is not directly coded into all the places that uses the
perag structures. This will allow us to separate the use of the
perag structure and the way it is indexed and hence avoid the known
deadlocks related to growing a busy filesystem.
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
fs/xfs/xfs_alloc.c | 82 ++++++++++++++++++++++++++--------------------
fs/xfs/xfs_alloc_btree.c | 9 ++++-
2 files changed, 53 insertions(+), 38 deletions(-)
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index a1c65fc..d39ee2d 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -1662,11 +1662,13 @@ xfs_free_ag_extent(
xfs_agf_t *agf;
xfs_perag_t *pag; /* per allocation group data */
+ pag = xfs_perag_get(mp, agno);
+ pag->pagf_freeblks += len;
+ xfs_perag_put(pag);
+
agf = XFS_BUF_TO_AGF(agbp);
- pag = &mp->m_perag[agno];
be32_add_cpu(&agf->agf_freeblks, len);
xfs_trans_agblocks_delta(tp, len);
- pag->pagf_freeblks += len;
XFS_WANT_CORRUPTED_GOTO(
be32_to_cpu(agf->agf_freeblks) <=
be32_to_cpu(agf->agf_length),
@@ -1969,10 +1971,12 @@ xfs_alloc_get_freelist(
xfs_trans_brelse(tp, agflbp);
if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
agf->agf_flfirst = 0;
- pag = &mp->m_perag[be32_to_cpu(agf->agf_seqno)];
+
+ pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
be32_add_cpu(&agf->agf_flcount, -1);
xfs_trans_agflist_delta(tp, -1);
pag->pagf_flcount--;
+ xfs_perag_put(pag);
logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
if (btreeblk) {
@@ -2078,7 +2082,8 @@ xfs_alloc_put_freelist(
be32_add_cpu(&agf->agf_fllast, 1);
if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp))
agf->agf_fllast = 0;
- pag = &mp->m_perag[be32_to_cpu(agf->agf_seqno)];
+
+ pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
be32_add_cpu(&agf->agf_flcount, 1);
xfs_trans_agflist_delta(tp, 1);
pag->pagf_flcount++;
@@ -2089,6 +2094,7 @@ xfs_alloc_put_freelist(
pag->pagf_btreeblks--;
logflags |= XFS_AGF_BTREEBLKS;
}
+ xfs_perag_put(pag);
xfs_alloc_log_agf(tp, agbp, logflags);
@@ -2152,7 +2158,6 @@ xfs_read_agf(
xfs_trans_brelse(tp, *bpp);
return XFS_ERROR(EFSCORRUPTED);
}
-
XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_AGF, XFS_AGF_REF);
return 0;
}
@@ -2184,7 +2189,7 @@ xfs_alloc_read_agf(
ASSERT(!XFS_BUF_GETERROR(*bpp));
agf = XFS_BUF_TO_AGF(*bpp);
- pag = &mp->m_perag[agno];
+ pag = xfs_perag_get(mp, agno);
if (!pag->pagf_init) {
pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
@@ -2211,6 +2216,7 @@ xfs_alloc_read_agf(
be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
}
#endif
+ xfs_perag_put(pag);
return 0;
}
@@ -2271,7 +2277,7 @@ xfs_alloc_vextent(
*/
args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
down_read(&mp->m_peraglock);
- args->pag = &mp->m_perag[args->agno];
+ args->pag = xfs_perag_get(mp, args->agno);
args->minleft = 0;
error = xfs_alloc_fix_freelist(args, 0);
args->minleft = minleft;
@@ -2341,7 +2347,7 @@ xfs_alloc_vextent(
*/
down_read(&mp->m_peraglock);
for (;;) {
- args->pag = &mp->m_perag[args->agno];
+ args->pag = xfs_perag_get(mp, args->agno);
if (no_min) args->minleft = 0;
error = xfs_alloc_fix_freelist(args, flags);
args->minleft = minleft;
@@ -2400,6 +2406,7 @@ xfs_alloc_vextent(
}
}
}
+ xfs_perag_put(args->pag);
}
up_read(&mp->m_peraglock);
if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) {
@@ -2427,8 +2434,10 @@ xfs_alloc_vextent(
args->len);
#endif
}
+ xfs_perag_put(args->pag);
return 0;
error0:
+ xfs_perag_put(args->pag);
up_read(&mp->m_peraglock);
return error;
}
@@ -2455,7 +2464,7 @@ xfs_free_extent(
ASSERT(args.agno < args.mp->m_sb.sb_agcount);
args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno);
down_read(&args.mp->m_peraglock);
- args.pag = &args.mp->m_perag[args.agno];
+ args.pag = xfs_perag_get(args.mp, args.agno);
if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING)))
goto error0;
#ifdef DEBUG
@@ -2465,6 +2474,7 @@ xfs_free_extent(
#endif
error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
error0:
+ xfs_perag_put(args.pag);
up_read(&args.mp->m_peraglock);
return error;
}
@@ -2486,15 +2496,15 @@ xfs_alloc_mark_busy(xfs_trans_t *tp,
xfs_agblock_t bno,
xfs_extlen_t len)
{
- xfs_mount_t *mp;
xfs_perag_busy_t *bsy;
+ struct xfs_perag *pag;
int n;
- mp = tp->t_mountp;
- spin_lock(&mp->m_perag[agno].pagb_lock);
+ pag = xfs_perag_get(tp->t_mountp, agno);
+ spin_lock(&pag->pagb_lock);
/* search pagb_list for an open slot */
- for (bsy = mp->m_perag[agno].pagb_list, n = 0;
+ for (bsy = pag->pagb_list, n = 0;
n < XFS_PAGB_NUM_SLOTS;
bsy++, n++) {
if (bsy->busy_tp == NULL) {
@@ -2502,11 +2512,11 @@ xfs_alloc_mark_busy(xfs_trans_t *tp,
}
}
- trace_xfs_alloc_busy(mp, agno, bno, len, n);
+ trace_xfs_alloc_busy(tp->t_mountp, agno, bno, len, n);
if (n < XFS_PAGB_NUM_SLOTS) {
- bsy = &mp->m_perag[agno].pagb_list[n];
- mp->m_perag[agno].pagb_count++;
+ bsy = &pag->pagb_list[n];
+ pag->pagb_count++;
bsy->busy_start = bno;
bsy->busy_length = len;
bsy->busy_tp = tp;
@@ -2521,7 +2531,8 @@ xfs_alloc_mark_busy(xfs_trans_t *tp,
xfs_trans_set_sync(tp);
}
- spin_unlock(&mp->m_perag[agno].pagb_lock);
+ spin_unlock(&pag->pagb_lock);
+ xfs_perag_put(pag);
}
void
@@ -2529,24 +2540,23 @@ xfs_alloc_clear_busy(xfs_trans_t *tp,
xfs_agnumber_t agno,
int idx)
{
- xfs_mount_t *mp;
+ struct xfs_perag *pag;
xfs_perag_busy_t *list;
- mp = tp->t_mountp;
-
- spin_lock(&mp->m_perag[agno].pagb_lock);
- list = mp->m_perag[agno].pagb_list;
-
ASSERT(idx < XFS_PAGB_NUM_SLOTS);
+ pag = xfs_perag_get(tp->t_mountp, agno);
+ spin_lock(&pag->pagb_lock);
+ list = pag->pagb_list;
- trace_xfs_alloc_unbusy(mp, agno, idx, list[idx].busy_tp == tp);
+ trace_xfs_alloc_unbusy(tp->t_mountp, agno, idx, list[idx].busy_tp == tp);
if (list[idx].busy_tp == tp) {
list[idx].busy_tp = NULL;
- mp->m_perag[agno].pagb_count--;
+ pag->pagb_count--;
}
- spin_unlock(&mp->m_perag[agno].pagb_lock);
+ spin_unlock(&pag->pagb_lock);
+ xfs_perag_put(pag);
}
@@ -2560,21 +2570,20 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
xfs_agblock_t bno,
xfs_extlen_t len)
{
- xfs_mount_t *mp;
+ struct xfs_perag *pag;
xfs_perag_busy_t *bsy;
xfs_agblock_t uend, bend;
xfs_lsn_t lsn;
int cnt;
- mp = tp->t_mountp;
-
- spin_lock(&mp->m_perag[agno].pagb_lock);
- cnt = mp->m_perag[agno].pagb_count;
+ pag = xfs_perag_get(tp->t_mountp, agno);
+ spin_lock(&pag->pagb_lock);
+ cnt = pag->pagb_count;
uend = bno + len - 1;
/* search pagb_list for this slot, skipping open slots */
- for (bsy = mp->m_perag[agno].pagb_list; cnt; bsy++) {
+ for (bsy = pag->pagb_list; cnt; bsy++) {
/*
* (start1,length1) within (start2, length2)
@@ -2589,7 +2598,7 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
}
}
- trace_xfs_alloc_busysearch(mp, agno, bno, len, !!cnt);
+ trace_xfs_alloc_busysearch(tp->t_mountp, agno, bno, len, !!cnt);
/*
* If a block was found, force the log through the LSN of the
@@ -2597,9 +2606,10 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
*/
if (cnt) {
lsn = bsy->busy_tp->t_commit_lsn;
- spin_unlock(&mp->m_perag[agno].pagb_lock);
- xfs_log_force(mp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC);
+ spin_unlock(&pag->pagb_lock);
+ xfs_log_force(tp->t_mountp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC);
} else {
- spin_unlock(&mp->m_perag[agno].pagb_lock);
+ spin_unlock(&pag->pagb_lock);
}
+ xfs_perag_put(pag);
}
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index adbd914..b726e10 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -61,12 +61,14 @@ xfs_allocbt_set_root(
struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno);
int btnum = cur->bc_btnum;
+ struct xfs_perag *pag = xfs_perag_get(cur->bc_mp, seqno);
ASSERT(ptr->s != 0);
agf->agf_roots[btnum] = ptr->s;
be32_add_cpu(&agf->agf_levels[btnum], inc);
- cur->bc_mp->m_perag[seqno].pagf_levels[btnum] += inc;
+ pag->pagf_levels[btnum] += inc;
+ xfs_perag_put(pag);
xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
}
@@ -150,6 +152,7 @@ xfs_allocbt_update_lastrec(
{
struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno);
+ struct xfs_perag *pag;
__be32 len;
int numrecs;
@@ -193,7 +196,9 @@ xfs_allocbt_update_lastrec(
}
agf->agf_longest = len;
- cur->bc_mp->m_perag[seqno].pagf_longest = be32_to_cpu(len);
+ pag = xfs_perag_get(cur->bc_mp, seqno);
+ pag->pagf_longest = be32_to_cpu(len);
+ xfs_perag_put(pag);
xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, XFS_AGF_LONGEST);
}
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH 7/6] XFS: Add trace points for per-ag refcount debugging.
2009-12-15 6:11 [PATCH 0/6] XFS: Fix growfs deadlocks and per-AG use after free V3 Dave Chinner
` (5 preceding siblings ...)
2009-12-15 6:11 ` [PATCH 6/6] XFS: Reference count per-ag structures Dave Chinner
@ 2009-12-20 4:28 ` Dave Chinner
2010-01-05 18:08 ` Alex Elder
6 siblings, 1 reply; 23+ messages in thread
From: Dave Chinner @ 2009-12-20 4:28 UTC (permalink / raw)
To: xfs
Uninline xfs_perag_{get,put} so that tracepoints can be inserted
into them to speed debugging of reference count problems.
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
fs/xfs/linux-2.6/xfs_trace.h | 27 +++++++++++++++++++++++++++
fs/xfs/xfs_ag.h | 2 ++
fs/xfs/xfs_mount.c | 34 ++++++++++++++++++++++++++++++++++
fs/xfs/xfs_mount.h | 25 ++-----------------------
4 files changed, 65 insertions(+), 23 deletions(-)
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index d4ded59..5ec1475 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -33,6 +33,33 @@ struct xfs_dquot;
struct xlog_ticket;
struct log;
+#define DEFINE_PERAG_REF_EVENT(name) \
+TRACE_EVENT(name, \
+ TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, \
+ unsigned long caller_ip), \
+ TP_ARGS(mp, agno, refcount, caller_ip), \
+ TP_STRUCT__entry( \
+ __field(dev_t, dev) \
+ __field(xfs_agnumber_t, agno) \
+ __field(int, refcount) \
+ __field(unsigned long, caller_ip) \
+ ), \
+ TP_fast_assign( \
+ __entry->dev = mp->m_super->s_dev; \
+ __entry->agno = agno; \
+ __entry->refcount = refcount; \
+ __entry->caller_ip = caller_ip; \
+ ), \
+ TP_printk("dev %d:%d agno %u refcount %d caller %pf", \
+ MAJOR(__entry->dev), MINOR(__entry->dev), \
+ __entry->agno, \
+ __entry->refcount, \
+ (char *)__entry->caller_ip) \
+);
+
+DEFINE_PERAG_REF_EVENT(xfs_perag_get)
+DEFINE_PERAG_REF_EVENT(xfs_perag_put)
+
#define DEFINE_ATTR_LIST_EVENT(name) \
TRACE_EVENT(name, \
TP_PROTO(struct xfs_attr_list_context *ctx), \
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 18ae43f..963bc27 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -197,6 +197,8 @@ typedef struct xfs_perag_busy {
#endif
typedef struct xfs_perag {
+ struct xfs_mount *pag_mount; /* owner filesystem */
+ xfs_agnumber_t pag_agno; /* AG this structure belongs to */
atomic_t pag_ref; /* perag reference count */
char pagf_init; /* this agf's entry is initialized */
char pagi_init; /* this agi's entry is initialized */
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 1d00f7f..223d9c3 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -201,6 +201,38 @@ xfs_uuid_unmount(
/*
+ * Reference counting access wrappers to the perag structures.
+ */
+struct xfs_perag *
+xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
+{
+ struct xfs_perag *pag;
+ int ref = 0;
+
+ spin_lock(&mp->m_perag_lock);
+ pag = radix_tree_lookup(&mp->m_perag_tree, agno);
+ if (pag) {
+ ASSERT(atomic_read(&pag->pag_ref) >= 0);
+ /* catch leaks in the positive direction during testing */
+ ASSERT(atomic_read(&pag->pag_ref) < 1000);
+ ref = atomic_inc_return(&pag->pag_ref);
+ }
+ spin_unlock(&mp->m_perag_lock);
+ trace_xfs_perag_get(mp, agno, ref, _RET_IP_);
+ return pag;
+}
+
+void
+xfs_perag_put(struct xfs_perag *pag)
+{
+ int ref;
+
+ ASSERT(atomic_read(&pag->pag_ref) > 0);
+ ref = atomic_dec_return(&pag->pag_ref);
+ trace_xfs_perag_put(pag->pag_mount, pag->pag_agno, ref, _RET_IP_);
+}
+
+/*
* Free up the resources associated with a mount structure. Assume that
* the structure was initially zeroed, so we can tell which fields got
* initialized.
@@ -433,6 +465,8 @@ xfs_initialize_perag(
kmem_free(pag);
return -EEXIST;
}
+ pag->pag_agno = index;
+ pag->pag_mount = mp;
spin_unlock(&mp->m_perag_lock);
radix_tree_preload_end();
}
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 16b2212..e62fd1c 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -386,29 +386,8 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
/*
* perag get/put wrappers for ref counting
*/
-static inline struct xfs_perag *
-xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
-{
- struct xfs_perag *pag;
-
- spin_lock(&mp->m_perag_lock);
- pag = radix_tree_lookup(&mp->m_perag_tree, agno);
- if (pag) {
- ASSERT(atomic_read(&pag->pag_ref) >= 0);
- /* catch leaks in the positive direction during testing */
- ASSERT(atomic_read(&pag->pag_ref) < 1000);
- atomic_inc(&pag->pag_ref);
- }
- spin_unlock(&mp->m_perag_lock);
- return pag;
-}
-
-static inline void
-xfs_perag_put(struct xfs_perag *pag)
-{
- ASSERT(atomic_read(&pag->pag_ref) > 0);
- atomic_dec(&pag->pag_ref);
-}
+struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno);
+void xfs_perag_put(struct xfs_perag *pag);
/*
* Per-cpu superblock locking functions
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 23+ messages in thread
* RE: [PATCH 1/6] XFS: rename xfs_get_perag
2009-12-15 6:11 ` [PATCH 1/6] XFS: rename xfs_get_perag Dave Chinner
@ 2009-12-21 22:21 ` Alex Elder
2009-12-21 22:40 ` Dave Chinner
0 siblings, 1 reply; 23+ messages in thread
From: Alex Elder @ 2009-12-21 22:21 UTC (permalink / raw)
To: Dave Chinner; +Cc: xfs
Dave Chinner wrote:
> xfs_get_perag is really getting the perag that an inode
> belongs to based on it's inode number. Convert the use of this
> function to just get the perag from a provided ag number.
> Use this new function to obtain the per-ag structure when
> traversing the per AG inode trees for sync and reclaim.
General
- I like that you now use balanced get/put calls in some places
that previously "got" the ag reference directly (i.e., open
coded), but then used the put interface to release it.
- I do prefer the xfs_perag_get/put naming convention you use (FYI)
But a real question...
- Why is there no matching xfs_perag_put() in xfs_iflush_cluster()?
(It was that way before. I only superficially read that part
of the code so I'm probably just missing something.)
The change looks good but I'd like to know the answer to that
before I take it. Thanks.
-Alex
> Signed-off-by: Dave Chinner <david@fromorbit.com>
> ---
> fs/xfs/linux-2.6/xfs_sync.c | 33 +++++++++++++++++++++------------
> fs/xfs/xfs_iget.c | 10 +++++-----
> fs/xfs/xfs_inode.c | 8 +++++---
> fs/xfs/xfs_mount.h | 8 ++++----
> 4 files changed, 35 insertions(+), 24 deletions(-)
>
> diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
> index 6fed97a..0e17683 100644
> --- a/fs/xfs/linux-2.6/xfs_sync.c
> +++ b/fs/xfs/linux-2.6/xfs_sync.c
> @@ -96,13 +96,12 @@ unlock:
> STATIC int
> xfs_inode_ag_walk(
> struct xfs_mount *mp,
> - xfs_agnumber_t ag,
> + struct xfs_perag *pag,
> int (*execute)(struct xfs_inode *ip,
> struct xfs_perag *pag, int flags),
> int flags,
> int tag)
> {
> - struct xfs_perag *pag = &mp->m_perag[ag];
> uint32_t first_index;
> int last_error = 0;
> int skipped;
> @@ -137,8 +136,6 @@ restart:
> delay(1);
> goto restart;
> }
> -
> - xfs_put_perag(mp, pag);
> return last_error;
> }
>
> @@ -155,9 +152,15 @@ xfs_inode_ag_iterator(
> xfs_agnumber_t ag;
>
> for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
> - if (!mp->m_perag[ag].pag_ici_init)
> + struct xfs_perag *pag;
> +
> + pag = xfs_perag_get(mp, ag);
> + if (!pag->pag_ici_init) {
> + xfs_perag_put(pag);
> continue;
> - error = xfs_inode_ag_walk(mp, ag, execute, flags, tag);
> + }
> + error = xfs_inode_ag_walk(mp, pag, execute, flags, tag);
> + xfs_perag_put(pag);
> if (error) {
> last_error = error;
> if (error == EFSCORRUPTED)
> @@ -669,25 +672,30 @@ xfs_reclaim_inode(
> xfs_inode_t *ip,
> int sync_mode)
> {
> - xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino);
> + struct xfs_mount *mp = ip->i_mount;
> + struct xfs_perag *pag;
> +
>
> - /* The hash lock here protects a thread in xfs_iget_core from
> + /*
> + * The radix tree lock here protects a thread in xfs_iget_core from
> * racing with us on linking the inode back with a vnode.
> * Once we have the XFS_IRECLAIM flag set it will not touch
> * us.
> */
> + pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
> write_lock(&pag->pag_ici_lock);
> spin_lock(&ip->i_flags_lock);
> if (__xfs_iflags_test(ip, XFS_IRECLAIM) ||
> !__xfs_iflags_test(ip, XFS_IRECLAIMABLE)) {
> spin_unlock(&ip->i_flags_lock);
> write_unlock(&pag->pag_ici_lock);
> + xfs_perag_put(pag);
This is a good catch.
> return -EAGAIN;
> }
> __xfs_iflags_set(ip, XFS_IRECLAIM);
> spin_unlock(&ip->i_flags_lock);
> write_unlock(&pag->pag_ici_lock);
> - xfs_put_perag(ip->i_mount, pag);
> + xfs_perag_put(pag);
>
> /*
> * If the inode is still dirty, then flush it out. If the inode
> @@ -737,16 +745,17 @@ void
> xfs_inode_set_reclaim_tag(
> xfs_inode_t *ip)
> {
> - xfs_mount_t *mp = ip->i_mount;
> - xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino);
> + struct xfs_mount *mp = ip->i_mount;
> + struct xfs_perag *pag;
>
> + pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
> read_lock(&pag->pag_ici_lock);
> spin_lock(&ip->i_flags_lock);
> __xfs_inode_set_reclaim_tag(pag, ip);
> __xfs_iflags_set(ip, XFS_IRECLAIMABLE);
> spin_unlock(&ip->i_flags_lock);
> read_unlock(&pag->pag_ici_lock);
> - xfs_put_perag(mp, pag);
> + xfs_perag_put(pag);
> }
>
> void
> diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
> index f5c904a..d1e7250 100644
> --- a/fs/xfs/xfs_iget.c
> +++ b/fs/xfs/xfs_iget.c
> @@ -375,7 +375,7 @@ xfs_iget(
> return EINVAL;
>
> /* get the perag structure and ensure that it's inode capable */
> - pag = xfs_get_perag(mp, ino);
> + pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino));
> if (!pag->pagi_inodeok)
> return EINVAL;
> ASSERT(pag->pag_ici_init);
> @@ -399,7 +399,7 @@ again:
> if (error)
> goto out_error_or_again;
> }
> - xfs_put_perag(mp, pag);
> + xfs_perag_put(pag);
>
> *ipp = ip;
>
> @@ -418,7 +418,7 @@ out_error_or_again:
> delay(1);
> goto again;
> }
> - xfs_put_perag(mp, pag);
> + xfs_perag_put(pag);
> return error;
> }
>
> @@ -486,11 +486,11 @@ xfs_ireclaim(
> * if it was never added to it because radix_tree_delete can deal
> * with that case just fine.
> */
> - pag = xfs_get_perag(mp, ip->i_ino);
> + pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
> write_lock(&pag->pag_ici_lock);
> radix_tree_delete(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino));
> write_unlock(&pag->pag_ici_lock);
> - xfs_put_perag(mp, pag);
> + xfs_perag_put(pag);
>
> /*
> * Here we do an (almost) spurious inode lock in order to coordinate
> diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
> index ce278b3..1f69dda 100644
> --- a/fs/xfs/xfs_inode.c
> +++ b/fs/xfs/xfs_inode.c
> @@ -1946,8 +1946,9 @@ xfs_ifree_cluster(
> xfs_inode_t *ip, **ip_found;
> xfs_inode_log_item_t *iip;
> xfs_log_item_t *lip;
> - xfs_perag_t *pag = xfs_get_perag(mp, inum);
> + struct xfs_perag *pag;
>
> + pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum));
> if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) {
> blks_per_cluster = 1;
> ninodes = mp->m_sb.sb_inopblock;
> @@ -2088,7 +2089,7 @@ xfs_ifree_cluster(
> }
>
> kmem_free(ip_found);
> - xfs_put_perag(mp, pag);
> + xfs_perag_put(pag);
> }
>
> /*
> @@ -2675,7 +2676,7 @@ xfs_iflush_cluster(
> xfs_buf_t *bp)
> {
> xfs_mount_t *mp = ip->i_mount;
> - xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino);
> + struct xfs_perag *pag;
> unsigned long first_index, mask;
> unsigned long inodes_per_cluster;
> int ilist_size;
> @@ -2686,6 +2687,7 @@ xfs_iflush_cluster(
> int bufwasdelwri;
> int i;
>
> + pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
> ASSERT(pag->pagi_inodeok);
> ASSERT(pag->pag_ici_init);
>
> diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
> index 1df7e45..f8a68a2 100644
> --- a/fs/xfs/xfs_mount.h
> +++ b/fs/xfs/xfs_mount.h
> @@ -386,14 +386,14 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
> /*
> * perag get/put wrappers for eventual ref counting
> */
> -static inline xfs_perag_t *
> -xfs_get_perag(struct xfs_mount *mp, xfs_ino_t ino)
> +static inline struct xfs_perag *
> +xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
> {
> - return &mp->m_perag[XFS_INO_TO_AGNO(mp, ino)];
> + return &mp->m_perag[agno];
> }
>
> static inline void
> -xfs_put_perag(struct xfs_mount *mp, xfs_perag_t *pag)
> +xfs_perag_put(struct xfs_perag *pag)
> {
> /* nothing to see here, move along */
> }
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH 1/6] XFS: rename xfs_get_perag
2009-12-21 22:21 ` Alex Elder
@ 2009-12-21 22:40 ` Dave Chinner
0 siblings, 0 replies; 23+ messages in thread
From: Dave Chinner @ 2009-12-21 22:40 UTC (permalink / raw)
To: Alex Elder; +Cc: xfs
On Mon, Dec 21, 2009 at 04:21:13PM -0600, Alex Elder wrote:
> Dave Chinner wrote:
> > xfs_get_perag is really getting the perag that an inode
> > belongs to based on it's inode number. Convert the use of this
> > function to just get the perag from a provided ag number.
> > Use this new function to obtain the per-ag structure when
> > traversing the per AG inode trees for sync and reclaim.
>
> General
> - I like that you now use balanced get/put calls in some places
> that previously "got" the ag reference directly (i.e., open
> coded), but then used the put interface to release it.
> - I do prefer the xfs_perag_get/put naming convention you use (FYI)
>
> But a real question...
> - Why is there no matching xfs_perag_put() in xfs_iflush_cluster()?
> (It was that way before. I only superficially read that part
> of the code so I'm probably just missing something.)
Because this patch is only really converting existing users, not
fixing unbalance bugs in th existing get/put calls. The balancing
bugs are fixed in a later patch ("XFS: convert remaining direct
references to m_perag" IIRC). There are several other balancing bugs
fixed in that patch.
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 23+ messages in thread
* RE: [PATCH 2/6] XFS: Don't directly reference m_perag in allocation code
2009-12-15 6:11 ` [PATCH 2/6] XFS: Don't directly reference m_perag in allocation code Dave Chinner
2009-12-20 4:24 ` Dave Chinner
@ 2009-12-22 14:18 ` Alex Elder
1 sibling, 0 replies; 23+ messages in thread
From: Alex Elder @ 2009-12-22 14:18 UTC (permalink / raw)
To: Dave Chinner; +Cc: xfs
Dave Chinner wrote:
> Start abstracting the perag references so that the indexing of the
> structures is not directly coded into all the places that uses the
> perag structures. This will allow us to separate the use of the
> perag structure and the way it is indexed and hence avoid the known
> deadlocks related to growing a busy filesystem.
Looks good. I think I would have preferred that you
added the matching "put" calls at the time you converted
the references to use "get" calls (mentioned on the last
patch) because now I find myself ignoring the mismatches.
No big deal.
> Signed-off-by: Dave Chinner <david@fromorbit.com>
Reviewed-by: Alex Elder <aelder@sgi.com>
> ---
> fs/xfs/xfs_alloc.c | 81 +++++++++++++++++++++++++--------------------
> fs/xfs/xfs_alloc_btree.c | 9 ++++-
> 2 files changed, 52 insertions(+), 38 deletions(-)
>
> diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
> index a1c65fc..3cb533c 100644
> --- a/fs/xfs/xfs_alloc.c
> +++ b/fs/xfs/xfs_alloc.c
> @@ -1662,11 +1662,13 @@ xfs_free_ag_extent(
> xfs_agf_t *agf;
> xfs_perag_t *pag; /* per allocation group data */
>
> + pag = xfs_perag_get(mp, agno);
> + pag->pagf_freeblks += len;
> + xfs_perag_put(pag);
> +
> agf = XFS_BUF_TO_AGF(agbp);
> - pag = &mp->m_perag[agno];
> be32_add_cpu(&agf->agf_freeblks, len);
> xfs_trans_agblocks_delta(tp, len);
> - pag->pagf_freeblks += len;
> XFS_WANT_CORRUPTED_GOTO(
> be32_to_cpu(agf->agf_freeblks) <=
> be32_to_cpu(agf->agf_length),
> @@ -1969,7 +1971,8 @@ xfs_alloc_get_freelist(
> xfs_trans_brelse(tp, agflbp);
> if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
> agf->agf_flfirst = 0;
> - pag = &mp->m_perag[be32_to_cpu(agf->agf_seqno)];
> +
> + pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
> be32_add_cpu(&agf->agf_flcount, -1);
> xfs_trans_agflist_delta(tp, -1);
> pag->pagf_flcount--;
> @@ -2078,7 +2081,8 @@ xfs_alloc_put_freelist(
> be32_add_cpu(&agf->agf_fllast, 1);
> if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp))
> agf->agf_fllast = 0;
> - pag = &mp->m_perag[be32_to_cpu(agf->agf_seqno)];
> +
> + pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
> be32_add_cpu(&agf->agf_flcount, 1);
> xfs_trans_agflist_delta(tp, 1);
> pag->pagf_flcount++;
> @@ -2089,6 +2093,7 @@ xfs_alloc_put_freelist(
> pag->pagf_btreeblks--;
> logflags |= XFS_AGF_BTREEBLKS;
> }
> + xfs_perag_put(pag);
>
> xfs_alloc_log_agf(tp, agbp, logflags);
>
> @@ -2152,7 +2157,6 @@ xfs_read_agf(
> xfs_trans_brelse(tp, *bpp);
> return XFS_ERROR(EFSCORRUPTED);
> }
> -
> XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_AGF, XFS_AGF_REF);
> return 0;
> }
> @@ -2184,7 +2188,7 @@ xfs_alloc_read_agf(
> ASSERT(!XFS_BUF_GETERROR(*bpp));
>
> agf = XFS_BUF_TO_AGF(*bpp);
> - pag = &mp->m_perag[agno];
> + pag = xfs_perag_get(mp, agno);
> if (!pag->pagf_init) {
> pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
> pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
> @@ -2211,6 +2215,7 @@ xfs_alloc_read_agf(
> be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
> }
> #endif
> + xfs_perag_put(pag);
> return 0;
> }
>
> @@ -2271,7 +2276,7 @@ xfs_alloc_vextent(
> */
> args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
> down_read(&mp->m_peraglock);
> - args->pag = &mp->m_perag[args->agno];
> + args->pag = xfs_perag_get(mp, args->agno);
> args->minleft = 0;
> error = xfs_alloc_fix_freelist(args, 0);
> args->minleft = minleft;
> @@ -2341,7 +2346,7 @@ xfs_alloc_vextent(
> */
> down_read(&mp->m_peraglock);
> for (;;) {
> - args->pag = &mp->m_perag[args->agno];
> + args->pag = xfs_perag_get(mp, args->agno);
> if (no_min) args->minleft = 0;
> error = xfs_alloc_fix_freelist(args, flags);
> args->minleft = minleft;
> @@ -2400,6 +2405,7 @@ xfs_alloc_vextent(
> }
> }
> }
> + xfs_perag_put(args->pag);
> }
> up_read(&mp->m_peraglock);
> if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) {
> @@ -2427,8 +2433,10 @@ xfs_alloc_vextent(
> args->len);
> #endif
> }
> + xfs_perag_put(args->pag);
> return 0;
> error0:
> + xfs_perag_put(args->pag);
> up_read(&mp->m_peraglock);
> return error;
> }
> @@ -2455,7 +2463,7 @@ xfs_free_extent(
> ASSERT(args.agno < args.mp->m_sb.sb_agcount);
> args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno);
> down_read(&args.mp->m_peraglock);
> - args.pag = &args.mp->m_perag[args.agno];
> + args.pag = xfs_perag_get(args.mp, args.agno);
> if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING)))
> goto error0;
> #ifdef DEBUG
> @@ -2465,6 +2473,7 @@ xfs_free_extent(
> #endif
> error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
> error0:
> + xfs_perag_put(args.pag);
> up_read(&args.mp->m_peraglock);
> return error;
> }
> @@ -2486,15 +2495,15 @@ xfs_alloc_mark_busy(xfs_trans_t *tp,
> xfs_agblock_t bno,
> xfs_extlen_t len)
> {
> - xfs_mount_t *mp;
> xfs_perag_busy_t *bsy;
> + struct xfs_perag *pag;
> int n;
>
> - mp = tp->t_mountp;
> - spin_lock(&mp->m_perag[agno].pagb_lock);
> + pag = xfs_perag_get(tp->t_mountp, agno);
> + spin_lock(&pag->pagb_lock);
>
> /* search pagb_list for an open slot */
> - for (bsy = mp->m_perag[agno].pagb_list, n = 0;
> + for (bsy = pag->pagb_list, n = 0;
> n < XFS_PAGB_NUM_SLOTS;
> bsy++, n++) {
> if (bsy->busy_tp == NULL) {
> @@ -2502,11 +2511,11 @@ xfs_alloc_mark_busy(xfs_trans_t *tp,
> }
> }
>
> - trace_xfs_alloc_busy(mp, agno, bno, len, n);
> + trace_xfs_alloc_busy(tp->t_mountp, agno, bno, len, n);
>
> if (n < XFS_PAGB_NUM_SLOTS) {
> - bsy = &mp->m_perag[agno].pagb_list[n];
> - mp->m_perag[agno].pagb_count++;
> + bsy = &pag->pagb_list[n];
> + pag->pagb_count++;
> bsy->busy_start = bno;
> bsy->busy_length = len;
> bsy->busy_tp = tp;
> @@ -2521,7 +2530,8 @@ xfs_alloc_mark_busy(xfs_trans_t *tp,
> xfs_trans_set_sync(tp);
> }
>
> - spin_unlock(&mp->m_perag[agno].pagb_lock);
> + spin_unlock(&pag->pagb_lock);
> + xfs_perag_put(pag);
> }
>
> void
> @@ -2529,24 +2539,23 @@ xfs_alloc_clear_busy(xfs_trans_t *tp,
> xfs_agnumber_t agno,
> int idx)
> {
> - xfs_mount_t *mp;
> + struct xfs_perag *pag;
> xfs_perag_busy_t *list;
>
> - mp = tp->t_mountp;
> -
> - spin_lock(&mp->m_perag[agno].pagb_lock);
> - list = mp->m_perag[agno].pagb_list;
> -
> ASSERT(idx < XFS_PAGB_NUM_SLOTS);
> + pag = xfs_perag_get(tp->t_mountp, agno);
> + spin_lock(&pag->pagb_lock);
> + list = pag->pagb_list;
>
> - trace_xfs_alloc_unbusy(mp, agno, idx, list[idx].busy_tp == tp);
> + trace_xfs_alloc_unbusy(tp->t_mountp, agno, idx, list[idx].busy_tp == tp);
>
> if (list[idx].busy_tp == tp) {
> list[idx].busy_tp = NULL;
> - mp->m_perag[agno].pagb_count--;
> + pag->pagb_count--;
> }
>
> - spin_unlock(&mp->m_perag[agno].pagb_lock);
> + spin_unlock(&pag->pagb_lock);
> + xfs_perag_put(pag);
> }
>
>
> @@ -2560,21 +2569,20 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
> xfs_agblock_t bno,
> xfs_extlen_t len)
> {
> - xfs_mount_t *mp;
> + struct xfs_perag *pag;
> xfs_perag_busy_t *bsy;
> xfs_agblock_t uend, bend;
> xfs_lsn_t lsn;
> int cnt;
>
> - mp = tp->t_mountp;
> -
> - spin_lock(&mp->m_perag[agno].pagb_lock);
> - cnt = mp->m_perag[agno].pagb_count;
> + pag = xfs_perag_get(tp->t_mountp, agno);
> + spin_lock(&pag->pagb_lock);
> + cnt = pag->pagb_count;
>
> uend = bno + len - 1;
>
> /* search pagb_list for this slot, skipping open slots */
> - for (bsy = mp->m_perag[agno].pagb_list; cnt; bsy++) {
> + for (bsy = pag->pagb_list; cnt; bsy++) {
>
> /*
> * (start1,length1) within (start2, length2)
> @@ -2589,7 +2597,7 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
> }
> }
>
> - trace_xfs_alloc_busysearch(mp, agno, bno, len, !!cnt);
> + trace_xfs_alloc_busysearch(tp->t_mountp, agno, bno, len, !!cnt);
>
> /*
> * If a block was found, force the log through the LSN of the
> @@ -2597,9 +2605,10 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
> */
> if (cnt) {
> lsn = bsy->busy_tp->t_commit_lsn;
> - spin_unlock(&mp->m_perag[agno].pagb_lock);
> - xfs_log_force(mp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC);
> + spin_unlock(&pag->pagb_lock);
> + xfs_log_force(tp->t_mountp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC);
> } else {
> - spin_unlock(&mp->m_perag[agno].pagb_lock);
> + spin_unlock(&pag->pagb_lock);
> }
> + xfs_perag_put(pag);
> }
> diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
> index adbd914..b726e10 100644
> --- a/fs/xfs/xfs_alloc_btree.c
> +++ b/fs/xfs/xfs_alloc_btree.c
> @@ -61,12 +61,14 @@ xfs_allocbt_set_root(
> struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
> xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno);
> int btnum = cur->bc_btnum;
> + struct xfs_perag *pag = xfs_perag_get(cur->bc_mp, seqno);
>
> ASSERT(ptr->s != 0);
>
> agf->agf_roots[btnum] = ptr->s;
> be32_add_cpu(&agf->agf_levels[btnum], inc);
> - cur->bc_mp->m_perag[seqno].pagf_levels[btnum] += inc;
> + pag->pagf_levels[btnum] += inc;
> + xfs_perag_put(pag);
>
> xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
> }
> @@ -150,6 +152,7 @@ xfs_allocbt_update_lastrec(
> {
> struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
> xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno);
> + struct xfs_perag *pag;
> __be32 len;
> int numrecs;
>
> @@ -193,7 +196,9 @@ xfs_allocbt_update_lastrec(
> }
>
> agf->agf_longest = len;
> - cur->bc_mp->m_perag[seqno].pagf_longest = be32_to_cpu(len);
> + pag = xfs_perag_get(cur->bc_mp, seqno);
> + pag->pagf_longest = be32_to_cpu(len);
> + xfs_perag_put(pag);
> xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, XFS_AGF_LONGEST);
> }
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 23+ messages in thread
* RE: [PATCH 3/6] XFS: Convert filestreams code to use per-ag get/putroutines
2009-12-15 6:11 ` [PATCH 3/6] XFS: Convert filestreams code to use per-ag get/put routines Dave Chinner
@ 2009-12-22 15:08 ` Alex Elder
0 siblings, 0 replies; 23+ messages in thread
From: Alex Elder @ 2009-12-22 15:08 UTC (permalink / raw)
To: Dave Chinner; +Cc: xfs
Dave Chinner wrote:
> Signed-off-by: Dave Chinner <david@fromorbit.com>
Looks good. I'll add a short one-line explanation at the
top of the patch for you.
Reviewed-by: Alex Elder <aelder@sgi.com>
> ---
> fs/xfs/xfs_filestream.c | 19 ++++++++++++-------
> fs/xfs/xfs_filestream.h | 27 ++++++++++++++++++++++++---
> 2 files changed, 36 insertions(+), 10 deletions(-)
>
> diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
> index a631e14..e61f2aa 100644
> --- a/fs/xfs/xfs_filestream.c
> +++ b/fs/xfs/xfs_filestream.c
. . .
> @@ -173,6 +174,7 @@ _xfs_filestream_pick_ag(
> /* Keep track of the AG with the most free blocks. */
> if (pag->pagf_freeblks > maxfree) {
> maxfree = pag->pagf_freeblks;
> + max_streams = atomic_read(&pag->pagf_fstrms);
> max_ag = ag;
> }
>
> @@ -195,6 +197,8 @@ _xfs_filestream_pick_ag(
>
> /* Break out, retaining the reference on the AG. */
> free = pag->pagf_freeblks;
> + streams = atomic_read(&pag->pagf_fstrms);
> + xfs_perag_put(pag);
> *agp = ag;
> break;
> }
> @@ -202,6 +206,7 @@ _xfs_filestream_pick_ag(
> /* Drop the reference on this AG, it's not usable. */
> xfs_filestream_put_ag(mp, ag);
> next_ag:
> + xfs_perag_put(pag);
> /* Move to the next AG, wrapping to AG 0 if necessary. */
> if (++ag >= mp->m_sb.sb_agcount)
> ag = 0;
> @@ -229,6 +234,7 @@ next_ag:
> if (max_ag != NULLAGNUMBER) {
> xfs_filestream_get_ag(mp, max_ag);
> TRACE_AG_PICK1(mp, max_ag, maxfree);
> + streams = max_streams;
> free = maxfree;
> *agp = max_ag;
> break;
> @@ -240,8 +246,7 @@ next_ag:
> return 0;
> }
>
> - TRACE_AG_PICK2(mp, startag, *agp, xfs_filestream_peek_ag(mp, *agp),
> - free, nscan, flags);
> + TRACE_AG_PICK2(mp, startag, *agp, streams, free, nscan, flags);
>
> return 0;
> }
> diff --git a/fs/xfs/xfs_filestream.h b/fs/xfs/xfs_filestream.h
> index 4aba67c..58378b2 100644
> --- a/fs/xfs/xfs_filestream.h
> +++ b/fs/xfs/xfs_filestream.h
> @@ -79,12 +79,21 @@ extern ktrace_t *xfs_filestreams_trace_buf;
> * the cache that reference per-ag array elements that have since been
> * reallocated.
> */
> +/*
> + * xfs_filestream_peek_ag is only used in tracing code
> + */
> static inline int
> xfs_filestream_peek_ag(
> xfs_mount_t *mp,
> xfs_agnumber_t agno)
> {
> - return atomic_read(&mp->m_perag[agno].pagf_fstrms);
> + struct xfs_perag *pag;
> + int ret;
> +
> + pag = xfs_perag_get(mp, agno);
> + ret = atomic_read(&pag->pagf_fstrms);
> + xfs_perag_put(pag);
> + return ret;
> }
>
> static inline int
> @@ -92,7 +101,13 @@ xfs_filestream_get_ag(
> xfs_mount_t *mp,
> xfs_agnumber_t agno)
> {
> - return atomic_inc_return(&mp->m_perag[agno].pagf_fstrms);
> + struct xfs_perag *pag;
> + int ret;
> +
> + pag = xfs_perag_get(mp, agno);
> + ret = atomic_inc_return(&pag->pagf_fstrms);
> + xfs_perag_put(pag);
> + return ret;
> }
>
> static inline int
> @@ -100,7 +115,13 @@ xfs_filestream_put_ag(
> xfs_mount_t *mp,
> xfs_agnumber_t agno)
> {
> - return atomic_dec_return(&mp->m_perag[agno].pagf_fstrms);
> + struct xfs_perag *pag;
> + int ret;
> +
> + pag = xfs_perag_get(mp, agno);
> + ret = atomic_dec_return(&pag->pagf_fstrms);
> + xfs_perag_put(pag);
> + return ret;
> }
>
> /* allocation selection flags */
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH 5/6] XFS: Replace per-ag array with a radix tree
2009-12-15 6:11 ` [PATCH 5/6] XFS: Replace per-ag array with a radix tree Dave Chinner
@ 2009-12-23 16:02 ` Christoph Hellwig
2009-12-23 22:08 ` Alex Elder
1 sibling, 0 replies; 23+ messages in thread
From: Christoph Hellwig @ 2009-12-23 16:02 UTC (permalink / raw)
To: Dave Chinner; +Cc: xfs
Btw, after this we should switch pagb_list to a static array instead
of the current individual allocation. And maybe revisit the choise
of the array size.
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 23+ messages in thread
* RE: [PATCH 4/6] XFS: convert remaining direct references to m_perag
2009-12-15 6:11 ` [PATCH 4/6] XFS: convert remaining direct references to m_perag Dave Chinner
@ 2009-12-23 20:49 ` Alex Elder
0 siblings, 0 replies; 23+ messages in thread
From: Alex Elder @ 2009-12-23 20:49 UTC (permalink / raw)
To: Dave Chinner; +Cc: xfs
Dave Chinner wrote:
> Convert the remaining direct lookups of the per ag structures
> to use get/put accesses. Ensure that the loops across AGs and
> prior users of the interface balance gets and puts correctly.
Generally looks good, but I have a question below.
Also, it looks to me like you missed adding a matching
xfs_perag_put() call in xfs_alloc_get_freelist(). I
have otherwise satisfied myself that all the get/put
calls are all matched up. Furthermore, the only real
references to the m_perag field in the mount structure
is via xfs_perag_get().
-Alex
> Signed-off-by: Dave Chinner <david@fromorbit.com>
> ---
> fs/xfs/xfs_bmap.c | 8 +++++++-
> fs/xfs/xfs_ialloc.c | 35 +++++++++++++++++++++++++----------
> fs/xfs/xfs_inode.c | 5 ++++-
> fs/xfs/xfs_mount.c | 9 ++++++---
> 4 files changed, 42 insertions(+), 15 deletions(-)
>
> diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
> index 98251cd..a9b95d9 100644
> --- a/fs/xfs/xfs_bmap.c
> +++ b/fs/xfs/xfs_bmap.c
. . .
> @@ -2667,6 +2668,7 @@ xfs_bmap_btalloc(
> break;
>
> error = xfs_filestream_new_ag(ap, &ag);
> + xfs_perag_put(pag);
Is there a reason you do the put operation *after* the call to
xfs_filestream_new_ag()? I only ask because I had a similar
question (below) where xfs_ialloc_read_agi() was called, without
passing pag, but before calling xfs_perag_put() and I wondered
whether there was an implied requirement there. If there is,
I'd like to see that made more explicit. If not, I prefer to
see the put call done as early as possible.
> if (error) {
> up_read(&mp->m_peraglock);
> return error;
. . .
> diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
> index cb907ba..884ee13 100644
> --- a/fs/xfs/xfs_ialloc.c
> +++ b/fs/xfs/xfs_ialloc.c
. . .
> @@ -772,11 +778,14 @@ nextag:
> return noroom ? ENOSPC : 0;
> }
> down_read(&mp->m_peraglock);
> - if (mp->m_perag[tagno].pagi_inodeok == 0) {
> + pag = xfs_perag_get(mp, tagno);
> + if (pag->pagi_inodeok == 0) {
> + xfs_perag_put(pag);
> up_read(&mp->m_peraglock);
> goto nextag;
> }
> error = xfs_ialloc_read_agi(mp, tp, tagno, &agbp);
> + xfs_perag_put(pag);
Here is the spot I wondered about--whether the pag needed to
be held across the call to xfs_ialloc_read_agi(). If so,
it isn't obvious from looking directly at this spot in the
code so it should maybe be spelled out more clearly in a
comment.
> up_read(&mp->m_peraglock);
> if (error)
> goto nextag;
. . .
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 23+ messages in thread
* RE: [PATCH 5/6] XFS: Replace per-ag array with a radix tree
2009-12-15 6:11 ` [PATCH 5/6] XFS: Replace per-ag array with a radix tree Dave Chinner
2009-12-23 16:02 ` Christoph Hellwig
@ 2009-12-23 22:08 ` Alex Elder
2009-12-26 4:17 ` Dave Chinner
1 sibling, 1 reply; 23+ messages in thread
From: Alex Elder @ 2009-12-23 22:08 UTC (permalink / raw)
To: Dave Chinner; +Cc: xfs
Dave Chinner wrote:
> The use of an array for the per-ag structures requires reallocation of the
> array when growing the filesystem. This requires locking access to the array to
> avoid use after free situations, and the locking is difficult to get right. To
> avoid needing to reallocate an array, change the per-ag structures to an
> allocated object per ag and index them using a tree structure.
>
> The AGs are always densely indexed (hence the use of an array), but the number
> supported is 2^32 and lookups tend to be random and hence indexing needs to
> scale. A simple choice is a radix tree - it works well with this sort of index.
> This change also removes another large contiguous allocation from the
> mount/growfs path in XFS.
>
> The growing process now needs to change to only initialise the new AGs required
> for the extra space, and as such only needs to exclusively lock the tree for
> inserts. The rest of the code only needs to lock the tree while doing lookups,
> and hence this will remove all the deadlocks that currently occur on the
> m_perag_lock as it is now an innermost lock. The lock is also changed to a
> spinlock from a read/write lock as the hold time is now extremely short.
>
> To complete the picture, the per-ag structures will need to be reference
> counted to ensure that we don't free/modify them while they are still in use.
> This will be done in subsequent patch.
In general, this looks good, and I especially like that you have added
some new comments to help explain what's going on in a few places.
I have a few concerns though, and I have some questions/suggestions below.
> Signed-off-by: Dave Chinner <david@fromorbit.com>
. . .
> diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
> index e61f2aa..914d00d 100644
> --- a/fs/xfs/xfs_filestream.c
> +++ b/fs/xfs/xfs_filestream.c
. . .
> @@ -456,10 +455,10 @@ xfs_filestream_unmount(
> }
>
> /*
> - * If the mount point's m_perag array is going to be reallocated, all
> + * If the mount point's m_perag tree is going to be modified, all
> * outstanding cache entries must be flushed to avoid accessing reference count
> * addresses that have been freed. The call to xfs_filestream_flush() must be
> - * made inside the block that holds the m_peraglock in write mode to do the
> + * made inside the block that holds the m_perag_lock in write mode to do the
Your change actually gets rid of the need to do this flush in the grow case
(which is great), so this comment is no longer pertinent and should be killed
off.
> * reallocation.
> */
> void
. . .
> diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
> index a13919a..37a6f62 100644
> --- a/fs/xfs/xfs_fsops.c
> +++ b/fs/xfs/xfs_fsops.c
> @@ -167,27 +167,14 @@ xfs_growfs_data_private(
> }
> new = nb - mp->m_sb.sb_dblocks;
> oagcount = mp->m_sb.sb_agcount;
> - if (nagcount > oagcount) {
> - void *new_perag, *old_perag;
> -
> - xfs_filestream_flush(mp);
> -
> - new_perag = kmem_zalloc(sizeof(xfs_perag_t) * nagcount,
> - KM_MAYFAIL);
> - if (!new_perag)
> - return XFS_ERROR(ENOMEM);
> -
> - down_write(&mp->m_peraglock);
> - memcpy(new_perag, mp->m_perag, sizeof(xfs_perag_t) * oagcount);
> - old_perag = mp->m_perag;
> - mp->m_perag = new_perag;
> -
> - mp->m_flags |= XFS_MOUNT_32BITINODES;
I'm not sure why this flag was getting set, but your change
does not implement this, at least not unconditionally. (It
is getting set based on mp->m_flags in xfs_initialize_perag()).
Is that OK?
> - nagimax = xfs_initialize_perag(mp, nagcount);
> - up_write(&mp->m_peraglock);
>
> - kmem_free(old_perag);
> + /* allocate the new per-ag structures */
> + if (nagcount > oagcount) {
Maybe this occurs in another way, but in order to allow for the
incremental update I think something needs to be done to prevent
concurrent grow requests, possibly here. (See more about this below.)
> + error = xfs_initialize_perag(mp, nagcount, &nagimax);
> + if (error)
> + return error;
> }
> +
> tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS);
> tp->t_flags |= XFS_TRANS_RESERVE;
> if ((error = xfs_trans_reserve(tp, XFS_GROWFS_SPACE_RES(mp),
. . .
> diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
> index 4739c2c..73d61d4 100644
> --- a/fs/xfs/xfs_mount.c
> +++ b/fs/xfs/xfs_mount.c
> @@ -209,13 +209,16 @@ STATIC void
> xfs_free_perag(
> xfs_mount_t *mp)
> {
> - if (mp->m_perag) {
> - int agno;
> + xfs_agnumber_t agno;
> + struct xfs_perag *pag;
>
> - for (agno = 0; agno < mp->m_maxagi; agno++)
> - if (mp->m_perag[agno].pagb_list)
> - kmem_free(mp->m_perag[agno].pagb_list);
> - kmem_free(mp->m_perag);
> + for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
Why do you switch to using m_sb.sb_agcount rather than mp->m_maxagi?
Can we get rid of the latter at some point, or is there a window
of time where it is possible and meaningful for them to be
different? (The change is fine--I guess this is more like a
possibly unrelated question...)
> + spin_lock(&mp->m_perag_lock);
> + pag = radix_tree_delete(&mp->m_perag_tree, agno);
> + spin_unlock(&mp->m_perag_lock);
> + ASSERT(pag);
> + kmem_free(pag->pagb_list);
> + kmem_free(pag);
> }
> }
>
> @@ -389,10 +392,11 @@ xfs_initialize_perag_icache(
> }
> }
>
> -xfs_agnumber_t
> +int
> xfs_initialize_perag(
> xfs_mount_t *mp,
> - xfs_agnumber_t agcount)
> + xfs_agnumber_t agcount,
> + xfs_agnumber_t *maxagi)
Now that you're returning an errno... If this function is
successful, it simply returns agcount in this location. If
it fails, it returns nothing meaningful. I'd say there is
no real need to have maxagi in the function definition any
more (though it may just be a convenience for the caller).
> {
> xfs_agnumber_t index, max_metadata;
> xfs_perag_t *pag;
> @@ -405,6 +409,33 @@ xfs_initialize_perag(
> agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
> ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
>
> + /*
> + * Walk the current per-ag tree so we don't try to initialise AGs
> + * that already exist (growfs case). Allocate and insert all the
> + * AGs we don't find ready for initialisation.
> + */
> + for (index = 0; index < agcount; index++) {
> + pag = xfs_perag_get(mp, index);
> + if (pag) {
> + xfs_perag_put(pag);
> + continue;
> + }
> + pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL);
> + if (!pag)
> + return -ENOMEM;
Suppose we're adding two AG's to the file system. What if
the first one gets its per-ag structure successfully
inserted, and the second one fails. Then the call to
xfs_initalize_perag() will fail but the mount struct's
perag information will not be consistent. Neither
m_sb.sb_agcount nor mp->m_maxagi will reflect the presence
of this allocated perag structure, yet it could interfere
with subsequent attempts to grow the file system. If
the index value were returned in *maxagi at this point
then maybe the caller could try to recover or something,
but I think logic to handle this case is better
incorporated/hidden in or under this function.
> + if (radix_tree_preload(GFP_NOFS))
> + return -ENOMEM;
> + spin_lock(&mp->m_perag_lock);
> + if (radix_tree_insert(&mp->m_perag_tree, index, pag)) {
> + BUG();
There's a window between the xfs_perag_get() above and here in
which, if concurrent grows were allowed, the insert could fail.
This is why I said above that there needs to be protection from
concurrent grows. Does that already get taken care of somewhere?
> + spin_unlock(&mp->m_perag_lock);
> + kmem_free(pag);
> + return -EEXIST;
> + }
> + spin_unlock(&mp->m_perag_lock);
> + radix_tree_preload_end();
> + }
> +
> /* Clear the mount flag if no inode can overflow 32 bits
> * on this filesystem, or if specifically requested..
> */
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 23+ messages in thread
* RE: [PATCH 6/6] XFS: Reference count per-ag structures
2009-12-15 6:11 ` [PATCH 6/6] XFS: Reference count per-ag structures Dave Chinner
@ 2009-12-23 22:12 ` Alex Elder
0 siblings, 0 replies; 23+ messages in thread
From: Alex Elder @ 2009-12-23 22:12 UTC (permalink / raw)
To: Dave Chinner; +Cc: xfs
Dave Chinner wrote:
> Reference count the per-ag structures to ensure that we keep get/put
> pairs balanced. Assert that the reference counts are zero at unmount
> time to catch leaks. In future, reference counts will enable us to
> safely remove perag structures by allowing us to detect when they
> are no longer in use.
Looks good. Two comments below; I'll fix up if you want.
> Signed-off-by: Dave Chinner <david@fromorbit.com>
Reviewed-by: Alex Elder <aelder@sgi.com>
> ---
. . .
> diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
> index 73d61d4..1d00f7f 100644
> --- a/fs/xfs/xfs_mount.c
> +++ b/fs/xfs/xfs_mount.c
> @@ -215,6 +215,7 @@ xfs_free_perag(
> for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
> spin_lock(&mp->m_perag_lock);
> pag = radix_tree_delete(&mp->m_perag_tree, agno);
> + ASSERT(atomic_read(&pag->pag_ref) == 0);
> spin_unlock(&mp->m_perag_lock);
> ASSERT(pag);
This ASSERT(pag) should be moved up before ASSERT(atomic_read(&pag->pag_ref == 0)
> kmem_free(pag->pagb_list);
. . .
> diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
> index cfa7a5d..16b2212 100644
> --- a/fs/xfs/xfs_mount.h
> +++ b/fs/xfs/xfs_mount.h
. . .
> @@ -393,6 +393,12 @@ xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
>
> spin_lock(&mp->m_perag_lock);
> pag = radix_tree_lookup(&mp->m_perag_tree, agno);
> + if (pag) {
> + ASSERT(atomic_read(&pag->pag_ref) >= 0);
> + /* catch leaks in the positive direction during testing */
> + ASSERT(atomic_read(&pag->pag_ref) < 1000);
This is good; what makes 1000 a reasonable number (why not
100 or 10000?)
> + atomic_inc(&pag->pag_ref);
> + }
> spin_unlock(&mp->m_perag_lock);
> return pag;
> }
. . .
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH 5/6] XFS: Replace per-ag array with a radix tree
2009-12-23 22:08 ` Alex Elder
@ 2009-12-26 4:17 ` Dave Chinner
0 siblings, 0 replies; 23+ messages in thread
From: Dave Chinner @ 2009-12-26 4:17 UTC (permalink / raw)
To: Alex Elder; +Cc: xfs
On Wed, Dec 23, 2009 at 04:08:54PM -0600, Alex Elder wrote:
> Dave Chinner wrote:
> > --- a/fs/xfs/xfs_filestream.c
> > +++ b/fs/xfs/xfs_filestream.c
> . . .
> > @@ -456,10 +455,10 @@ xfs_filestream_unmount(
> > }
> >
> > /*
> > - * If the mount point's m_perag array is going to be reallocated, all
> > + * If the mount point's m_perag tree is going to be modified, all
> > * outstanding cache entries must be flushed to avoid accessing reference count
> > * addresses that have been freed. The call to xfs_filestream_flush() must be
> > - * made inside the block that holds the m_peraglock in write mode to do the
> > + * made inside the block that holds the m_perag_lock in write mode to do the
>
> Your change actually gets rid of the need to do this flush in the grow case
> (which is great), so this comment is no longer pertinent and should be killed
> off.
I am planning to kill off the unneeded filestreams stuff in another
cleanup patch - I just haven't got to it yet.
>
> > * reallocation.
> > */
> > void
> . . .
> > diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
> > index a13919a..37a6f62 100644
> > --- a/fs/xfs/xfs_fsops.c
> > +++ b/fs/xfs/xfs_fsops.c
> > @@ -167,27 +167,14 @@ xfs_growfs_data_private(
> > }
> > new = nb - mp->m_sb.sb_dblocks;
> > oagcount = mp->m_sb.sb_agcount;
> > - if (nagcount > oagcount) {
> > - void *new_perag, *old_perag;
> > -
> > - xfs_filestream_flush(mp);
> > -
> > - new_perag = kmem_zalloc(sizeof(xfs_perag_t) * nagcount,
> > - KM_MAYFAIL);
> > - if (!new_perag)
> > - return XFS_ERROR(ENOMEM);
> > -
> > - down_write(&mp->m_peraglock);
> > - memcpy(new_perag, mp->m_perag, sizeof(xfs_perag_t) * oagcount);
> > - old_perag = mp->m_perag;
> > - mp->m_perag = new_perag;
> > -
> > - mp->m_flags |= XFS_MOUNT_32BITINODES;
>
> I'm not sure why this flag was getting set, but your change
> does not implement this, at least not unconditionally. (It
> is getting set based on mp->m_flags in xfs_initialize_perag()).
> Is that OK?
I think so - xfs_initialize_perag() got changed a while back not to
be dependent on this flag already being set to do the right thing,
but it wasn't removed from the growfs code. Hence I killed it here.
> > - nagimax = xfs_initialize_perag(mp, nagcount);
> > - up_write(&mp->m_peraglock);
> >
> > - kmem_free(old_perag);
> > + /* allocate the new per-ag structures */
> > + if (nagcount > oagcount) {
>
> Maybe this occurs in another way, but in order to allow for the
> incremental update I think something needs to be done to prevent
> concurrent grow requests, possibly here. (See more about this below.)
Agreed, and We've already got the mp->m_growlock to do this (see
xfs_growfs_data()).
> > diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
> > index 4739c2c..73d61d4 100644
> > --- a/fs/xfs/xfs_mount.c
> > +++ b/fs/xfs/xfs_mount.c
> > @@ -209,13 +209,16 @@ STATIC void
> > xfs_free_perag(
> > xfs_mount_t *mp)
> > {
> > - if (mp->m_perag) {
> > - int agno;
> > + xfs_agnumber_t agno;
> > + struct xfs_perag *pag;
> >
> > - for (agno = 0; agno < mp->m_maxagi; agno++)
> > - if (mp->m_perag[agno].pagb_list)
> > - kmem_free(mp->m_perag[agno].pagb_list);
> > - kmem_free(mp->m_perag);
> > + for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
>
> Why do you switch to using m_sb.sb_agcount rather than mp->m_maxagi?
I did that because there is a window between sb_agcount() being
updated and m_maxagi being updated in the grow case. If there is
a failure between the two
> Can we get rid of the latter at some point, or is there a window
> of time where it is possible and meaningful for them to be
> different?
During the grow they can be different. e.g. if the growfs
transaction commit fails we haven't yet updated m_maxagi, but
we have already updated and initialised sb_agcount...
> > @@ -389,10 +392,11 @@ xfs_initialize_perag_icache(
> > }
> > }
> >
> > -xfs_agnumber_t
> > +int
> > xfs_initialize_perag(
> > xfs_mount_t *mp,
> > - xfs_agnumber_t agcount)
> > + xfs_agnumber_t agcount,
> > + xfs_agnumber_t *maxagi)
>
> Now that you're returning an errno... If this function is
> successful, it simply returns agcount in this location. If
> it fails, it returns nothing meaningful. I'd say there is
> no real need to have maxagi in the function definition any
> more (though it may just be a convenience for the caller).
*maxagi is needed to be returned because the m_maxagi is not
allowed to be updated until the growfs transaction commits
so that inode allocation does not use AGs being added during
the grow. Hence we have to be able to return both an errno and
the new maxagi from the function....
> > xfs_agnumber_t index, max_metadata;
> > xfs_perag_t *pag;
> > @@ -405,6 +409,33 @@ xfs_initialize_perag(
> > agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
> > ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
> >
> > + /*
> > + * Walk the current per-ag tree so we don't try to initialise AGs
> > + * that already exist (growfs case). Allocate and insert all the
> > + * AGs we don't find ready for initialisation.
> > + */
> > + for (index = 0; index < agcount; index++) {
> > + pag = xfs_perag_get(mp, index);
> > + if (pag) {
> > + xfs_perag_put(pag);
> > + continue;
> > + }
> > + pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL);
> > + if (!pag)
> > + return -ENOMEM;
>
> Suppose we're adding two AG's to the file system. What if
> the first one gets its per-ag structure successfully
> inserted, and the second one fails. Then the call to
> xfs_initalize_perag() will fail but the mount struct's
> perag information will not be consistent.
Yup, and an ENOMEM here during a grow will cause a filesystem
shutdown, so it doesn't really matter that much.
> Neither
> m_sb.sb_agcount nor mp->m_maxagi will reflect the presence
> of this allocated perag structure, yet it could interfere
> with subsequent attempts to grow the file system.
This won't happen because the filesystem will be shut down
and a new grow attempt cannot be done without unmount/mount
cycle.
> If
> the index value were returned in *maxagi at this point
> then maybe the caller could try to recover or something,
> but I think logic to handle this case is better
> incorporated/hidden in or under this function.
Yeah, I think it needs to unwind the allocations that it has
already done. I'll send another patch to do that rather than
perturb this one again.
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 23+ messages in thread
* RE: [PATCH 7/6] XFS: Add trace points for per-ag refcount debugging.
2009-12-20 4:28 ` [PATCH 7/6] XFS: Add trace points for per-ag refcount debugging Dave Chinner
@ 2010-01-05 18:08 ` Alex Elder
0 siblings, 0 replies; 23+ messages in thread
From: Alex Elder @ 2010-01-05 18:08 UTC (permalink / raw)
To: Dave Chinner; +Cc: xfs
Dave Chinner wrote:
> Uninline xfs_perag_{get,put} so that tracepoints can be inserted
> into them to speed debugging of reference count problems.
Looks good.
> Signed-off-by: Dave Chinner <david@fromorbit.com>
Reviewed-by: Alex Elder <aelder@sgi.com>
> ---
> fs/xfs/linux-2.6/xfs_trace.h | 27 +++++++++++++++++++++++++++
> fs/xfs/xfs_ag.h | 2 ++
> fs/xfs/xfs_mount.c | 34 ++++++++++++++++++++++++++++++++++
> fs/xfs/xfs_mount.h | 25 ++-----------------------
> 4 files changed, 65 insertions(+), 23 deletions(-)
>
> diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
> index d4ded59..5ec1475 100644
> --- a/fs/xfs/linux-2.6/xfs_trace.h
> +++ b/fs/xfs/linux-2.6/xfs_trace.h
> @@ -33,6 +33,33 @@ struct xfs_dquot;
> struct xlog_ticket;
> struct log;
>
> +#define DEFINE_PERAG_REF_EVENT(name) \
> +TRACE_EVENT(name, \
> + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, \
> + unsigned long caller_ip), \
> + TP_ARGS(mp, agno, refcount, caller_ip), \
> + TP_STRUCT__entry( \
> + __field(dev_t, dev) \
> + __field(xfs_agnumber_t, agno) \
> + __field(int, refcount) \
> + __field(unsigned long, caller_ip) \
> + ), \
> + TP_fast_assign( \
> + __entry->dev = mp->m_super->s_dev; \
> + __entry->agno = agno; \
> + __entry->refcount = refcount; \
> + __entry->caller_ip = caller_ip; \
> + ), \
> + TP_printk("dev %d:%d agno %u refcount %d caller %pf", \
> + MAJOR(__entry->dev), MINOR(__entry->dev), \
> + __entry->agno, \
> + __entry->refcount, \
> + (char *)__entry->caller_ip) \
> +);
> +
> +DEFINE_PERAG_REF_EVENT(xfs_perag_get)
> +DEFINE_PERAG_REF_EVENT(xfs_perag_put)
> +
> #define DEFINE_ATTR_LIST_EVENT(name) \
> TRACE_EVENT(name, \
> TP_PROTO(struct xfs_attr_list_context *ctx), \
> diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
> index 18ae43f..963bc27 100644
> --- a/fs/xfs/xfs_ag.h
> +++ b/fs/xfs/xfs_ag.h
> @@ -197,6 +197,8 @@ typedef struct xfs_perag_busy {
> #endif
>
> typedef struct xfs_perag {
> + struct xfs_mount *pag_mount; /* owner filesystem */
> + xfs_agnumber_t pag_agno; /* AG this structure belongs to */
> atomic_t pag_ref; /* perag reference count */
> char pagf_init; /* this agf's entry is initialized */
> char pagi_init; /* this agi's entry is initialized */
> diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
> index 1d00f7f..223d9c3 100644
> --- a/fs/xfs/xfs_mount.c
> +++ b/fs/xfs/xfs_mount.c
> @@ -201,6 +201,38 @@ xfs_uuid_unmount(
>
>
> /*
> + * Reference counting access wrappers to the perag structures.
> + */
> +struct xfs_perag *
> +xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
> +{
> + struct xfs_perag *pag;
> + int ref = 0;
> +
> + spin_lock(&mp->m_perag_lock);
> + pag = radix_tree_lookup(&mp->m_perag_tree, agno);
> + if (pag) {
> + ASSERT(atomic_read(&pag->pag_ref) >= 0);
> + /* catch leaks in the positive direction during testing */
> + ASSERT(atomic_read(&pag->pag_ref) < 1000);
> + ref = atomic_inc_return(&pag->pag_ref);
> + }
> + spin_unlock(&mp->m_perag_lock);
> + trace_xfs_perag_get(mp, agno, ref, _RET_IP_);
> + return pag;
> +}
> +
> +void
> +xfs_perag_put(struct xfs_perag *pag)
> +{
> + int ref;
> +
> + ASSERT(atomic_read(&pag->pag_ref) > 0);
> + ref = atomic_dec_return(&pag->pag_ref);
> + trace_xfs_perag_put(pag->pag_mount, pag->pag_agno, ref, _RET_IP_);
> +}
> +
> +/*
> * Free up the resources associated with a mount structure. Assume that
> * the structure was initially zeroed, so we can tell which fields got
> * initialized.
> @@ -433,6 +465,8 @@ xfs_initialize_perag(
> kmem_free(pag);
> return -EEXIST;
> }
> + pag->pag_agno = index;
> + pag->pag_mount = mp;
> spin_unlock(&mp->m_perag_lock);
> radix_tree_preload_end();
> }
> diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
> index 16b2212..e62fd1c 100644
> --- a/fs/xfs/xfs_mount.h
> +++ b/fs/xfs/xfs_mount.h
> @@ -386,29 +386,8 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
> /*
> * perag get/put wrappers for ref counting
> */
> -static inline struct xfs_perag *
> -xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
> -{
> - struct xfs_perag *pag;
> -
> - spin_lock(&mp->m_perag_lock);
> - pag = radix_tree_lookup(&mp->m_perag_tree, agno);
> - if (pag) {
> - ASSERT(atomic_read(&pag->pag_ref) >= 0);
> - /* catch leaks in the positive direction during testing */
> - ASSERT(atomic_read(&pag->pag_ref) < 1000);
> - atomic_inc(&pag->pag_ref);
> - }
> - spin_unlock(&mp->m_perag_lock);
> - return pag;
> -}
> -
> -static inline void
> -xfs_perag_put(struct xfs_perag *pag)
> -{
> - ASSERT(atomic_read(&pag->pag_ref) > 0);
> - atomic_dec(&pag->pag_ref);
> -}
> +struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno);
> +void xfs_perag_put(struct xfs_perag *pag);
>
> /*
> * Per-cpu superblock locking functions
>
> _______________________________________________
> xfs mailing list
> xfs@oss.sgi.com
> http://oss.sgi.com/mailman/listinfo/xfs
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 23+ messages in thread
end of thread, other threads:[~2010-01-05 18:07 UTC | newest]
Thread overview: 23+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-12-15 6:11 [PATCH 0/6] XFS: Fix growfs deadlocks and per-AG use after free V3 Dave Chinner
2009-12-15 6:11 ` [PATCH 1/6] XFS: rename xfs_get_perag Dave Chinner
2009-12-21 22:21 ` Alex Elder
2009-12-21 22:40 ` Dave Chinner
2009-12-15 6:11 ` [PATCH 2/6] XFS: Don't directly reference m_perag in allocation code Dave Chinner
2009-12-20 4:24 ` Dave Chinner
2009-12-22 14:18 ` Alex Elder
2009-12-15 6:11 ` [PATCH 3/6] XFS: Convert filestreams code to use per-ag get/put routines Dave Chinner
2009-12-22 15:08 ` [PATCH 3/6] XFS: Convert filestreams code to use per-ag get/putroutines Alex Elder
2009-12-15 6:11 ` [PATCH 4/6] XFS: convert remaining direct references to m_perag Dave Chinner
2009-12-23 20:49 ` Alex Elder
2009-12-15 6:11 ` [PATCH 5/6] XFS: Replace per-ag array with a radix tree Dave Chinner
2009-12-23 16:02 ` Christoph Hellwig
2009-12-23 22:08 ` Alex Elder
2009-12-26 4:17 ` Dave Chinner
2009-12-15 6:11 ` [PATCH 6/6] XFS: Reference count per-ag structures Dave Chinner
2009-12-23 22:12 ` Alex Elder
2009-12-20 4:28 ` [PATCH 7/6] XFS: Add trace points for per-ag refcount debugging Dave Chinner
2010-01-05 18:08 ` Alex Elder
-- strict thread matches above, loose matches on Subject: below --
2009-12-14 23:11 [PATCH 0/6] XFS: Fix growfs deadlocks and per-AG use after free V2 Dave Chinner
2009-12-14 23:11 ` [PATCH 6/6] XFS: Reference count per-ag structures Dave Chinner
2009-12-02 6:11 [PATCH 0/6] [XFS] Fix growfs deadlocks and per-AG use after free Dave Chinner
2009-12-02 6:11 ` [PATCH 6/6] [XFS] Reference count per-ag structures Dave Chinner
2009-12-10 23:48 ` Christoph Hellwig
2009-12-11 0:50 ` Dave Chinner
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox