* [PATCH 3/3] free inodes using destroy_inode
@ 2008-10-20 22:20 Christoph Hellwig
2008-10-21 3:07 ` Dave Chinner
0 siblings, 1 reply; 5+ messages in thread
From: Christoph Hellwig @ 2008-10-20 22:20 UTC (permalink / raw)
To: xfs
To make sure we free the security data inodes need to be freed using
the proper VFS helper (which we also need to export for this). To make
sure we don't corrupt the radix tree we need to add another special
case to xfs_reclaim for inodes that haven't been fully initialized yet.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Index: xfs-2.6/fs/inode.c
===================================================================
--- xfs-2.6.orig/fs/inode.c 2008-10-20 23:49:27.000000000 +0200
+++ xfs-2.6/fs/inode.c 2008-10-20 23:54:08.000000000 +0200
@@ -212,6 +212,7 @@ void destroy_inode(struct inode *inode)
else
kmem_cache_free(inode_cachep, (inode));
}
+EXPORT_SYMBOL(destroy_inode);
/*
Index: xfs-2.6/fs/xfs/xfs_iget.c
===================================================================
--- xfs-2.6.orig/fs/xfs/xfs_iget.c 2008-10-20 23:49:27.000000000 +0200
+++ xfs-2.6/fs/xfs/xfs_iget.c 2008-10-20 23:54:08.000000000 +0200
@@ -197,7 +197,7 @@ out_unlock:
write_unlock(&pag->pag_ici_lock);
radix_tree_preload_end();
out_destroy:
- xfs_idestroy(ip);
+ xfs_destroy_inode(ip);
return error;
}
Index: xfs-2.6/fs/xfs/xfs_inode.c
===================================================================
--- xfs-2.6.orig/fs/xfs/xfs_inode.c 2008-10-20 23:54:05.000000000 +0200
+++ xfs-2.6/fs/xfs/xfs_inode.c 2008-10-20 23:54:08.000000000 +0200
@@ -872,10 +872,8 @@ xfs_iread(
imap.im_blkno = bno;
error = xfs_imap(mp, tp, ip->i_ino, &imap,
XFS_IMAP_LOOKUP | imap_flags);
- if (error) {
- xfs_idestroy(ip);
- return error;
- }
+ if (error)
+ goto out_destroy_inode;
/*
* Fill in the fields in the inode that will be used to
@@ -887,10 +885,8 @@ xfs_iread(
ASSERT(bno == 0 || bno == imap.im_blkno);
error = xfs_imap_to_bp(mp, tp, &imap, &bp, XFS_BUF_LOCK, imap_flags);
- if (error) {
- xfs_idestroy(ip);
- return error;
- }
+ if (error)
+ goto out_destroy_inode;
dip = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset);
@@ -899,8 +895,6 @@ xfs_iread(
* (nfs or dmi) has a stale handle.
*/
if (be16_to_cpu(dip->di_core.di_magic) != XFS_DINODE_MAGIC) {
- xfs_idestroy(ip);
- xfs_trans_brelse(tp, bp);
#ifdef DEBUG
xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: "
"dip->di_core.di_magic (0x%x) != "
@@ -908,7 +902,8 @@ xfs_iread(
be16_to_cpu(dip->di_core.di_magic),
XFS_DINODE_MAGIC);
#endif /* DEBUG */
- return XFS_ERROR(EINVAL);
+ error = XFS_ERROR(EINVAL);
+ goto out_brelse;
}
/*
@@ -922,14 +917,12 @@ xfs_iread(
xfs_dinode_from_disk(&ip->i_d, &dip->di_core);
error = xfs_iformat(ip, dip);
if (error) {
- xfs_idestroy(ip);
- xfs_trans_brelse(tp, bp);
#ifdef DEBUG
xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: "
"xfs_iformat() returned error %d",
error);
#endif /* DEBUG */
- return error;
+ goto out_brelse;
}
} else {
ip->i_d.di_magic = be16_to_cpu(dip->di_core.di_magic);
@@ -995,6 +988,12 @@ xfs_iread(
xfs_trans_brelse(tp, bp);
*ipp = ip;
return 0;
+
+ out_brelse:
+ xfs_trans_brelse(tp, bp);
+ out_destroy_inode:
+ xfs_destroy_inode(ip);
+ return error;
}
/*
Index: xfs-2.6/fs/xfs/xfs_inode.h
===================================================================
--- xfs-2.6.orig/fs/xfs/xfs_inode.h 2008-10-20 23:54:05.000000000 +0200
+++ xfs-2.6/fs/xfs/xfs_inode.h 2008-10-20 23:54:08.000000000 +0200
@@ -309,6 +309,12 @@ static inline struct inode *VFS_I(struct
return &ip->i_vnode;
}
+static inline void xfs_destroy_inode(struct xfs_inode *ip)
+{
+ make_bad_inode(VFS_I(ip));
+ return destroy_inode(VFS_I(ip));
+}
+
/*
* i_flags helper functions
*/
Index: xfs-2.6/fs/xfs/xfs_vnodeops.c
===================================================================
--- xfs-2.6.orig/fs/xfs/xfs_vnodeops.c 2008-10-20 23:49:27.000000000 +0200
+++ xfs-2.6/fs/xfs/xfs_vnodeops.c 2008-10-20 23:55:31.000000000 +0200
@@ -2798,13 +2798,19 @@ int
xfs_reclaim(
xfs_inode_t *ip)
{
+ struct inode *inode = VFS_I(ip);
xfs_itrace_entry(ip);
- ASSERT(!VN_MAPPED(VFS_I(ip)));
+ ASSERT(!VN_MAPPED(inode));
+
+ if (unlikely(inode->i_state & I_NEW)) {
+ xfs_idestroy(ip);
+ return 0;
+ }
/* bad inode, get out here ASAP */
- if (VN_BAD(VFS_I(ip))) {
+ if (unlikely(is_bad_inode(inode))) {
xfs_ireclaim(ip);
return 0;
}
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH 3/3] free inodes using destroy_inode
2008-10-20 22:20 [PATCH 3/3] free inodes using destroy_inode Christoph Hellwig
@ 2008-10-21 3:07 ` Dave Chinner
2008-10-21 9:07 ` Christoph Hellwig
0 siblings, 1 reply; 5+ messages in thread
From: Dave Chinner @ 2008-10-21 3:07 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: xfs
On Tue, Oct 21, 2008 at 12:20:44AM +0200, Christoph Hellwig wrote:
> To make sure we free the security data inodes need to be freed using
> the proper VFS helper (which we also need to export for this). To make
> sure we don't corrupt the radix tree we need to add another special
> case to xfs_reclaim for inodes that haven't been fully initialized yet.
Yes, this should fix the problems destroying inodes in the
not-quite-fully-instantiated state. Couple of things, though.
> Index: xfs-2.6/fs/xfs/xfs_inode.c
> ===================================================================
> --- xfs-2.6.orig/fs/xfs/xfs_inode.c 2008-10-20 23:54:05.000000000 +0200
> +++ xfs-2.6/fs/xfs/xfs_inode.c 2008-10-20 23:54:08.000000000 +0200
> @@ -872,10 +872,8 @@ xfs_iread(
> imap.im_blkno = bno;
> error = xfs_imap(mp, tp, ip->i_ino, &imap,
> XFS_IMAP_LOOKUP | imap_flags);
> - if (error) {
> - xfs_idestroy(ip);
> - return error;
> - }
> + if (error)
> + goto out_destroy_inode;
^
Extra whitespace.
> @@ -887,10 +885,8 @@ xfs_iread(
> ASSERT(bno == 0 || bno == imap.im_blkno);
>
> error = xfs_imap_to_bp(mp, tp, &imap, &bp, XFS_BUF_LOCK, imap_flags);
> - if (error) {
> - xfs_idestroy(ip);
> - return error;
> - }
> + if (error)
> + goto out_destroy_inode;
^
Ditto.
> Index: xfs-2.6/fs/xfs/xfs_inode.h
> ===================================================================
> --- xfs-2.6.orig/fs/xfs/xfs_inode.h 2008-10-20 23:54:05.000000000 +0200
> +++ xfs-2.6/fs/xfs/xfs_inode.h 2008-10-20 23:54:08.000000000 +0200
> @@ -309,6 +309,12 @@ static inline struct inode *VFS_I(struct
> return &ip->i_vnode;
> }
>
> +static inline void xfs_destroy_inode(struct xfs_inode *ip)
> +{
> + make_bad_inode(VFS_I(ip));
> + return destroy_inode(VFS_I(ip));
> +}
Yes, makes sense to mark it bad first to avoid most of the
reclaim code.
> Index: xfs-2.6/fs/xfs/xfs_vnodeops.c
> ===================================================================
> --- xfs-2.6.orig/fs/xfs/xfs_vnodeops.c 2008-10-20 23:49:27.000000000 +0200
> +++ xfs-2.6/fs/xfs/xfs_vnodeops.c 2008-10-20 23:55:31.000000000 +0200
> @@ -2798,13 +2798,19 @@ int
> xfs_reclaim(
> xfs_inode_t *ip)
> {
> + struct inode *inode = VFS_I(ip);
>
> xfs_itrace_entry(ip);
>
> - ASSERT(!VN_MAPPED(VFS_I(ip)));
> + ASSERT(!VN_MAPPED(inode));
> +
> + if (unlikely(inode->i_state & I_NEW)) {
> + xfs_idestroy(ip);
> + return 0;
> + }
Can that happen? I thought xfs_iput_new() took care of clearing the
I_NEW flag via unlock_new_inode() and so there is no way that flag
can leak through to here. perhaps a comment explaining what the
error path is that leads to needing this check is in order....
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH 3/3] free inodes using destroy_inode
2008-10-21 3:07 ` Dave Chinner
@ 2008-10-21 9:07 ` Christoph Hellwig
2008-10-21 21:06 ` Dave Chinner
0 siblings, 1 reply; 5+ messages in thread
From: Christoph Hellwig @ 2008-10-21 9:07 UTC (permalink / raw)
To: Christoph Hellwig, xfs
On Tue, Oct 21, 2008 at 02:07:26PM +1100, Dave Chinner wrote:
> ^
> Extra whitespace.
>
> ^
> Ditto.
Fixed.
> Yes, makes sense to mark it bad first to avoid most of the
> reclaim code.
> Can that happen? I thought xfs_iput_new() took care of clearing the
> I_NEW flag via unlock_new_inode() and so there is no way that flag
> can leak through to here. perhaps a comment explaining what the
> error path is that leads to needing this check is in order....
The make_inode_bad isn't actually nessecary anymore - this was my first
attempt at skipping the flushing in xfs_reclaim, but it was still too
much as the radix tree removal for and inode that's not in the tree
tripped up quite badly. So I use I_NEW here to detect these half setup
inodes. Real I_NEW indoes still go through xfs_iput_new.
Updated version that has a comment explaining all this below:
Signed-off-by: Christoph Hellwig <hch@lst.de>
Index: xfs-2.6/fs/inode.c
===================================================================
--- xfs-2.6.orig/fs/inode.c 2008-10-20 23:49:27.000000000 +0200
+++ xfs-2.6/fs/inode.c 2008-10-20 23:54:08.000000000 +0200
@@ -212,6 +212,7 @@ void destroy_inode(struct inode *inode)
else
kmem_cache_free(inode_cachep, (inode));
}
+EXPORT_SYMBOL(destroy_inode);
/*
Index: xfs-2.6/fs/xfs/xfs_iget.c
===================================================================
--- xfs-2.6.orig/fs/xfs/xfs_iget.c 2008-10-20 23:49:27.000000000 +0200
+++ xfs-2.6/fs/xfs/xfs_iget.c 2008-10-20 23:54:08.000000000 +0200
@@ -197,7 +197,7 @@ out_unlock:
write_unlock(&pag->pag_ici_lock);
radix_tree_preload_end();
out_destroy:
- xfs_idestroy(ip);
+ xfs_destroy_inode(ip);
return error;
}
Index: xfs-2.6/fs/xfs/xfs_inode.c
===================================================================
--- xfs-2.6.orig/fs/xfs/xfs_inode.c 2008-10-20 23:54:05.000000000 +0200
+++ xfs-2.6/fs/xfs/xfs_inode.c 2008-10-21 09:24:30.000000000 +0200
@@ -872,10 +872,8 @@ xfs_iread(
imap.im_blkno = bno;
error = xfs_imap(mp, tp, ip->i_ino, &imap,
XFS_IMAP_LOOKUP | imap_flags);
- if (error) {
- xfs_idestroy(ip);
- return error;
- }
+ if (error)
+ goto out_destroy_inode;
/*
* Fill in the fields in the inode that will be used to
@@ -887,10 +885,8 @@ xfs_iread(
ASSERT(bno == 0 || bno == imap.im_blkno);
error = xfs_imap_to_bp(mp, tp, &imap, &bp, XFS_BUF_LOCK, imap_flags);
- if (error) {
- xfs_idestroy(ip);
- return error;
- }
+ if (error)
+ goto out_destroy_inode;
dip = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset);
@@ -899,8 +895,6 @@ xfs_iread(
* (nfs or dmi) has a stale handle.
*/
if (be16_to_cpu(dip->di_core.di_magic) != XFS_DINODE_MAGIC) {
- xfs_idestroy(ip);
- xfs_trans_brelse(tp, bp);
#ifdef DEBUG
xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: "
"dip->di_core.di_magic (0x%x) != "
@@ -908,7 +902,8 @@ xfs_iread(
be16_to_cpu(dip->di_core.di_magic),
XFS_DINODE_MAGIC);
#endif /* DEBUG */
- return XFS_ERROR(EINVAL);
+ error = XFS_ERROR(EINVAL);
+ goto out_brelse;
}
/*
@@ -922,14 +917,12 @@ xfs_iread(
xfs_dinode_from_disk(&ip->i_d, &dip->di_core);
error = xfs_iformat(ip, dip);
if (error) {
- xfs_idestroy(ip);
- xfs_trans_brelse(tp, bp);
#ifdef DEBUG
xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: "
"xfs_iformat() returned error %d",
error);
#endif /* DEBUG */
- return error;
+ goto out_brelse;
}
} else {
ip->i_d.di_magic = be16_to_cpu(dip->di_core.di_magic);
@@ -995,6 +988,12 @@ xfs_iread(
xfs_trans_brelse(tp, bp);
*ipp = ip;
return 0;
+
+ out_brelse:
+ xfs_trans_brelse(tp, bp);
+ out_destroy_inode:
+ xfs_destroy_inode(ip);
+ return error;
}
/*
Index: xfs-2.6/fs/xfs/xfs_inode.h
===================================================================
--- xfs-2.6.orig/fs/xfs/xfs_inode.h 2008-10-20 23:54:05.000000000 +0200
+++ xfs-2.6/fs/xfs/xfs_inode.h 2008-10-21 09:24:42.000000000 +0200
@@ -309,6 +309,11 @@ static inline struct inode *VFS_I(struct
return &ip->i_vnode;
}
+static inline void xfs_destroy_inode(struct xfs_inode *ip)
+{
+ return destroy_inode(VFS_I(ip));
+}
+
/*
* i_flags helper functions
*/
Index: xfs-2.6/fs/xfs/xfs_vnodeops.c
===================================================================
--- xfs-2.6.orig/fs/xfs/xfs_vnodeops.c 2008-10-20 23:49:27.000000000 +0200
+++ xfs-2.6/fs/xfs/xfs_vnodeops.c 2008-10-21 09:26:38.000000000 +0200
@@ -2798,13 +2798,29 @@ int
xfs_reclaim(
xfs_inode_t *ip)
{
+ struct inode *inode = VFS_I(ip);
xfs_itrace_entry(ip);
- ASSERT(!VN_MAPPED(VFS_I(ip)));
+ ASSERT(!VN_MAPPED(inode));
+
+ /*
+ * If we get an uninitialized inode, immediate destroy it here and
+ * don't go through writeback or removing it from the radix-tree to
+ * which is has never been added.
+ *
+ * Note that I_NEW is a rather fragile way to detect this as I_NEW
+ * is still set by partially set-up inode that has been added to the
+ * radix-tree. But all those failure cases go through xfs_iput_new
+ * and thus never end up with I_NEW set here.
+ */
+ if (unlikely(inode->i_state & I_NEW)) {
+ xfs_idestroy(ip);
+ return 0;
+ }
/* bad inode, get out here ASAP */
- if (VN_BAD(VFS_I(ip))) {
+ if (unlikely(is_bad_inode(inode))) {
xfs_ireclaim(ip);
return 0;
}
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH 3/3] free inodes using destroy_inode
2008-10-21 9:07 ` Christoph Hellwig
@ 2008-10-21 21:06 ` Dave Chinner
2008-10-22 16:28 ` Christoph Hellwig
0 siblings, 1 reply; 5+ messages in thread
From: Dave Chinner @ 2008-10-21 21:06 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: Christoph Hellwig, xfs
On Tue, Oct 21, 2008 at 05:07:08AM -0400, Christoph Hellwig wrote:
> On Tue, Oct 21, 2008 at 02:07:26PM +1100, Dave Chinner wrote:
> > ^
> > Extra whitespace.
> >
> > ^
> > Ditto.
>
> Fixed.
>
> > Yes, makes sense to mark it bad first to avoid most of the
> > reclaim code.
>
> > Can that happen? I thought xfs_iput_new() took care of clearing the
> > I_NEW flag via unlock_new_inode() and so there is no way that flag
> > can leak through to here. perhaps a comment explaining what the
> > error path is that leads to needing this check is in order....
>
> The make_inode_bad isn't actually nessecary anymore - this was my first
> attempt at skipping the flushing in xfs_reclaim, but it was still too
> much as the radix tree removal for and inode that's not in the tree
> tripped up quite badly. So I use I_NEW here to detect these half setup
> inodes. Real I_NEW indoes still go through xfs_iput_new.
Hmmmm - I still don't see that as possible. We don't set I_NEW until
we are inside xfs_setup_inode(), which occurs after we insert
the inode into the radix tree. xfs_setup_inode() also calls
unlock_new_inode(), so the I_NEW flag is cleared before it returns,
too. So I can't really see how this check in reclaim does anything....
AFAICT, once we've inserted the new inode into the radix tree,
we can't get an error before xfs_setup_inode() is called - even
in the allocation case. Hence once we're in the radix tree,
xfs_iput_new() should be called to cleanup.
All the cases that xfs_destroy_inode() handles are before the inode
is inserted into the radix tree, so marking the XFS inode XFS_IBAD
in xfs_destroy_inode() is probably a much more reliable way to
detect immediate destroy cases in the reclaim code than relying
on I_NEW......
Thoughts?
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH 3/3] free inodes using destroy_inode
2008-10-21 21:06 ` Dave Chinner
@ 2008-10-22 16:28 ` Christoph Hellwig
0 siblings, 0 replies; 5+ messages in thread
From: Christoph Hellwig @ 2008-10-22 16:28 UTC (permalink / raw)
To: xfs
On Wed, Oct 22, 2008 at 08:06:33AM +1100, Dave Chinner wrote:
> Hmmmm - I still don't see that as possible. We don't set I_NEW until
> we are inside xfs_setup_inode(), which occurs after we insert
> the inode into the radix tree. xfs_setup_inode() also calls
> unlock_new_inode(), so the I_NEW flag is cleared before it returns,
> too. So I can't really see how this check in reclaim does anything....
>
> AFAICT, once we've inserted the new inode into the radix tree,
> we can't get an error before xfs_setup_inode() is called - even
> in the allocation case. Hence once we're in the radix tree,
> xfs_iput_new() should be called to cleanup.
>
> All the cases that xfs_destroy_inode() handles are before the inode
> is inserted into the radix tree, so marking the XFS inode XFS_IBAD
> in xfs_destroy_inode() is probably a much more reliable way to
> detect immediate destroy cases in the reclaim code than relying
> on I_NEW......
I've put in some instrumentation and we indeed newer hit the I_NEW
case. I also can't reproduce the problems I saw before anymore -
in fact calling radix_tree_remove on an inode is explicitly fine
per documentation and does work. I'll send a reworked patch that
just uses make_inode_bad as this has passed xfsqa a few times now.
I've also removed patch 2 for now a it's not a bug fix an I'll put
it into my later series that cleans up a lot of mess in that area.
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2008-10-22 16:27 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-10-20 22:20 [PATCH 3/3] free inodes using destroy_inode Christoph Hellwig
2008-10-21 3:07 ` Dave Chinner
2008-10-21 9:07 ` Christoph Hellwig
2008-10-21 21:06 ` Dave Chinner
2008-10-22 16:28 ` Christoph Hellwig
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox