public inbox for linux-xfs@vger.kernel.org
 help / color / mirror / Atom feed
From: Patrick Schreurs <patrick@news-service.com>
To: Christoph Hellwig <hch@infradead.org>
Cc: Tommy van Leeuwen <tommy@news-service.com>, xfs@oss.sgi.com
Subject: Re: [PATCH] Inode reclaim fixes (was Re: 2.6.31	xfs_fs_destroy_inode: cannot reclaim)
Date: Tue, 09 Feb 2010 09:48:38 +0100	[thread overview]
Message-ID: <4B712166.9010701@news-service.com> (raw)
In-Reply-To: <20100208194226.GD9527@infradead.org>

[-- Attachment #1: Type: text/plain, Size: 551 bytes --]

On 8-2-2010 20:42, Christoph Hellwig wrote:
> On Mon, Feb 01, 2010 at 05:52:30PM +0100, Patrick Schreurs wrote:
>> Hello Dave,
>>
>> I'm afraid we had another crash a few days ago. Have a look at the
>> screenshot. Can you make anything out of it? This server is running
>> 2.6.32.3 with your inode-reclaim patch applied and XFS_DEBUG still
>> enabled.
>
> Just wondering, which set of patches is this exactly?

This is a clean 2.6.32.3 with the xfs-inode-reclaim-2.6.32 patch i 
received from Dave on January 8th (see attachment).

Thanks,

-Patrick

[-- Attachment #2: xfs-inode-reclaim-2.6.32 --]
[-- Type: text/plain, Size: 12580 bytes --]

diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 18a4b8e..f3c622a 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -930,13 +930,37 @@ xfs_fs_alloc_inode(
  */
 STATIC void
 xfs_fs_destroy_inode(
-	struct inode	*inode)
+	struct inode		*inode)
 {
-	xfs_inode_t		*ip = XFS_I(inode);
+	struct xfs_inode	*ip = XFS_I(inode);
+
+	xfs_itrace_entry(ip);
 
 	XFS_STATS_INC(vn_reclaim);
-	if (xfs_reclaim(ip))
-		panic("%s: cannot reclaim 0x%p\n", __func__, inode);
+
+	/* bad inode, get out here ASAP */
+	if (is_bad_inode(inode))
+		goto out_reclaim;
+
+	xfs_ioend_wait(ip);
+
+	ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
+
+	/*
+	 * We should never get here with one of the reclaim flags already set.
+	 */
+	ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIMABLE));
+	ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM));
+
+	/*
+	 * we always use background reclaim here because even if the
+	 * inode is clean, it still may be under IO and hence we have
+	 * to take the flush lock. The background reclaim path handles
+	 * this more efficiently than we can here, so simply let background
+	 * reclaim tear down all inodes.
+	 */
+out_reclaim:
+	xfs_inode_set_reclaim_tag(ip);
 }
 
 /*
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 961df0a..897fcb5 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -54,7 +54,8 @@ xfs_inode_ag_lookup(
 	struct xfs_mount	*mp,
 	struct xfs_perag	*pag,
 	uint32_t		*first_index,
-	int			tag)
+	int			tag,
+	int			write_lock)
 {
 	int			nr_found;
 	struct xfs_inode	*ip;
@@ -64,7 +65,10 @@ xfs_inode_ag_lookup(
 	 * as the tree is sparse and a gang lookup walks to find
 	 * the number of objects requested.
 	 */
-	read_lock(&pag->pag_ici_lock);
+	if (write_lock)
+		write_lock(&pag->pag_ici_lock);
+	else
+		read_lock(&pag->pag_ici_lock);
 	if (tag == XFS_ICI_NO_TAG) {
 		nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
 				(void **)&ip, *first_index, 1);
@@ -88,7 +92,10 @@ xfs_inode_ag_lookup(
 	return ip;
 
 unlock:
-	read_unlock(&pag->pag_ici_lock);
+	if (write_lock)
+		write_unlock(&pag->pag_ici_lock);
+	else
+		read_unlock(&pag->pag_ici_lock);
 	return NULL;
 }
 
@@ -99,7 +106,8 @@ xfs_inode_ag_walk(
 	int			(*execute)(struct xfs_inode *ip,
 					   struct xfs_perag *pag, int flags),
 	int			flags,
-	int			tag)
+	int			tag,
+	int			write_lock)
 {
 	struct xfs_perag	*pag = &mp->m_perag[ag];
 	uint32_t		first_index;
@@ -113,7 +121,8 @@ restart:
 		int		error = 0;
 		xfs_inode_t	*ip;
 
-		ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag);
+		ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag,
+						write_lock);
 		if (!ip)
 			break;
 
@@ -147,7 +156,8 @@ xfs_inode_ag_iterator(
 	int			(*execute)(struct xfs_inode *ip,
 					   struct xfs_perag *pag, int flags),
 	int			flags,
-	int			tag)
+	int			tag,
+	int			write_lock)
 {
 	int			error = 0;
 	int			last_error = 0;
@@ -156,7 +166,8 @@ xfs_inode_ag_iterator(
 	for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
 		if (!mp->m_perag[ag].pag_ici_init)
 			continue;
-		error = xfs_inode_ag_walk(mp, ag, execute, flags, tag);
+		error = xfs_inode_ag_walk(mp, ag, execute, flags, tag,
+					write_lock);
 		if (error) {
 			last_error = error;
 			if (error == EFSCORRUPTED)
@@ -180,18 +191,20 @@ xfs_sync_inode_valid(
 		return EFSCORRUPTED;
 	}
 
-	/*
-	 * If we can't get a reference on the inode, it must be in reclaim.
-	 * Leave it for the reclaim code to flush. Also avoid inodes that
-	 * haven't been fully initialised.
-	 */
+	/* avoid new or reclaimable inodes. Leave for reclaim code to flush */
+	if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM)) {
+		read_unlock(&pag->pag_ici_lock);
+		return ENOENT;
+	}
+
+	/* If we can't get a reference on the inode, it must be in reclaim. */
 	if (!igrab(inode)) {
 		read_unlock(&pag->pag_ici_lock);
 		return ENOENT;
 	}
 	read_unlock(&pag->pag_ici_lock);
 
-	if (is_bad_inode(inode) || xfs_iflags_test(ip, XFS_INEW)) {
+	if (is_bad_inode(inode)) {
 		IRELE(ip);
 		return ENOENT;
 	}
@@ -281,7 +294,7 @@ xfs_sync_data(
 	ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
 
 	error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags,
-				      XFS_ICI_NO_TAG);
+				      XFS_ICI_NO_TAG, 0);
 	if (error)
 		return XFS_ERROR(error);
 
@@ -303,7 +316,7 @@ xfs_sync_attr(
 	ASSERT((flags & ~SYNC_WAIT) == 0);
 
 	return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags,
-				     XFS_ICI_NO_TAG);
+				     XFS_ICI_NO_TAG, 0);
 }
 
 STATIC int
@@ -663,36 +676,11 @@ xfs_syncd_stop(
 	kthread_stop(mp->m_sync_task);
 }
 
-int
+STATIC int
 xfs_reclaim_inode(
 	xfs_inode_t	*ip,
-	int		locked,
 	int		sync_mode)
 {
-	xfs_perag_t	*pag = xfs_get_perag(ip->i_mount, ip->i_ino);
-
-	/* The hash lock here protects a thread in xfs_iget_core from
-	 * racing with us on linking the inode back with a vnode.
-	 * Once we have the XFS_IRECLAIM flag set it will not touch
-	 * us.
-	 */
-	write_lock(&pag->pag_ici_lock);
-	spin_lock(&ip->i_flags_lock);
-	if (__xfs_iflags_test(ip, XFS_IRECLAIM) ||
-	    !__xfs_iflags_test(ip, XFS_IRECLAIMABLE)) {
-		spin_unlock(&ip->i_flags_lock);
-		write_unlock(&pag->pag_ici_lock);
-		if (locked) {
-			xfs_ifunlock(ip);
-			xfs_iunlock(ip, XFS_ILOCK_EXCL);
-		}
-		return -EAGAIN;
-	}
-	__xfs_iflags_set(ip, XFS_IRECLAIM);
-	spin_unlock(&ip->i_flags_lock);
-	write_unlock(&pag->pag_ici_lock);
-	xfs_put_perag(ip->i_mount, pag);
-
 	/*
 	 * If the inode is still dirty, then flush it out.  If the inode
 	 * is not in the AIL, then it will be OK to flush it delwri as
@@ -704,14 +692,14 @@ xfs_reclaim_inode(
 	 * We get the flush lock regardless, though, just to make sure
 	 * we don't free it while it is being flushed.
 	 */
-	if (!locked) {
-		xfs_ilock(ip, XFS_ILOCK_EXCL);
-		xfs_iflock(ip);
-	}
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	xfs_iflock(ip);
 
 	/*
 	 * In the case of a forced shutdown we rely on xfs_iflush() to
 	 * wait for the inode to be unpinned before returning an error.
+	 * Because we hold the flush lock, we know that the inode cannot
+	 * be under IO, so if it reports clean it can be reclaimed.
 	 */
 	if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) {
 		/* synchronize with xfs_iflush_done */
@@ -771,14 +759,24 @@ xfs_reclaim_inode_now(
 	struct xfs_perag	*pag,
 	int			flags)
 {
-	/* ignore if already under reclaim */
-	if (xfs_iflags_test(ip, XFS_IRECLAIM)) {
-		read_unlock(&pag->pag_ici_lock);
+	/*
+	 * The radix tree lock here protects a thread in xfs_iget from racing
+	 * with us starting reclaim on the inode.  Once we have the
+	 * XFS_IRECLAIM flag set it will not touch us.
+	 */
+	spin_lock(&ip->i_flags_lock);
+	ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE));
+	if (__xfs_iflags_test(ip, XFS_IRECLAIM)) {
+		/* ignore as it is already under reclaim */
+		spin_unlock(&ip->i_flags_lock);
+		write_unlock(&pag->pag_ici_lock);
 		return 0;
 	}
-	read_unlock(&pag->pag_ici_lock);
+	__xfs_iflags_set(ip, XFS_IRECLAIM);
+	spin_unlock(&ip->i_flags_lock);
+	write_unlock(&pag->pag_ici_lock);
 
-	return xfs_reclaim_inode(ip, 0, flags);
+	return xfs_reclaim_inode(ip, flags);
 }
 
 int
@@ -787,5 +785,5 @@ xfs_reclaim_inodes(
 	int		mode)
 {
 	return xfs_inode_ag_iterator(mp, xfs_reclaim_inode_now, mode,
-					XFS_ICI_RECLAIM_TAG);
+					XFS_ICI_RECLAIM_TAG, 1);
 }
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index 27920eb..ea932b4 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -44,7 +44,6 @@ void xfs_quiesce_attr(struct xfs_mount *mp);
 
 void xfs_flush_inodes(struct xfs_inode *ip);
 
-int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode);
 int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
 
 void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
@@ -55,6 +54,6 @@ void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
 int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag);
 int xfs_inode_ag_iterator(struct xfs_mount *mp,
 	int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
-	int flags, int tag);
+	int flags, int tag, int write_lock);
 
 #endif
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 5d1a3b9..f99cfa4 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -893,7 +893,7 @@ xfs_qm_dqrele_all_inodes(
 	uint		 flags)
 {
 	ASSERT(mp->m_quotainfo);
-	xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG);
+	xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG, 0);
 }
 
 /*------------------------------------------------------------------------*/
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 80e5264..40e8775 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -511,17 +511,21 @@ xfs_ireclaim(
 {
 	struct xfs_mount	*mp = ip->i_mount;
 	struct xfs_perag	*pag;
+	xfs_agino_t		agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
 
 	XFS_STATS_INC(xs_ig_reclaims);
 
 	/*
-	 * Remove the inode from the per-AG radix tree.  It doesn't matter
-	 * if it was never added to it because radix_tree_delete can deal
-	 * with that case just fine.
+	 * Remove the inode from the per-AG radix tree.
+	 *
+	 * Because radix_tree_delete won't complain even if the item was never
+	 * added to the tree assert that it's been there before to catch
+	 * problems with the inode life time early on.
 	 */
 	pag = xfs_get_perag(mp, ip->i_ino);
 	write_lock(&pag->pag_ici_lock);
-	radix_tree_delete(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino));
+	if (!radix_tree_delete(&pag->pag_ici_root, agino))
+		ASSERT(0);
 	write_unlock(&pag->pag_ici_lock);
 	xfs_put_perag(mp, pag);
 
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index b92a4fa..13d7d21 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2877,10 +2877,14 @@ xfs_iflush(
 	mp = ip->i_mount;
 
 	/*
-	 * If the inode isn't dirty, then just release the inode
-	 * flush lock and do nothing.
+	 * If the inode isn't dirty, then just release the inode flush lock and
+	 * do nothing. Treat stale inodes the same; we cannot rely on the
+	 * backing buffer remaining stale in cache for the remaining life of
+	 * the stale inode and so xfs_itobp() below may give us a buffer that
+	 * no longer contains inodes below. Doing this stale check here also
+	 * avoids forcing the log on pinned, stale inodes.
 	 */
-	if (xfs_inode_clean(ip)) {
+	if (xfs_inode_clean(ip) || xfs_iflags_test(ip, XFS_ISTALE)) {
 		xfs_ifunlock(ip);
 		return 0;
 	}
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index b572f7e..3fac146 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -2456,46 +2456,6 @@ xfs_set_dmattrs(
 	return error;
 }
 
-int
-xfs_reclaim(
-	xfs_inode_t	*ip)
-{
-
-	xfs_itrace_entry(ip);
-
-	ASSERT(!VN_MAPPED(VFS_I(ip)));
-
-	/* bad inode, get out here ASAP */
-	if (is_bad_inode(VFS_I(ip))) {
-		xfs_ireclaim(ip);
-		return 0;
-	}
-
-	xfs_ioend_wait(ip);
-
-	ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
-
-	/*
-	 * If we have nothing to flush with this inode then complete the
-	 * teardown now, otherwise break the link between the xfs inode and the
-	 * linux inode and clean up the xfs inode later. This avoids flushing
-	 * the inode to disk during the delete operation itself.
-	 *
-	 * When breaking the link, we need to set the XFS_IRECLAIMABLE flag
-	 * first to ensure that xfs_iunpin() will never see an xfs inode
-	 * that has a linux inode being reclaimed. Synchronisation is provided
-	 * by the i_flags_lock.
-	 */
-	if (!ip->i_update_core && (ip->i_itemp == NULL)) {
-		xfs_ilock(ip, XFS_ILOCK_EXCL);
-		xfs_iflock(ip);
-		xfs_iflags_set(ip, XFS_IRECLAIMABLE);
-		return xfs_reclaim_inode(ip, 1, XFS_IFLUSH_DELWRI_ELSE_SYNC);
-	}
-	xfs_inode_set_reclaim_tag(ip);
-	return 0;
-}
-
 /*
  * xfs_alloc_file_space()
  *      This routine allocates disk space for the given file.
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index a9e102d..167a467 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -38,7 +38,6 @@ int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name,
 		const char *target_path, mode_t mode, struct xfs_inode **ipp,
 		cred_t *credp);
 int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state);
-int xfs_reclaim(struct xfs_inode *ip);
 int xfs_change_file_space(struct xfs_inode *ip, int cmd,
 		xfs_flock64_t *bf, xfs_off_t offset, int attr_flags);
 int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name,

[-- Attachment #3: Type: text/plain, Size: 121 bytes --]

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

  reply	other threads:[~2010-02-09  8:47 UTC|newest]

Thread overview: 42+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-09-16 10:27 2.6.31 xfs_fs_destroy_inode: cannot reclaim Tommy van Leeuwen
2009-09-17 18:59 ` Christoph Hellwig
2009-09-29 10:15   ` Patrick Schreurs
2009-09-29 12:57     ` Christoph Hellwig
2009-09-30 10:48       ` Patrick Schreurs
2009-09-30 12:41         ` Christoph Hellwig
2009-10-02 14:24           ` Bas Couwenberg
2009-10-05 21:43             ` Christoph Hellwig
2009-10-06  9:04               ` Patrick Schreurs
2009-10-07  1:19                 ` Christoph Hellwig
2009-10-08  8:45                   ` Patrick Schreurs
2009-10-11  7:43                   ` Patrick Schreurs
2009-10-11 12:24                     ` Christoph Hellwig
2009-10-12 23:38                     ` Christoph Hellwig
2009-10-15 15:06                       ` Tommy van Leeuwen
2009-10-18 23:59                         ` Christoph Hellwig
2009-10-19  1:17                           ` Dave Chinner
2009-10-19  3:53                             ` Christoph Hellwig
2009-10-19  1:16                       ` Dave Chinner
2009-10-19  3:54                         ` Christoph Hellwig
2009-10-20  3:40                           ` Dave Chinner
2009-10-21  9:45                             ` Tommy van Leeuwen
2009-10-22  8:59                               ` Christoph Hellwig
2009-10-27 10:41                                 ` Tommy van Leeuwen
     [not found]                                   ` <89c4f90c0910280519k759230c1r7b1586932ac792f7@mail.gmail.com>
2009-10-30 10:16                                     ` Christoph Hellwig
2009-11-03 14:46                                       ` Patrick Schreurs
2009-11-14 16:21                                         ` Christoph Hellwig
     [not found]                                           ` <4B0A8075.8080008@news-service.com>
     [not found]                                             ` <20091211115932.GA20632@infradead.org>
     [not found]                                               ` <4B3F9F88.9030307@news-service.com>
     [not found]                                                 ` <20100107110446.GA13802@discord.disaster>
     [not found]                                                   ` <4B45CFAC.4000607@news-service.com>
2010-01-08 11:31                                                     ` [PATCH] Inode reclaim fixes (was Re: 2.6.31 xfs_fs_destroy_inode: cannot reclaim) Dave Chinner
2010-01-11 20:22                                                       ` Patrick Schreurs
2010-01-15 11:01                                                       ` Patrick Schreurs
2010-02-01 16:52                                                         ` Patrick Schreurs
2010-02-08 10:16                                                           ` Patrick Schreurs
2010-02-08 19:42                                                           ` Christoph Hellwig
2010-02-09  8:48                                                             ` Patrick Schreurs [this message]
2010-02-09 10:31                                                               ` Christoph Hellwig
2010-02-10 12:42                                                                 ` Patrick Schreurs
2010-02-10 14:55                                                                   ` Christoph Hellwig
2010-02-10 15:42                                                                     ` Patrick Schreurs
2010-02-10 15:47                                                                       ` Christoph Hellwig
2010-02-24 18:30                                                                       ` Patrick Schreurs
2010-02-25 23:45                                                                         ` Dave Chinner
2010-03-01  9:51                                                                           ` Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4B712166.9010701@news-service.com \
    --to=patrick@news-service.com \
    --cc=hch@infradead.org \
    --cc=tommy@news-service.com \
    --cc=xfs@oss.sgi.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox