public inbox for linux-xfs@vger.kernel.org
 help / color / mirror / Atom feed
From: Dave Chinner <david@fromorbit.com>
To: xfs@oss.sgi.com
Subject: [PATCH 15/18] xfs: batch inode reclaim lookup
Date: Mon, 27 Sep 2010 11:47:50 +1000	[thread overview]
Message-ID: <1285552073-14663-16-git-send-email-david@fromorbit.com> (raw)
In-Reply-To: <1285552073-14663-1-git-send-email-david@fromorbit.com>

From: Dave Chinner <dchinner@redhat.com>

Batch and optimise the per-ag inode lookup for reclaim to minimise
scanning overhead. This involves gang lookups on the radix trees to
get multiple inodes during each tree walk, and tighter validation of
what inodes can be reclaimed without blocking before we take any
locks.

This is based on ideas suggested in a proof-of-concept patch
posted by Nick Piggin.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Alex Elder <aelder@sgi.com>
---
 fs/xfs/linux-2.6/xfs_sync.c |  110 ++++++++++++++++++++++++++++++-------------
 1 files changed, 77 insertions(+), 33 deletions(-)

diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 892ce33..f8e0644 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -623,6 +623,43 @@ __xfs_inode_clear_reclaim_tag(
 }
 
 /*
+ * Grab the inode for reclaim exclusively.
+ * Return 0 if we grabbed it, non-zero otherwise.
+ */
+STATIC int
+xfs_reclaim_inode_grab(
+	struct xfs_inode	*ip,
+	int			flags)
+{
+
+	/*
+	 * Do some unlocked checks first to avoid unnecessary lock traffic.
+	 * The first is a flush lock check, the second is an already in reclaim
+	 * check. Only do these checks if we are not going to block on locks.
+	 */
+	if ((flags & SYNC_TRYLOCK) &&
+	    (!ip->i_flush.done || __xfs_iflags_test(ip, XFS_IRECLAIM))) {
+		return 1;
+	}
+
+	/*
+	 * The radix tree lock here protects a thread in xfs_iget from racing
+	 * with us starting reclaim on the inode.  Once we have the
+	 * XFS_IRECLAIM flag set it will not touch us.
+	 */
+	spin_lock(&ip->i_flags_lock);
+	ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE));
+	if (__xfs_iflags_test(ip, XFS_IRECLAIM)) {
+		/* ignore as it is already under reclaim */
+		spin_unlock(&ip->i_flags_lock);
+		return 1;
+	}
+	__xfs_iflags_set(ip, XFS_IRECLAIM);
+	spin_unlock(&ip->i_flags_lock);
+	return 0;
+}
+
+/*
  * Inodes in different states need to be treated differently, and the return
  * value of xfs_iflush is not sufficient to get this right. The following table
  * lists the inode states and the reclaim actions necessary for non-blocking
@@ -680,23 +717,6 @@ xfs_reclaim_inode(
 {
 	int	error = 0;
 
-	/*
-	 * The radix tree lock here protects a thread in xfs_iget from racing
-	 * with us starting reclaim on the inode.  Once we have the
-	 * XFS_IRECLAIM flag set it will not touch us.
-	 */
-	spin_lock(&ip->i_flags_lock);
-	ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE));
-	if (__xfs_iflags_test(ip, XFS_IRECLAIM)) {
-		/* ignore as it is already under reclaim */
-		spin_unlock(&ip->i_flags_lock);
-		write_unlock(&pag->pag_ici_lock);
-		return 0;
-	}
-	__xfs_iflags_set(ip, XFS_IRECLAIM);
-	spin_unlock(&ip->i_flags_lock);
-	write_unlock(&pag->pag_ici_lock);
-
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	if (!xfs_iflock_nowait(ip)) {
 		if (!(sync_mode & SYNC_WAIT))
@@ -813,16 +833,19 @@ xfs_reclaim_inodes_ag(
 	while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
 		unsigned long	first_index = 0;
 		int		done = 0;
+		int		nr_found = 0;
 
 		ag = pag->pag_agno + 1;
 
 		do {
-			struct xfs_inode *ip;
-			int	nr_found;
+			struct xfs_inode *batch[XFS_LOOKUP_BATCH];
+			int	i;
 
 			write_lock(&pag->pag_ici_lock);
-			nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root,
-					(void **)&ip, first_index, 1,
+			nr_found = radix_tree_gang_lookup_tag(
+					&pag->pag_ici_root,
+					(void **)batch, first_index,
+					XFS_LOOKUP_BATCH,
 					XFS_ICI_RECLAIM_TAG);
 			if (!nr_found) {
 				write_unlock(&pag->pag_ici_lock);
@@ -830,20 +853,41 @@ xfs_reclaim_inodes_ag(
 			}
 
 			/*
-			 * Update the index for the next lookup. Catch overflows
-			 * into the next AG range which can occur if we have inodes
-			 * in the last block of the AG and we are currently
-			 * pointing to the last inode.
+			 * Grab the inodes before we drop the lock. If we found
+			 * nothing, nr_found == 0 and the loop will be skipped.
 			 */
-			first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
-			if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
-				done = 1;
+			for (i = 0; i < nr_found; i++) {
+				struct xfs_inode *ip = batch[i];
+
+				if (done || xfs_reclaim_inode_grab(ip, flags))
+					batch[i] = NULL;
+
+				/*
+				 * Update the index for the next lookup. Catch
+				 * overflows into the next AG range which can
+				 * occur if we have inodes in the last block of
+				 * the AG and we are currently pointing to the
+				 * last inode.
+				 */
+				first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
+				if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
+					done = 1;
+			}
 
-			error = xfs_reclaim_inode(ip, pag, flags);
-			if (error && last_error != EFSCORRUPTED)
-				last_error = error;
+			/* unlock now we've grabbed the inodes. */
+			write_unlock(&pag->pag_ici_lock);
+
+			for (i = 0; i < nr_found; i++) {
+				if (!batch[i])
+					continue;
+				error = xfs_reclaim_inode(batch[i], pag, flags);
+				if (error && last_error != EFSCORRUPTED)
+					last_error = error;
+			}
+
+			*nr_to_scan -= XFS_LOOKUP_BATCH;
 
-		} while (!done && (*nr_to_scan)--);
+		} while (nr_found && !done && *nr_to_scan > 0);
 
 		xfs_perag_put(pag);
 	}
@@ -879,7 +923,7 @@ xfs_reclaim_inode_shrink(
 		if (!(gfp_mask & __GFP_FS))
 			return -1;
 
-		xfs_reclaim_inodes_ag(mp, 0, &nr_to_scan);
+		xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK, &nr_to_scan);
 		/* terminate if we don't exhaust the scan */
 		if (nr_to_scan > 0)
 			return -1;
-- 
1.7.1

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

  parent reply	other threads:[~2010-09-27  1:47 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-09-27  1:47 [PATCH 0/18] xfs: metadata scalability V4 Dave Chinner
2010-09-27  1:47 ` [PATCH 01/18] xfs: force background CIL push under sustained load Dave Chinner
2010-09-27  1:47 ` [PATCH 02/18] xfs: reduce the number of CIL lock round trips during commit Dave Chinner
2010-09-27  1:47 ` [PATCH 03/18] xfs: remove debug assert for per-ag reference counting Dave Chinner
2010-09-27  1:47 ` [PATCH 04/18] xfs: lockless per-ag lookups Dave Chinner
2010-09-27  1:47 ` [PATCH 05/18] xfs: don't use vfs writeback for pure metadata modifications Dave Chinner
2010-09-27  4:53   ` Christoph Hellwig
2010-09-27  1:47 ` [PATCH 06/18] xfs: rename xfs_buf_get_nodaddr to be more appropriate Dave Chinner
2010-09-27  1:47 ` [PATCH 07/18] xfs: introduced uncached buffer read primitve Dave Chinner
2010-09-27  1:47 ` [PATCH 08/18] xfs: store xfs_mount in the buftarg instead of in the xfs_buf Dave Chinner
2010-09-27  1:47 ` [PATCH 09/18] xfs: kill XBF_FS_MANAGED buffers Dave Chinner
2010-09-27  1:47 ` [PATCH 10/18] xfs: use unhashed buffers for size checks Dave Chinner
2010-09-27  1:47 ` [PATCH 11/18] xfs: remove buftarg hash for external devices Dave Chinner
2010-09-27  1:47 ` [PATCH 12/18] xfs: split inode AG walking into separate code for reclaim Dave Chinner
2010-09-27  1:47 ` [PATCH 13/18] xfs: split out inode walk inode grabbing Dave Chinner
2010-09-27 15:58   ` Alex Elder
2010-09-27  1:47 ` [PATCH 14/18] xfs: implement batched inode lookups for AG walking Dave Chinner
2010-09-27 17:50   ` Alex Elder
2010-09-27  1:47 ` Dave Chinner [this message]
2010-09-27  1:47 ` [PATCH 16/18] xfs: serialise inode reclaim within an AG Dave Chinner
2010-09-27  1:47 ` [PATCH 17/18] xfs: convert buffer cache hash to rbtree Dave Chinner
2010-09-27  1:47 ` [PATCH 18/18] xfs: pack xfs_buf structure more tightly Dave Chinner
2010-09-27 17:52 ` [PATCH 0/18] xfs: metadata scalability V4 Alex Elder
2010-09-28  1:42   ` Christoph Hellwig
  -- strict thread matches above, loose matches on Subject: below --
2010-09-24 12:30 [PATCH 0/18] xfs: metadata scalability V3 Dave Chinner
2010-09-24 12:31 ` [PATCH 15/18] xfs: batch inode reclaim lookup Dave Chinner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1285552073-14663-16-git-send-email-david@fromorbit.com \
    --to=david@fromorbit.com \
    --cc=xfs@oss.sgi.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox