public inbox for linux-xfs@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/4] XFS: replace the mount inode list with radix tree traversals
@ 2008-07-20 12:19 Dave Chinner
  2008-07-20 12:19 ` [PATCH 1/4] XFS: Remove xfs_iflush_all and clean up xfs_finish_reclaim_all() Dave Chinner
                   ` (3 more replies)
  0 siblings, 4 replies; 31+ messages in thread
From: Dave Chinner @ 2008-07-20 12:19 UTC (permalink / raw)
  To: xfs

The list of all inodes on a mount is superfluous. We can traverse
all inodes now by walking the per-AG inode radix trees without
needing a separate list. This enables us to remove a bunch of
complex list traversal code and remove another two pointers from
the xfs_inode.

Also, by replacing the sync traversal with an ascending inode
number traversal, we will issue better inode I/O patterns for
writeback triggered by xfssyncd or unmount.

Diffstat for the change:

 xfs/quota/xfs_qm_syscalls.c |  120 ++++++--------
 xfs/xfs_iget.c              |   42 -----
 xfs/xfs_inode.c             |   39 ----
 xfs/xfs_inode.h             |   11 -
 xfs/xfs_mount.c             |    7
 xfs/xfs_mount.h             |    1
 xfs/xfs_vfsops.c            |  366 ++++++++++++--------------------------------
 xfs/xfs_vnodeops.c          |   42 ++---
 8 files changed, 176 insertions(+), 452 deletions(-)

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH 1/4] XFS: Remove xfs_iflush_all and clean up xfs_finish_reclaim_all()
  2008-07-20 12:19 [PATCH 0/4] XFS: replace the mount inode list with radix tree traversals Dave Chinner
@ 2008-07-20 12:19 ` Dave Chinner
  2008-07-21  7:58   ` Christoph Hellwig
  2008-07-20 12:19 ` [PATCH 2/4] XFS: Use the inode tree for finding dirty inodes Dave Chinner
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 31+ messages in thread
From: Dave Chinner @ 2008-07-20 12:19 UTC (permalink / raw)
  To: xfs; +Cc: Dave Chinner

xfs_iflush_all() walks the m_inodes list to find inodes that
need reclaiming. We already have such a list - the m_del_inodes
list. Replace xfs_iflush_all() with a call to xfs_finish_reclaim_all()
and clean up xfs_finish_reclaim_all() to handle the different flush
modes now needed.

Originally based on a patch from Christoph Hellwig.

Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/xfs/xfs_inode.c    |   39 ---------------------------------------
 fs/xfs/xfs_inode.h    |    3 +--
 fs/xfs/xfs_mount.c    |    2 +-
 fs/xfs/xfs_vfsops.c   |    8 +++-----
 fs/xfs/xfs_vnodeops.c |   42 ++++++++++++++++++------------------------
 5 files changed, 23 insertions(+), 71 deletions(-)

diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 20b6f87..ae19b05 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -3456,45 +3456,6 @@ corrupt_out:
 	return XFS_ERROR(EFSCORRUPTED);
 }
 
-
-/*
- * Flush all inactive inodes in mp.
- */
-void
-xfs_iflush_all(
-	xfs_mount_t	*mp)
-{
-	xfs_inode_t	*ip;
-	bhv_vnode_t	*vp;
-
- again:
-	XFS_MOUNT_ILOCK(mp);
-	ip = mp->m_inodes;
-	if (ip == NULL)
-		goto out;
-
-	do {
-		/* Make sure we skip markers inserted by sync */
-		if (ip->i_mount == NULL) {
-			ip = ip->i_mnext;
-			continue;
-		}
-
-		vp = XFS_ITOV_NULL(ip);
-		if (!vp) {
-			XFS_MOUNT_IUNLOCK(mp);
-			xfs_finish_reclaim(ip, 0, XFS_IFLUSH_ASYNC);
-			goto again;
-		}
-
-		ASSERT(vn_count(vp) == 0);
-
-		ip = ip->i_mnext;
-	} while (ip != mp->m_inodes);
- out:
-	XFS_MOUNT_IUNLOCK(mp);
-}
-
 #ifdef XFS_ILOCK_TRACE
 ktrace_t	*xfs_ilock_trace_buf;
 
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 17a04b6..7ce41d3 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -480,7 +480,7 @@ void		xfs_iunlock_map_shared(xfs_inode_t *, uint);
 void		xfs_ifunlock(xfs_inode_t *);
 void		xfs_ireclaim(xfs_inode_t *);
 int		xfs_finish_reclaim(xfs_inode_t *, int, int);
-int		xfs_finish_reclaim_all(struct xfs_mount *, int);
+int		xfs_finish_reclaim_all(struct xfs_mount *, int, int);
 
 /*
  * xfs_inode.c prototypes.
@@ -518,7 +518,6 @@ void		xfs_ipin(xfs_inode_t *);
 void		xfs_iunpin(xfs_inode_t *);
 int		xfs_iextents_copy(xfs_inode_t *, xfs_bmbt_rec_t *, int);
 int		xfs_iflush(xfs_inode_t *, uint);
-void		xfs_iflush_all(struct xfs_mount *);
 void		xfs_ichgtime(xfs_inode_t *, int);
 xfs_fsize_t	xfs_file_last_byte(xfs_inode_t *);
 void		xfs_lock_inodes(xfs_inode_t **, int, uint);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 6c5d132..0c23f6a 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1268,7 +1268,7 @@ xfs_unmountfs(xfs_mount_t *mp)
 	 * need to force the log first.
 	 */
 	xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC);
-	xfs_iflush_all(mp);
+	xfs_finish_reclaim_all(mp, 0, XFS_IFLUSH_ASYNC);
 
 	XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING);
 
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 4a9a433..9d72646 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -65,7 +65,7 @@ xfs_quiesce_fs(
 	int			count = 0, pincount;
 
 	xfs_flush_buftarg(mp->m_ddev_targp, 0);
-	xfs_finish_reclaim_all(mp, 0);
+	xfs_finish_reclaim_all(mp, 0, XFS_IFLUSH_ASYNC);
 
 	/* This loop must run at least twice.
 	 * The first instance of the loop will flush
@@ -653,10 +653,8 @@ xfs_syncsub(
 	xfs_log_force(mp, (xfs_lsn_t)0, log_flags);
 
 	if (flags & (SYNC_ATTR|SYNC_DELWRI)) {
-		if (flags & SYNC_BDFLUSH)
-			xfs_finish_reclaim_all(mp, 1);
-		else
-			error = xfs_sync_inodes(mp, flags, bypassed);
+		xfs_finish_reclaim_all(mp, 1, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
+		error = xfs_sync_inodes(mp, flags, bypassed);
 	}
 
 	/*
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index b792a12..2af1be3 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -2985,36 +2985,30 @@ xfs_finish_reclaim(
 }
 
 int
-xfs_finish_reclaim_all(xfs_mount_t *mp, int noblock)
+xfs_finish_reclaim_all(
+	xfs_mount_t	*mp,
+	int		 noblock,
+	int		mode)
 {
-	int		purged;
 	xfs_inode_t	*ip, *n;
-	int		done = 0;
 
-	while (!done) {
-		purged = 0;
-		XFS_MOUNT_ILOCK(mp);
-		list_for_each_entry_safe(ip, n, &mp->m_del_inodes, i_reclaim) {
-			if (noblock) {
-				if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0)
-					continue;
-				if (xfs_ipincount(ip) ||
-				    !xfs_iflock_nowait(ip)) {
-					xfs_iunlock(ip, XFS_ILOCK_EXCL);
-					continue;
-				}
+restart:
+	XFS_MOUNT_ILOCK(mp);
+	list_for_each_entry_safe(ip, n, &mp->m_del_inodes, i_reclaim) {
+		if (noblock) {
+			if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0)
+				continue;
+			if (xfs_ipincount(ip) ||
+			    !xfs_iflock_nowait(ip)) {
+				xfs_iunlock(ip, XFS_ILOCK_EXCL);
+				continue;
 			}
-			XFS_MOUNT_IUNLOCK(mp);
-			if (xfs_finish_reclaim(ip, noblock,
-					XFS_IFLUSH_DELWRI_ELSE_ASYNC))
-				delay(1);
-			purged = 1;
-			break;
 		}
-
-		done = !purged;
+		XFS_MOUNT_IUNLOCK(mp);
+		if (xfs_finish_reclaim(ip, noblock, mode))
+			delay(1);
+		goto restart;
 	}
-
 	XFS_MOUNT_IUNLOCK(mp);
 	return 0;
 }
-- 
1.5.6

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 2/4] XFS: Use the inode tree for finding dirty inodes
  2008-07-20 12:19 [PATCH 0/4] XFS: replace the mount inode list with radix tree traversals Dave Chinner
  2008-07-20 12:19 ` [PATCH 1/4] XFS: Remove xfs_iflush_all and clean up xfs_finish_reclaim_all() Dave Chinner
@ 2008-07-20 12:19 ` Dave Chinner
  2008-07-22  4:28   ` Christoph Hellwig
  2008-07-20 12:19 ` [PATCH 3/4] XFS: Traverse inode trees when releasing dquots Dave Chinner
  2008-07-20 12:19 ` [PATCH 4/4] XFS: remove the mount inode list Dave Chinner
  3 siblings, 1 reply; 31+ messages in thread
From: Dave Chinner @ 2008-07-20 12:19 UTC (permalink / raw)
  To: xfs; +Cc: Dave Chinner

Update xfs_sync_inodes to walk the inode radix tree cache to find
dirty inodes. This removes a huge bunch of nasty, messy code for
traversing the mount inode list safely and removes another user of
the mount inode list.

Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/xfs/xfs_vfsops.c |  358 ++++++++++++++-------------------------------------
 1 files changed, 99 insertions(+), 259 deletions(-)

diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 9d72646..0ff9812 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -271,356 +271,196 @@ xfs_sync(
 }
 
 /*
- * xfs sync routine for internal use
- *
- * This routine supports all of the flags defined for the generic vfs_sync
- * interface as explained above under xfs_sync.
- *
+ * Sync all the inodes in the given AG according to the
+ * direction given by the flags.
  */
-int
-xfs_sync_inodes(
+STATIC int
+xfs_sync_inodes_ag(
 	xfs_mount_t	*mp,
+	int		ag,
 	int		flags,
-	int             *bypassed)
+	int		*bypassed)
 {
 	xfs_inode_t	*ip = NULL;
 	bhv_vnode_t	*vp = NULL;
-	int		error;
-	int		last_error;
-	uint64_t	fflag;
-	uint		lock_flags;
-	uint		base_lock_flags;
-	boolean_t	mount_locked;
-	boolean_t	vnode_refed;
-	int		preempt;
-	xfs_iptr_t	*ipointer;
-#ifdef DEBUG
-	boolean_t	ipointer_in = B_FALSE;
-
-#define IPOINTER_SET	ipointer_in = B_TRUE
-#define IPOINTER_CLR	ipointer_in = B_FALSE
-#else
-#define IPOINTER_SET
-#define IPOINTER_CLR
-#endif
-
-
-/* Insert a marker record into the inode list after inode ip. The list
- * must be locked when this is called. After the call the list will no
- * longer be locked.
- */
-#define IPOINTER_INSERT(ip, mp)	{ \
-		ASSERT(ipointer_in == B_FALSE); \
-		ipointer->ip_mnext = ip->i_mnext; \
-		ipointer->ip_mprev = ip; \
-		ip->i_mnext = (xfs_inode_t *)ipointer; \
-		ipointer->ip_mnext->i_mprev = (xfs_inode_t *)ipointer; \
-		preempt = 0; \
-		XFS_MOUNT_IUNLOCK(mp); \
-		mount_locked = B_FALSE; \
-		IPOINTER_SET; \
-	}
-
-/* Remove the marker from the inode list. If the marker was the only item
- * in the list then there are no remaining inodes and we should zero out
- * the whole list. If we are the current head of the list then move the head
- * past us.
- */
-#define IPOINTER_REMOVE(ip, mp)	{ \
-		ASSERT(ipointer_in == B_TRUE); \
-		if (ipointer->ip_mnext != (xfs_inode_t *)ipointer) { \
-			ip = ipointer->ip_mnext; \
-			ip->i_mprev = ipointer->ip_mprev; \
-			ipointer->ip_mprev->i_mnext = ip; \
-			if (mp->m_inodes == (xfs_inode_t *)ipointer) { \
-				mp->m_inodes = ip; \
-			} \
-		} else { \
-			ASSERT(mp->m_inodes == (xfs_inode_t *)ipointer); \
-			mp->m_inodes = NULL; \
-			ip = NULL; \
-		} \
-		IPOINTER_CLR; \
-	}
-
-#define XFS_PREEMPT_MASK	0x7f
-
-	ASSERT(!(flags & SYNC_BDFLUSH));
-
-	if (bypassed)
-		*bypassed = 0;
-	if (mp->m_flags & XFS_MOUNT_RDONLY)
-		return 0;
-	error = 0;
-	last_error = 0;
-	preempt = 0;
-
-	/* Allocate a reference marker */
-	ipointer = (xfs_iptr_t *)kmem_zalloc(sizeof(xfs_iptr_t), KM_SLEEP);
+	xfs_perag_t	*pag = &mp->m_perag[ag];
+	boolean_t	vnode_refed = B_FALSE;
+	int		nr_found;
+	int		first_index = 0;
+	int		error = 0;
+	int		last_error = 0;
+	int		fflag = XFS_B_ASYNC;
+	int		lock_flags = XFS_ILOCK_SHARED;
 
-	fflag = XFS_B_ASYNC;		/* default is don't wait */
 	if (flags & SYNC_DELWRI)
 		fflag = XFS_B_DELWRI;
 	if (flags & SYNC_WAIT)
 		fflag = 0;		/* synchronous overrides all */
 
-	base_lock_flags = XFS_ILOCK_SHARED;
 	if (flags & (SYNC_DELWRI | SYNC_CLOSE)) {
 		/*
 		 * We need the I/O lock if we're going to call any of
 		 * the flush/inval routines.
 		 */
-		base_lock_flags |= XFS_IOLOCK_SHARED;
+		lock_flags |= XFS_IOLOCK_SHARED;
 	}
 
-	XFS_MOUNT_ILOCK(mp);
-
-	ip = mp->m_inodes;
-
-	mount_locked = B_TRUE;
-	vnode_refed  = B_FALSE;
-
-	IPOINTER_CLR;
+	if (!pag->pag_ici_init)
+		return 0;
 
 	do {
-		ASSERT(ipointer_in == B_FALSE);
-		ASSERT(vnode_refed == B_FALSE);
-
-		lock_flags = base_lock_flags;
+		read_lock(&pag->pag_ici_lock);
+		nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
+				(void**)&ip, first_index, 1);
 
-		/*
-		 * There were no inodes in the list, just break out
-		 * of the loop.
-		 */
-		if (ip == NULL) {
+		if (!nr_found) {
+			read_unlock(&pag->pag_ici_lock);
 			break;
 		}
 
-		/*
-		 * We found another sync thread marker - skip it
-		 */
-		if (ip->i_mount == NULL) {
-			ip = ip->i_mnext;
-			continue;
-		}
-
-		vp = XFS_ITOV_NULL(ip);
+		/* update the index for the next lookup */
+		first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
 
 		/*
-		 * If the vnode is gone then this is being torn down,
-		 * call reclaim if it is flushed, else let regular flush
-		 * code deal with it later in the loop.
+		 * skip inodes in reclaim. Let xfs_syncsub do that for
+		 * us so we don't need to worry.
 		 */
-
-		if (vp == NULL) {
-			/* Skip ones already in reclaim */
-			if (ip->i_flags & XFS_IRECLAIM) {
-				ip = ip->i_mnext;
-				continue;
-			}
-			if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) {
-				ip = ip->i_mnext;
-			} else if ((xfs_ipincount(ip) == 0) &&
-				    xfs_iflock_nowait(ip)) {
-				IPOINTER_INSERT(ip, mp);
-
-				xfs_finish_reclaim(ip, 1,
-						XFS_IFLUSH_DELWRI_ELSE_ASYNC);
-
-				XFS_MOUNT_ILOCK(mp);
-				mount_locked = B_TRUE;
-				IPOINTER_REMOVE(ip, mp);
-			} else {
-				xfs_iunlock(ip, XFS_ILOCK_EXCL);
-				ip = ip->i_mnext;
-			}
+		vp = XFS_ITOV_NULL(ip);
+		if (!vp) {
+			read_unlock(&pag->pag_ici_lock);
 			continue;
 		}
 
+		/* bad inodes are dealt with elsewhere */
 		if (VN_BAD(vp)) {
-			ip = ip->i_mnext;
+			read_unlock(&pag->pag_ici_lock);
 			continue;
 		}
 
+		/* nothing to sync during shutdown */
 		if (XFS_FORCED_SHUTDOWN(mp) && !(flags & SYNC_CLOSE)) {
-			XFS_MOUNT_IUNLOCK(mp);
-			kmem_free(ipointer);
+			read_unlock(&pag->pag_ici_lock);
 			return 0;
 		}
 
 		/*
-		 * Try to lock without sleeping.  We're out of order with
-		 * the inode list lock here, so if we fail we need to drop
-		 * the mount lock and try again.  If we're called from
-		 * bdflush() here, then don't bother.
-		 *
-		 * The inode lock here actually coordinates with the
-		 * almost spurious inode lock in xfs_ireclaim() to prevent
-		 * the vnode we handle here without a reference from
-		 * being freed while we reference it.  If we lock the inode
-		 * while it's on the mount list here, then the spurious inode
-		 * lock in xfs_ireclaim() after the inode is pulled from
-		 * the mount list will sleep until we release it here.
-		 * This keeps the vnode from being freed while we reference
-		 * it.
+		 * The inode lock here actually coordinates with the almost
+		 * spurious inode lock in xfs_ireclaim() to prevent the vnode
+		 * we handle here without a reference from being freed while we
+		 * reference it.  If we lock the inode while it's on the mount
+		 * list here, then the spurious inode lock in xfs_ireclaim()
+		 * after the inode is pulled from the mount list will sleep
+		 * until we release it here.  This keeps the vnode from being
+		 * freed while we reference it.
 		 */
 		if (xfs_ilock_nowait(ip, lock_flags) == 0) {
-			if (vp == NULL) {
-				ip = ip->i_mnext;
-				continue;
-			}
-
 			vp = vn_grab(vp);
-			if (vp == NULL) {
-				ip = ip->i_mnext;
+			read_unlock(&pag->pag_ici_lock);
+			if (!vp)
 				continue;
-			}
-
-			IPOINTER_INSERT(ip, mp);
 			xfs_ilock(ip, lock_flags);
 
 			ASSERT(vp == XFS_ITOV(ip));
 			ASSERT(ip->i_mount == mp);
 
 			vnode_refed = B_TRUE;
+		} else {
+			/* safe to unlock here as we have a reference */
+			read_unlock(&pag->pag_ici_lock);
 		}
-
-		/* From here on in the loop we may have a marker record
-		 * in the inode list.
-		 */
-
 		/*
 		 * If we have to flush data or wait for I/O completion
 		 * we need to drop the ilock that we currently hold.
 		 * If we need to drop the lock, insert a marker if we
 		 * have not already done so.
 		 */
-		if ((flags & (SYNC_CLOSE|SYNC_IOWAIT)) ||
-		    ((flags & SYNC_DELWRI) && VN_DIRTY(vp))) {
-			if (mount_locked) {
-				IPOINTER_INSERT(ip, mp);
-			}
+		if (flags & SYNC_CLOSE) {
 			xfs_iunlock(ip, XFS_ILOCK_SHARED);
-
-			if (flags & SYNC_CLOSE) {
-				/* Shutdown case. Flush and invalidate. */
-				if (XFS_FORCED_SHUTDOWN(mp))
-					xfs_tosspages(ip, 0, -1,
-							     FI_REMAPF);
-				else
-					error = xfs_flushinval_pages(ip,
-							0, -1, FI_REMAPF);
-			} else if ((flags & SYNC_DELWRI) && VN_DIRTY(vp)) {
-				error = xfs_flush_pages(ip, 0,
-							-1, fflag, FI_NONE);
-			}
-
-			/*
-			 * When freezing, we need to wait ensure all I/O (including direct
-			 * I/O) is complete to ensure no further data modification can take
-			 * place after this point
-			 */
+			if (XFS_FORCED_SHUTDOWN(mp))
+				xfs_tosspages(ip, 0, -1, FI_REMAPF);
+			else
+				error = xfs_flushinval_pages(ip, 0, -1,
+							FI_REMAPF);
+			/* wait for I/O on freeze */
 			if (flags & SYNC_IOWAIT)
 				vn_iowait(ip);
 
 			xfs_ilock(ip, XFS_ILOCK_SHARED);
 		}
 
-		if ((flags & SYNC_ATTR) &&
-		    (ip->i_update_core ||
-		     (ip->i_itemp && ip->i_itemp->ili_format.ilf_fields))) {
-			if (mount_locked)
-				IPOINTER_INSERT(ip, mp);
+		if ((flags & SYNC_DELWRI) && VN_DIRTY(vp)) {
+			xfs_iunlock(ip, XFS_ILOCK_SHARED);
+			error = xfs_flush_pages(ip, 0, -1, fflag, FI_NONE);
+			if (flags & SYNC_IOWAIT)
+				vn_iowait(ip);
+			xfs_ilock(ip, XFS_ILOCK_SHARED);
+		}
 
+		if ((flags & SYNC_ATTR) && !xfs_inode_clean(ip)) {
 			if (flags & SYNC_WAIT) {
 				xfs_iflock(ip);
-				error = xfs_iflush(ip, XFS_IFLUSH_SYNC);
-
-			/*
-			 * If we can't acquire the flush lock, then the inode
-			 * is already being flushed so don't bother waiting.
-			 *
-			 * If we can lock it then do a delwri flush so we can
-			 * combine multiple inode flushes in each disk write.
-			 */
+				if (!xfs_inode_clean(ip))
+					error = xfs_iflush(ip, XFS_IFLUSH_SYNC);
+				else
+					xfs_ifunlock(ip);
 			} else if (xfs_iflock_nowait(ip)) {
-				error = xfs_iflush(ip, XFS_IFLUSH_DELWRI);
+				if (!xfs_inode_clean(ip))
+					error = xfs_iflush(ip, XFS_IFLUSH_DELWRI);
+				else
+					xfs_ifunlock(ip);
 			} else if (bypassed) {
 				(*bypassed)++;
 			}
 		}
 
-		if (lock_flags != 0) {
+		if (lock_flags)
 			xfs_iunlock(ip, lock_flags);
-		}
 
 		if (vnode_refed) {
-			/*
-			 * If we had to take a reference on the vnode
-			 * above, then wait until after we've unlocked
-			 * the inode to release the reference.  This is
-			 * because we can be already holding the inode
-			 * lock when IRELE() calls xfs_inactive().
-			 *
-			 * Make sure to drop the mount lock before calling
-			 * IRELE() so that we don't trip over ourselves if
-			 * we have to go for the mount lock again in the
-			 * inactive code.
-			 */
-			if (mount_locked) {
-				IPOINTER_INSERT(ip, mp);
-			}
-
-			IRELE(ip);
-
+			VN_RELE(vp);
 			vnode_refed = B_FALSE;
 		}
 
-		if (error) {
+		if (error)
 			last_error = error;
-		}
-
 		/*
 		 * bail out if the filesystem is corrupted.
 		 */
-		if (error == EFSCORRUPTED)  {
-			if (!mount_locked) {
-				XFS_MOUNT_ILOCK(mp);
-				IPOINTER_REMOVE(ip, mp);
-			}
-			XFS_MOUNT_IUNLOCK(mp);
-			ASSERT(ipointer_in == B_FALSE);
-			kmem_free(ipointer);
+		if (error == EFSCORRUPTED)
 			return XFS_ERROR(error);
-		}
 
-		/* Let other threads have a chance at the mount lock
-		 * if we have looped many times without dropping the
-		 * lock.
-		 */
-		if ((++preempt & XFS_PREEMPT_MASK) == 0) {
-			if (mount_locked) {
-				IPOINTER_INSERT(ip, mp);
-			}
-		}
+	} while (nr_found);
 
-		if (mount_locked == B_FALSE) {
-			XFS_MOUNT_ILOCK(mp);
-			mount_locked = B_TRUE;
-			IPOINTER_REMOVE(ip, mp);
-			continue;
-		}
+	return last_error;
+}
 
-		ASSERT(ipointer_in == B_FALSE);
-		ip = ip->i_mnext;
+int
+xfs_sync_inodes(
+	xfs_mount_t	*mp,
+	int		flags,
+	int             *bypassed)
+{
+	int		error;
+	int		last_error;
+	int		i;
+
+	if (bypassed)
+		*bypassed = 0;
+	if (mp->m_flags & XFS_MOUNT_RDONLY)
+		return 0;
+	error = 0;
+	last_error = 0;
 
-	} while (ip != mp->m_inodes);
 
-	XFS_MOUNT_IUNLOCK(mp);
+	for (i = 0; i < mp->m_sb.sb_agcount; i++) {
+		error = xfs_sync_inodes_ag(mp, i, flags, bypassed);
+		if (error)
+			last_error = error;
+		if (error == EFSCORRUPTED)
+			break;
+	}
 
-	ASSERT(ipointer_in == B_FALSE);
 
-	kmem_free(ipointer);
 	return XFS_ERROR(last_error);
 }
 
-- 
1.5.6

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 3/4] XFS: Traverse inode trees when releasing dquots
  2008-07-20 12:19 [PATCH 0/4] XFS: replace the mount inode list with radix tree traversals Dave Chinner
  2008-07-20 12:19 ` [PATCH 1/4] XFS: Remove xfs_iflush_all and clean up xfs_finish_reclaim_all() Dave Chinner
  2008-07-20 12:19 ` [PATCH 2/4] XFS: Use the inode tree for finding dirty inodes Dave Chinner
@ 2008-07-20 12:19 ` Dave Chinner
  2008-07-20 12:19 ` [PATCH 4/4] XFS: remove the mount inode list Dave Chinner
  3 siblings, 0 replies; 31+ messages in thread
From: Dave Chinner @ 2008-07-20 12:19 UTC (permalink / raw)
  To: xfs; +Cc: Dave Chinner

Make releasing all inode dquots traverse the per-ag
inode radix trees rather than the mount inode list.
This removes another user of the mount inode list.

Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/xfs/quota/xfs_qm_syscalls.c |  120 ++++++++++++++++++----------------------
 1 files changed, 53 insertions(+), 67 deletions(-)

diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index adfb872..c43b6d4 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -1022,101 +1022,87 @@ xfs_qm_export_flags(
 
 
 /*
- * Go thru all the inodes in the file system, releasing their dquots.
- * Note that the mount structure gets modified to indicate that quotas are off
- * AFTER this, in the case of quotaoff. This also gets called from
- * xfs_rootumount.
+ * Release all the dquots on the inodes in an AG.
  */
-void
-xfs_qm_dqrele_all_inodes(
-	struct xfs_mount *mp,
-	uint		 flags)
+STATIC void
+xfs_qm_dqrele_inodes_ag(
+	xfs_mount_t	*mp,
+	int		ag,
+	uint		flags)
 {
-	xfs_inode_t	*ip, *topino;
-	uint		ireclaims;
-	bhv_vnode_t	*vp;
-	boolean_t	vnode_refd;
-
-	ASSERT(mp->m_quotainfo);
+	xfs_inode_t	*ip = NULL;
+	bhv_vnode_t	*vp = NULL;
+	xfs_perag_t	*pag = &mp->m_perag[ag];
+	int		first_index = 0;
+	int		nr_found;
 
-	XFS_MOUNT_ILOCK(mp);
-again:
-	ip = mp->m_inodes;
-	if (ip == NULL) {
-		XFS_MOUNT_IUNLOCK(mp);
+	if (!pag->pag_ici_init)
 		return;
-	}
 	do {
-		/* Skip markers inserted by xfs_sync */
-		if (ip->i_mount == NULL) {
-			ip = ip->i_mnext;
-			continue;
+		boolean_t	vnode_refd = B_FALSE;
+
+		read_lock(&pag->pag_ici_lock);
+		nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
+				(void**)&ip, first_index, 1);
+
+		if (!nr_found) {
+			read_unlock(&pag->pag_ici_lock);
+			break;
 		}
+
+		/* update the index for the next lookup */
+		first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
+
 		/* Root inode, rbmip and rsumip have associated blocks */
-		if (ip == XFS_QI_UQIP(mp) || ip == XFS_QI_GQIP(mp)) {
-			ASSERT(ip->i_udquot == NULL);
-			ASSERT(ip->i_gdquot == NULL);
-			ip = ip->i_mnext;
-			continue;
-		}
 		vp = XFS_ITOV_NULL(ip);
-		if (!vp) {
+		if (!vp || ip == XFS_QI_UQIP(mp) || ip == XFS_QI_GQIP(mp)) {
 			ASSERT(ip->i_udquot == NULL);
 			ASSERT(ip->i_gdquot == NULL);
-			ip = ip->i_mnext;
+			read_unlock(&pag->pag_ici_lock);
 			continue;
 		}
-		vnode_refd = B_FALSE;
 		if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) {
-			ireclaims = mp->m_ireclaims;
-			topino = mp->m_inodes;
 			vp = vn_grab(vp);
+			read_unlock(&pag->pag_ici_lock);
 			if (!vp)
-				goto again;
-
-			XFS_MOUNT_IUNLOCK(mp);
-			/* XXX restart limit ? */
-			xfs_ilock(ip, XFS_ILOCK_EXCL);
+				continue;
 			vnode_refd = B_TRUE;
+			xfs_ilock(ip, XFS_ILOCK_EXCL);
 		} else {
-			ireclaims = mp->m_ireclaims;
-			topino = mp->m_inodes;
-			XFS_MOUNT_IUNLOCK(mp);
+			read_unlock(&pag->pag_ici_lock);
 		}
-
-		/*
-		 * We don't keep the mountlock across the dqrele() call,
-		 * since it can take a while..
-		 */
 		if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) {
 			xfs_qm_dqrele(ip->i_udquot);
 			ip->i_udquot = NULL;
 		}
-		if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) && ip->i_gdquot) {
+		if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) &&
+		    ip->i_gdquot) {
 			xfs_qm_dqrele(ip->i_gdquot);
 			ip->i_gdquot = NULL;
 		}
 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
-		/*
-		 * Wait until we've dropped the ilock and mountlock to
-		 * do the vn_rele. Or be condemned to an eternity in the
-		 * inactive code in hell.
-		 */
 		if (vnode_refd)
-			IRELE(ip);
-		XFS_MOUNT_ILOCK(mp);
-		/*
-		 * If an inode was inserted or removed, we gotta
-		 * start over again.
-		 */
-		if (topino != mp->m_inodes || mp->m_ireclaims != ireclaims) {
-			/* XXX use a sentinel */
-			goto again;
-		}
-		ip = ip->i_mnext;
-	} while (ip != mp->m_inodes);
+			VN_RELE(vp);
+	} while (nr_found);
+}
 
-	XFS_MOUNT_IUNLOCK(mp);
+/*
+ * Go thru all the inodes in the file system, releasing their dquots.
+ * Note that the mount structure gets modified to indicate that quotas are off
+ * AFTER this, in the case of quotaoff. This also gets called from
+ * xfs_rootumount.
+ */
+void
+xfs_qm_dqrele_all_inodes(
+	struct xfs_mount *mp,
+	uint		 flags)
+{
+	int		i;
+
+	ASSERT(mp->m_quotainfo);
+	for (i = 0; i < mp->m_sb.sb_agcount; i++) {
+		xfs_qm_dqrele_inodes_ag(mp, i, flags);
+	}
 }
 
 /*------------------------------------------------------------------------*/
-- 
1.5.6

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 4/4] XFS: remove the mount inode list
  2008-07-20 12:19 [PATCH 0/4] XFS: replace the mount inode list with radix tree traversals Dave Chinner
                   ` (2 preceding siblings ...)
  2008-07-20 12:19 ` [PATCH 3/4] XFS: Traverse inode trees when releasing dquots Dave Chinner
@ 2008-07-20 12:19 ` Dave Chinner
  2008-07-22  4:29   ` Christoph Hellwig
  3 siblings, 1 reply; 31+ messages in thread
From: Dave Chinner @ 2008-07-20 12:19 UTC (permalink / raw)
  To: xfs; +Cc: Dave Chinner

Now we've removed all users of the mount inode list,
we can kill it. This reduces the size of the xfs_inode
by 2 pointers.

Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/xfs/xfs_iget.c  |   42 +-----------------------------------------
 fs/xfs/xfs_inode.h |    8 --------
 fs/xfs/xfs_mount.c |    5 -----
 fs/xfs/xfs_mount.h |    1 -
 4 files changed, 1 insertions(+), 55 deletions(-)

diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index b07604b..f16ca6c 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -76,7 +76,6 @@ xfs_iget_core(
 {
 	struct inode	*old_inode;
 	xfs_inode_t	*ip;
-	xfs_inode_t	*iq;
 	int		error;
 	unsigned long	first_index, mask;
 	xfs_perag_t	*pag;
@@ -260,24 +259,6 @@ finish_inode:
 
 	write_unlock(&pag->pag_ici_lock);
 	radix_tree_preload_end();
-
-	/*
-	 * Link ip to its mount and thread it on the mount's inode list.
-	 */
-	XFS_MOUNT_ILOCK(mp);
-	if ((iq = mp->m_inodes)) {
-		ASSERT(iq->i_mprev->i_mnext == iq);
-		ip->i_mprev = iq->i_mprev;
-		iq->i_mprev->i_mnext = ip;
-		iq->i_mprev = ip;
-		ip->i_mnext = iq;
-	} else {
-		ip->i_mnext = ip;
-		ip->i_mprev = ip;
-	}
-	mp->m_inodes = ip;
-
-	XFS_MOUNT_IUNLOCK(mp);
 	xfs_put_perag(mp, pag);
 
  return_ip:
@@ -490,36 +471,15 @@ xfs_iextract(
 {
 	xfs_mount_t	*mp = ip->i_mount;
 	xfs_perag_t	*pag = xfs_get_perag(mp, ip->i_ino);
-	xfs_inode_t	*iq;
 
 	write_lock(&pag->pag_ici_lock);
 	radix_tree_delete(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino));
 	write_unlock(&pag->pag_ici_lock);
 	xfs_put_perag(mp, pag);
 
-	/*
-	 * Remove from mount's inode list.
-	 */
-	XFS_MOUNT_ILOCK(mp);
-	ASSERT((ip->i_mnext != NULL) && (ip->i_mprev != NULL));
-	iq = ip->i_mnext;
-	iq->i_mprev = ip->i_mprev;
-	ip->i_mprev->i_mnext = iq;
-
-	/*
-	 * Fix up the head pointer if it points to the inode being deleted.
-	 */
-	if (mp->m_inodes == ip) {
-		if (ip == iq) {
-			mp->m_inodes = NULL;
-		} else {
-			mp->m_inodes = iq;
-		}
-	}
-
 	/* Deal with the deleted inodes list */
+	XFS_MOUNT_ILOCK(mp);
 	list_del_init(&ip->i_reclaim);
-
 	mp->m_ireclaims++;
 	XFS_MOUNT_IUNLOCK(mp);
 }
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 7ce41d3..c440b1d 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -192,16 +192,8 @@ typedef struct xfs_icdinode {
 	__uint32_t	di_gen;		/* generation number */
 } xfs_icdinode_t;
 
-typedef struct {
-	struct xfs_inode	*ip_mnext;	/* next inode in mount list */
-	struct xfs_inode	*ip_mprev;	/* ptr to prev inode */
-	struct xfs_mount	*ip_mount;	/* fs mount struct ptr */
-} xfs_iptr_t;
-
 typedef struct xfs_inode {
 	/* Inode linking and identification information. */
-	struct xfs_inode	*i_mnext;	/* next inode in mount list */
-	struct xfs_inode	*i_mprev;	/* ptr to prev inode */
 	struct xfs_mount	*i_mount;	/* fs mount struct ptr */
 	struct list_head	i_reclaim;	/* reclaim list */
 	bhv_vnode_t		*i_vnode;	/* vnode backpointer */
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 0c23f6a..364c075 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1314,11 +1314,6 @@ xfs_unmountfs(xfs_mount_t *mp)
 
 	xfs_freesb(mp);
 
-	/*
-	 * All inodes from this mount point should be freed.
-	 */
-	ASSERT(mp->m_inodes == NULL);
-
 	if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0)
 		uuid_table_remove(&mp->m_sb.sb_uuid);
 
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 6482005..4f257d7 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -247,7 +247,6 @@ typedef struct xfs_mount {
 	xfs_agnumber_t		m_agirotor;	/* last ag dir inode alloced */
 	spinlock_t		m_agirotor_lock;/* .. and lock protecting it */
 	xfs_agnumber_t		m_maxagi;	/* highest inode alloc group */
-	struct xfs_inode	*m_inodes;	/* active inode list */
 	struct list_head	m_del_inodes;	/* inodes to reclaim */
 	mutex_t			m_ilock;	/* inode list mutex */
 	uint			m_ireclaims;	/* count of calls to reclaim*/
-- 
1.5.6

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [PATCH 1/4] XFS: Remove xfs_iflush_all and clean up xfs_finish_reclaim_all()
  2008-07-20 12:19 ` [PATCH 1/4] XFS: Remove xfs_iflush_all and clean up xfs_finish_reclaim_all() Dave Chinner
@ 2008-07-21  7:58   ` Christoph Hellwig
  2008-07-21 11:33     ` Dave Chinner
  0 siblings, 1 reply; 31+ messages in thread
From: Christoph Hellwig @ 2008-07-21  7:58 UTC (permalink / raw)
  To: Dave Chinner; +Cc: xfs

On Sun, Jul 20, 2008 at 10:19:51PM +1000, Dave Chinner wrote:
> xfs_iflush_all() walks the m_inodes list to find inodes that
> need reclaiming. We already have such a list - the m_del_inodes
> list. Replace xfs_iflush_all() with a call to xfs_finish_reclaim_all()
> and clean up xfs_finish_reclaim_all() to handle the different flush
> modes now needed.
>
>
> 
> Originally based on a patch from Christoph Hellwig.

Unlike my original patch is also now calls xfs_finish_reclaim_all
unconditonal in xfs_syncsub.  This looks harmless but useless in the
context of this patch, and actually useful for your next patches.

The xfs_finish_reclaim_all in xfs_quiesce_fs changes from
XFS_IFLUSH_DELWRI_ELSE_ASYNC to XFS_IFLUSH_ASYNC, which needs an
explanation.

Otherwiase this looks good to me, should have submitted the original
patch long time ago..

> 
> Signed-off-by: Dave Chinner <david@fromorbit.com>
> ---
>  fs/xfs/xfs_inode.c    |   39 ---------------------------------------
>  fs/xfs/xfs_inode.h    |    3 +--
>  fs/xfs/xfs_mount.c    |    2 +-
>  fs/xfs/xfs_vfsops.c   |    8 +++-----
>  fs/xfs/xfs_vnodeops.c |   42 ++++++++++++++++++------------------------
>  5 files changed, 23 insertions(+), 71 deletions(-)
> 
> diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
> index 20b6f87..ae19b05 100644
> --- a/fs/xfs/xfs_inode.c
> +++ b/fs/xfs/xfs_inode.c
> @@ -3456,45 +3456,6 @@ corrupt_out:
>  	return XFS_ERROR(EFSCORRUPTED);
>  }
>  
> -
> -/*
> - * Flush all inactive inodes in mp.
> - */
> -void
> -xfs_iflush_all(
> -	xfs_mount_t	*mp)
> -{
> -	xfs_inode_t	*ip;
> -	bhv_vnode_t	*vp;
> -
> - again:
> -	XFS_MOUNT_ILOCK(mp);
> -	ip = mp->m_inodes;
> -	if (ip == NULL)
> -		goto out;
> -
> -	do {
> -		/* Make sure we skip markers inserted by sync */
> -		if (ip->i_mount == NULL) {
> -			ip = ip->i_mnext;
> -			continue;
> -		}
> -
> -		vp = XFS_ITOV_NULL(ip);
> -		if (!vp) {
> -			XFS_MOUNT_IUNLOCK(mp);
> -			xfs_finish_reclaim(ip, 0, XFS_IFLUSH_ASYNC);
> -			goto again;
> -		}
> -
> -		ASSERT(vn_count(vp) == 0);
> -
> -		ip = ip->i_mnext;
> -	} while (ip != mp->m_inodes);
> - out:
> -	XFS_MOUNT_IUNLOCK(mp);
> -}
> -
>  #ifdef XFS_ILOCK_TRACE
>  ktrace_t	*xfs_ilock_trace_buf;
>  
> diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
> index 17a04b6..7ce41d3 100644
> --- a/fs/xfs/xfs_inode.h
> +++ b/fs/xfs/xfs_inode.h
> @@ -480,7 +480,7 @@ void		xfs_iunlock_map_shared(xfs_inode_t *, uint);
>  void		xfs_ifunlock(xfs_inode_t *);
>  void		xfs_ireclaim(xfs_inode_t *);
>  int		xfs_finish_reclaim(xfs_inode_t *, int, int);
> -int		xfs_finish_reclaim_all(struct xfs_mount *, int);
> +int		xfs_finish_reclaim_all(struct xfs_mount *, int, int);
>  
>  /*
>   * xfs_inode.c prototypes.
> @@ -518,7 +518,6 @@ void		xfs_ipin(xfs_inode_t *);
>  void		xfs_iunpin(xfs_inode_t *);
>  int		xfs_iextents_copy(xfs_inode_t *, xfs_bmbt_rec_t *, int);
>  int		xfs_iflush(xfs_inode_t *, uint);
> -void		xfs_iflush_all(struct xfs_mount *);
>  void		xfs_ichgtime(xfs_inode_t *, int);
>  xfs_fsize_t	xfs_file_last_byte(xfs_inode_t *);
>  void		xfs_lock_inodes(xfs_inode_t **, int, uint);
> diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
> index 6c5d132..0c23f6a 100644
> --- a/fs/xfs/xfs_mount.c
> +++ b/fs/xfs/xfs_mount.c
> @@ -1268,7 +1268,7 @@ xfs_unmountfs(xfs_mount_t *mp)
>  	 * need to force the log first.
>  	 */
>  	xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC);
> -	xfs_iflush_all(mp);
> +	xfs_finish_reclaim_all(mp, 0, XFS_IFLUSH_ASYNC);
>  
>  	XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING);
>  
> diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
> index 4a9a433..9d72646 100644
> --- a/fs/xfs/xfs_vfsops.c
> +++ b/fs/xfs/xfs_vfsops.c
> @@ -65,7 +65,7 @@ xfs_quiesce_fs(
>  	int			count = 0, pincount;
>  
>  	xfs_flush_buftarg(mp->m_ddev_targp, 0);
> -	xfs_finish_reclaim_all(mp, 0);
> +	xfs_finish_reclaim_all(mp, 0, XFS_IFLUSH_ASYNC);
>  
>  	/* This loop must run at least twice.
>  	 * The first instance of the loop will flush
> @@ -653,10 +653,8 @@ xfs_syncsub(
>  	xfs_log_force(mp, (xfs_lsn_t)0, log_flags);
>  
>  	if (flags & (SYNC_ATTR|SYNC_DELWRI)) {
> -		if (flags & SYNC_BDFLUSH)
> -			xfs_finish_reclaim_all(mp, 1);
> -		else
> -			error = xfs_sync_inodes(mp, flags, bypassed);
> +		xfs_finish_reclaim_all(mp, 1, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
> +		error = xfs_sync_inodes(mp, flags, bypassed);
>  	}
>  
>  	/*
> diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
> index b792a12..2af1be3 100644
> --- a/fs/xfs/xfs_vnodeops.c
> +++ b/fs/xfs/xfs_vnodeops.c
> @@ -2985,36 +2985,30 @@ xfs_finish_reclaim(
>  }
>  
>  int
> -xfs_finish_reclaim_all(xfs_mount_t *mp, int noblock)
> +xfs_finish_reclaim_all(
> +	xfs_mount_t	*mp,
> +	int		 noblock,
> +	int		mode)
>  {
> -	int		purged;
>  	xfs_inode_t	*ip, *n;
> -	int		done = 0;
>  
> -	while (!done) {
> -		purged = 0;
> -		XFS_MOUNT_ILOCK(mp);
> -		list_for_each_entry_safe(ip, n, &mp->m_del_inodes, i_reclaim) {
> -			if (noblock) {
> -				if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0)
> -					continue;
> -				if (xfs_ipincount(ip) ||
> -				    !xfs_iflock_nowait(ip)) {
> -					xfs_iunlock(ip, XFS_ILOCK_EXCL);
> -					continue;
> -				}
> +restart:
> +	XFS_MOUNT_ILOCK(mp);
> +	list_for_each_entry_safe(ip, n, &mp->m_del_inodes, i_reclaim) {
> +		if (noblock) {
> +			if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0)
> +				continue;
> +			if (xfs_ipincount(ip) ||
> +			    !xfs_iflock_nowait(ip)) {
> +				xfs_iunlock(ip, XFS_ILOCK_EXCL);
> +				continue;
>  			}
> -			XFS_MOUNT_IUNLOCK(mp);
> -			if (xfs_finish_reclaim(ip, noblock,
> -					XFS_IFLUSH_DELWRI_ELSE_ASYNC))
> -				delay(1);
> -			purged = 1;
> -			break;
>  		}
> -
> -		done = !purged;
> +		XFS_MOUNT_IUNLOCK(mp);
> +		if (xfs_finish_reclaim(ip, noblock, mode))
> +			delay(1);
> +		goto restart;
>  	}
> -
>  	XFS_MOUNT_IUNLOCK(mp);
>  	return 0;
>  }
> -- 
> 1.5.6
> 
> 
---end quoted text---

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 1/4] XFS: Remove xfs_iflush_all and clean up xfs_finish_reclaim_all()
  2008-07-21  7:58   ` Christoph Hellwig
@ 2008-07-21 11:33     ` Dave Chinner
  2008-07-22  4:24       ` Christoph Hellwig
  0 siblings, 1 reply; 31+ messages in thread
From: Dave Chinner @ 2008-07-21 11:33 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: xfs

On Mon, Jul 21, 2008 at 03:58:26AM -0400, Christoph Hellwig wrote:
> On Sun, Jul 20, 2008 at 10:19:51PM +1000, Dave Chinner wrote:
> > xfs_iflush_all() walks the m_inodes list to find inodes that
> > need reclaiming. We already have such a list - the m_del_inodes
> > list. Replace xfs_iflush_all() with a call to xfs_finish_reclaim_all()
> > and clean up xfs_finish_reclaim_all() to handle the different flush
> > modes now needed.
> >
> >
> > 
> > Originally based on a patch from Christoph Hellwig.
> 
> Unlike my original patch is also now calls xfs_finish_reclaim_all
> unconditonal in xfs_syncsub.  This looks harmless but useless in the
> context of this patch, and actually useful for your next patches.

It's not really harmless - it reclaims inodes faster as reclaim
is now triggered by means other than xfssyncd. I'm not sure I should
perturb the reclaim behaviour right now (wasn't my intent) so I'll
change it back to the original code. It doesn't affect the other
patches in any material way...

> The xfs_finish_reclaim_all in xfs_quiesce_fs changes from
> XFS_IFLUSH_DELWRI_ELSE_ASYNC to XFS_IFLUSH_ASYNC, which needs an
> explanation.

Ah - that slipped through from the original patches - this change
was much later in the original series I wrote. It doesn't make much
sense to make this change until the inode writeback clustering gets
optimised.

Fixed patch is below.

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com


XFS: Remove xfs_iflush_all and clean up xfs_finish_reclaim_all() V2

xfs_iflush_all() walks the m_inodes list to find inodes that
need reclaiming. We already have such a list - the m_del_inodes
list. Replace xfs_iflush_all() with a call to xfs_finish_reclaim_all()
and clean up xfs_finish_reclaim_all() to handle the different flush
modes now needed.

Originally based on a patch from Christoph Hellwig.

Signed-off-by: Dave Chinner <david@fromorbit.com>

---
Version 2
o revert xfs_syncsub() inode reclaim behaviour back to original
  code
o xfs_quiesce_fs() should use XFS_IFLUSH_DELWRI_ELSE_ASYNC, not
  XFS_IFLUSH_ASYNC, to prevent change of behaviour.

 fs/xfs/xfs_inode.c    |   39 ---------------------------------------
 fs/xfs/xfs_inode.h    |    3 +--
 fs/xfs/xfs_mount.c    |    2 +-
 fs/xfs/xfs_vfsops.c   |    4 ++--
 fs/xfs/xfs_vnodeops.c |   42 ++++++++++++++++++------------------------
 5 files changed, 22 insertions(+), 68 deletions(-)

diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 20b6f87..ae19b05 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -3456,45 +3456,6 @@ corrupt_out:
 	return XFS_ERROR(EFSCORRUPTED);
 }
 
-
-/*
- * Flush all inactive inodes in mp.
- */
-void
-xfs_iflush_all(
-	xfs_mount_t	*mp)
-{
-	xfs_inode_t	*ip;
-	bhv_vnode_t	*vp;
-
- again:
-	XFS_MOUNT_ILOCK(mp);
-	ip = mp->m_inodes;
-	if (ip == NULL)
-		goto out;
-
-	do {
-		/* Make sure we skip markers inserted by sync */
-		if (ip->i_mount == NULL) {
-			ip = ip->i_mnext;
-			continue;
-		}
-
-		vp = XFS_ITOV_NULL(ip);
-		if (!vp) {
-			XFS_MOUNT_IUNLOCK(mp);
-			xfs_finish_reclaim(ip, 0, XFS_IFLUSH_ASYNC);
-			goto again;
-		}
-
-		ASSERT(vn_count(vp) == 0);
-
-		ip = ip->i_mnext;
-	} while (ip != mp->m_inodes);
- out:
-	XFS_MOUNT_IUNLOCK(mp);
-}
-
 #ifdef XFS_ILOCK_TRACE
 ktrace_t	*xfs_ilock_trace_buf;
 
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 17a04b6..7ce41d3 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -480,7 +480,7 @@ void		xfs_iunlock_map_shared(xfs_inode_t *, uint);
 void		xfs_ifunlock(xfs_inode_t *);
 void		xfs_ireclaim(xfs_inode_t *);
 int		xfs_finish_reclaim(xfs_inode_t *, int, int);
-int		xfs_finish_reclaim_all(struct xfs_mount *, int);
+int		xfs_finish_reclaim_all(struct xfs_mount *, int, int);
 
 /*
  * xfs_inode.c prototypes.
@@ -518,7 +518,6 @@ void		xfs_ipin(xfs_inode_t *);
 void		xfs_iunpin(xfs_inode_t *);
 int		xfs_iextents_copy(xfs_inode_t *, xfs_bmbt_rec_t *, int);
 int		xfs_iflush(xfs_inode_t *, uint);
-void		xfs_iflush_all(struct xfs_mount *);
 void		xfs_ichgtime(xfs_inode_t *, int);
 xfs_fsize_t	xfs_file_last_byte(xfs_inode_t *);
 void		xfs_lock_inodes(xfs_inode_t **, int, uint);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 6c5d132..0c23f6a 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1268,7 +1268,7 @@ xfs_unmountfs(xfs_mount_t *mp)
 	 * need to force the log first.
 	 */
 	xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC);
-	xfs_iflush_all(mp);
+	xfs_finish_reclaim_all(mp, 0, XFS_IFLUSH_ASYNC);
 
 	XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING);
 
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 4a9a433..ab3f004 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -65,7 +65,7 @@ xfs_quiesce_fs(
 	int			count = 0, pincount;
 
 	xfs_flush_buftarg(mp->m_ddev_targp, 0);
-	xfs_finish_reclaim_all(mp, 0);
+	xfs_finish_reclaim_all(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
 
 	/* This loop must run at least twice.
 	 * The first instance of the loop will flush
@@ -654,7 +654,7 @@ xfs_syncsub(
 
 	if (flags & (SYNC_ATTR|SYNC_DELWRI)) {
 		if (flags & SYNC_BDFLUSH)
-			xfs_finish_reclaim_all(mp, 1);
+			xfs_finish_reclaim_all(mp, 1, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
 		else
 			error = xfs_sync_inodes(mp, flags, bypassed);
 	}
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index b792a12..2af1be3 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -2985,36 +2985,30 @@ xfs_finish_reclaim(
 }
 
 int
-xfs_finish_reclaim_all(xfs_mount_t *mp, int noblock)
+xfs_finish_reclaim_all(
+	xfs_mount_t	*mp,
+	int		 noblock,
+	int		mode)
 {
-	int		purged;
 	xfs_inode_t	*ip, *n;
-	int		done = 0;
 
-	while (!done) {
-		purged = 0;
-		XFS_MOUNT_ILOCK(mp);
-		list_for_each_entry_safe(ip, n, &mp->m_del_inodes, i_reclaim) {
-			if (noblock) {
-				if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0)
-					continue;
-				if (xfs_ipincount(ip) ||
-				    !xfs_iflock_nowait(ip)) {
-					xfs_iunlock(ip, XFS_ILOCK_EXCL);
-					continue;
-				}
+restart:
+	XFS_MOUNT_ILOCK(mp);
+	list_for_each_entry_safe(ip, n, &mp->m_del_inodes, i_reclaim) {
+		if (noblock) {
+			if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0)
+				continue;
+			if (xfs_ipincount(ip) ||
+			    !xfs_iflock_nowait(ip)) {
+				xfs_iunlock(ip, XFS_ILOCK_EXCL);
+				continue;
 			}
-			XFS_MOUNT_IUNLOCK(mp);
-			if (xfs_finish_reclaim(ip, noblock,
-					XFS_IFLUSH_DELWRI_ELSE_ASYNC))
-				delay(1);
-			purged = 1;
-			break;
 		}
-
-		done = !purged;
+		XFS_MOUNT_IUNLOCK(mp);
+		if (xfs_finish_reclaim(ip, noblock, mode))
+			delay(1);
+		goto restart;
 	}
-
 	XFS_MOUNT_IUNLOCK(mp);
 	return 0;
 }

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [PATCH 1/4] XFS: Remove xfs_iflush_all and clean up xfs_finish_reclaim_all()
  2008-07-21 11:33     ` Dave Chinner
@ 2008-07-22  4:24       ` Christoph Hellwig
  0 siblings, 0 replies; 31+ messages in thread
From: Christoph Hellwig @ 2008-07-22  4:24 UTC (permalink / raw)
  To: Christoph Hellwig, xfs

On Mon, Jul 21, 2008 at 09:33:02PM +1000, Dave Chinner wrote:
> It's not really harmless - it reclaims inodes faster as reclaim
> is now triggered by means other than xfssyncd. I'm not sure I should
> perturb the reclaim behaviour right now (wasn't my intent) so I'll
> change it back to the original code. It doesn't affect the other
> patches in any material way...

Yes, makes sense.

> Fixed patch is below.

Looks good.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 2/4] XFS: Use the inode tree for finding dirty inodes
  2008-07-20 12:19 ` [PATCH 2/4] XFS: Use the inode tree for finding dirty inodes Dave Chinner
@ 2008-07-22  4:28   ` Christoph Hellwig
  2008-07-22  5:30     ` Dave Chinner
  0 siblings, 1 reply; 31+ messages in thread
From: Christoph Hellwig @ 2008-07-22  4:28 UTC (permalink / raw)
  To: Dave Chinner; +Cc: xfs

On Sun, Jul 20, 2008 at 10:19:52PM +1000, Dave Chinner wrote:
> Update xfs_sync_inodes to walk the inode radix tree cache to find
> dirty inodes. This removes a huge bunch of nasty, messy code for
> traversing the mount inode list safely and removes another user of
> the mount inode list.

Looks good, some minor nits below:

>  	xfs_inode_t	*ip = NULL;
>  	bhv_vnode_t	*vp = NULL;

As you're touching most uses of vp what about just turning it
into a plain Linux inode?

> +	if (!pag->pag_ici_init)
> +		return 0;

I think it would be cleaner to move this into the caller and not even
call into this function for uninitialized AGs.

> +		read_lock(&pag->pag_ici_lock);
> +		nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
> +				(void**)&ip, first_index, 1);

This needs a big comment on why you're using the gang lookup for
a single lookup.  I guess that's because the gang lookup skips to
the next actually existing entry instead of returning NULL, but that's
not really obvious to the reader.

> +			VN_RELE(vp);

This should either be an iput or IRELE, VN_RELE is on it's way out.

Btw, all these also apply to the next patch, I won't comment on them
there again.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 4/4] XFS: remove the mount inode list
  2008-07-20 12:19 ` [PATCH 4/4] XFS: remove the mount inode list Dave Chinner
@ 2008-07-22  4:29   ` Christoph Hellwig
  2008-07-22  5:42     ` Dave Chinner
  0 siblings, 1 reply; 31+ messages in thread
From: Christoph Hellwig @ 2008-07-22  4:29 UTC (permalink / raw)
  To: Dave Chinner; +Cc: xfs

On Sun, Jul 20, 2008 at 10:19:54PM +1000, Dave Chinner wrote:
> Now we've removed all users of the mount inode list,
> we can kill it. This reduces the size of the xfs_inode
> by 2 pointers.

Looks good, but xfsidbg needs to be fixed up for this.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 2/4] XFS: Use the inode tree for finding dirty inodes
  2008-07-22  4:28   ` Christoph Hellwig
@ 2008-07-22  5:30     ` Dave Chinner
  2008-07-22  7:27       ` Christoph Hellwig
  0 siblings, 1 reply; 31+ messages in thread
From: Dave Chinner @ 2008-07-22  5:30 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: xfs

On Tue, Jul 22, 2008 at 12:28:29AM -0400, Christoph Hellwig wrote:
> On Sun, Jul 20, 2008 at 10:19:52PM +1000, Dave Chinner wrote:
> > Update xfs_sync_inodes to walk the inode radix tree cache to find
> > dirty inodes. This removes a huge bunch of nasty, messy code for
> > traversing the mount inode list safely and removes another user of
> > the mount inode list.
> 
> Looks good, some minor nits below:
> 
> >  	xfs_inode_t	*ip = NULL;
> >  	bhv_vnode_t	*vp = NULL;
> 
> As you're touching most uses of vp what about just turning it
> into a plain Linux inode?

Until the method we'll use for referencing VFS inodes in core XFS
code is clear, I'd rather leave it consistent with the rest of the code.

As it is, I expect that this code will be revisited several times as
new functionality is added (e.g. combining the VFS and XFS inodes)
because that will change the way we interface with VFS and XFS inodes.
Much of this vnode specific code will change at that time; I'd
rather just change it once when the time is appropriate than have
to change it multiple times....

> > +	if (!pag->pag_ici_init)
> > +		return 0;
> 
> I think it would be cleaner to move this into the caller and not even
> call into this function for uninitialized AGs.

Ok. No big deal either way.

> 
> > +		read_lock(&pag->pag_ici_lock);
> > +		nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
> > +				(void**)&ip, first_index, 1);
> 
> This needs a big comment on why you're using the gang lookup for
> a single lookup.  I guess that's because the gang lookup skips to
> the next actually existing entry instead of returning NULL, but that's
> not really obvious to the reader.

Fair enough. This was later in the original series where this was
a dirty tag lookup and didn't need commenting ;)

> > +			VN_RELE(vp);
> 
> This should either be an iput or IRELE, VN_RELE is on it's way out.

Good catch, I'll change it.

> Btw, all these also apply to the next patch, I won't comment on them
> there again.

Ok, I'll update that as well.

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 4/4] XFS: remove the mount inode list
  2008-07-22  4:29   ` Christoph Hellwig
@ 2008-07-22  5:42     ` Dave Chinner
  0 siblings, 0 replies; 31+ messages in thread
From: Dave Chinner @ 2008-07-22  5:42 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: xfs

On Tue, Jul 22, 2008 at 12:29:03AM -0400, Christoph Hellwig wrote:
> On Sun, Jul 20, 2008 at 10:19:54PM +1000, Dave Chinner wrote:
> > Now we've removed all users of the mount inode list,
> > we can kill it. This reduces the size of the xfs_inode
> > by 2 pointers.
> 
> Looks good, but xfsidbg needs to be fixed up for this.

Yeah, well, it's not in the git tree. CVS + quilt is too painful
to go back to once you've started using guilt + git + branches
and basked in it's goodness.

SGI folk - a -dev branch in the XFS git tree that contains
everything the CVS tree contains would be really helpful here.

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 2/4] XFS: Use the inode tree for finding dirty inodes
  2008-07-22  5:30     ` Dave Chinner
@ 2008-07-22  7:27       ` Christoph Hellwig
  2008-07-23  0:05         ` Dave Chinner
  0 siblings, 1 reply; 31+ messages in thread
From: Christoph Hellwig @ 2008-07-22  7:27 UTC (permalink / raw)
  To: Christoph Hellwig, xfs

On Tue, Jul 22, 2008 at 03:30:19PM +1000, Dave Chinner wrote:
> Until the method we'll use for referencing VFS inodes in core XFS
> code is clear, I'd rather leave it consistent with the rest of the code.

I would expect your cleanups in this area to go in before this patch,
or not?

> As it is, I expect that this code will be revisited several times as
> new functionality is added (e.g. combining the VFS and XFS inodes)
> because that will change the way we interface with VFS and XFS inodes.
> Much of this vnode specific code will change at that time; I'd
> rather just change it once when the time is appropriate than have
> to change it multiple times....

Well, it's not vnode specific code.  bhv_vnode_t is just a typedef for
struct inode and the various functions dealing with it are trivial
wrapper, many of them in fact expanding to no code at all.  But maybe
killing this syntactic sugar should be a separate patch.  I only fear
we'll never get it in with the current review and commit latencies
for XFS :(

> > This needs a big comment on why you're using the gang lookup for
> > a single lookup.  I guess that's because the gang lookup skips to
> > the next actually existing entry instead of returning NULL, but that's
> > not really obvious to the reader.
> 
> Fair enough. This was later in the original series where this was
> a dirty tag lookup and didn't need commenting ;)

Actually even then a comment describing why we want a gang lookup
when just looking up a single element might be a good idea.  On the
other hand I wonder whether we might actually want to use the gang
lookups, the unconditional igrab is probably cheaper than lots of
radix tree lookups.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 2/4] XFS: Use the inode tree for finding dirty inodes
  2008-07-22  7:27       ` Christoph Hellwig
@ 2008-07-23  0:05         ` Dave Chinner
  2008-07-23  2:10           ` Mark Goodwin
  0 siblings, 1 reply; 31+ messages in thread
From: Dave Chinner @ 2008-07-23  0:05 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: xfs

On Tue, Jul 22, 2008 at 03:27:33AM -0400, Christoph Hellwig wrote:
> On Tue, Jul 22, 2008 at 03:30:19PM +1000, Dave Chinner wrote:
> > Until the method we'll use for referencing VFS inodes in core XFS
> > code is clear, I'd rather leave it consistent with the rest of the code.
> 
> I would expect your cleanups in this area to go in before this patch,
> or not?

The cleanups are on top of these patches. I'll have to reorder and
redo them if they are to go in first...

> > As it is, I expect that this code will be revisited several times as
> > new functionality is added (e.g. combining the VFS and XFS inodes)
> > because that will change the way we interface with VFS and XFS inodes.
> > Much of this vnode specific code will change at that time; I'd
> > rather just change it once when the time is appropriate than have
> > to change it multiple times....
> 
> Well, it's not vnode specific code.  bhv_vnode_t is just a typedef for
> struct inode and the various functions dealing with it are trivial
> wrapper, many of them in fact expanding to no code at all.

Right, but we are still using bhv_vnode_t in core XFS code, not
struct inode. There haven't been patches to decide that one way or
another.

> But maybe
> killing this syntactic sugar should be a separate patch.

I think so - do it once and once only.

> I only fear
> we'll never get it in with the current review and commit latencies
> for XFS :(

I can see this being a big issue in the not-too-distant future.....

> > > This needs a big comment on why you're using the gang lookup for
> > > a single lookup.  I guess that's because the gang lookup skips to
> > > the next actually existing entry instead of returning NULL, but that's
> > > not really obvious to the reader.
> > 
> > Fair enough. This was later in the original series where this was
> > a dirty tag lookup and didn't need commenting ;)
> 
> Actually even then a comment describing why we want a gang lookup
> when just looking up a single element might be a good idea.

Sure - I added that.

> On the
> other hand I wonder whether we might actually want to use the gang
> lookups, the unconditional igrab is probably cheaper than lots of
> radix tree lookups.

Yes, agreed, and that is the way I'm heading. I want to get
everything working against the radix tree before attempting
to optimise the lookups.

The next steps are:

	- combine the XFS + VFS inodes into one structure so that we
	  can simplify inode lookup.
	- bypass the linux icache so all lookups run from the radix
	  tree. This involves adding our own dirty VFS inode
	  tracking. This initially just uses the VFS superblock
	  dirty list like the generic code
	- expand the dirty inode tracking to setting a tag in the
	  radix tree.
	- changing radix tree walkers looking for dirty inodes to
	  use tag lookups - this requires adding interfaces to the
	  radix tree code
	- Use a tag in the radix tree to mark inodes for reclaim.
	- change reclaim to use a radix tree walk.
	- changing radix tree walkers to all use gang and gang-tag
	  lookups - this requires adding interfaces to the radix
	  tree code
	- factor the individual radix tree walk implementations once
	  commonalities are obvious.

This is just basic groundwork. The interesting stuff starts after
this - separating data and inode flushing, avoiding RMW cycles in
xfs_iflush, writeback aggregation beyond a single inode cluster,
multi-level caching and cache compression, RCU locking, thread pools
to parallelise tree walks, and so on.

There's a bunch of stuff in the pipeline - this is the basic
groundwork needed to catch it all effectively...

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH 4/4] XFS: remove the mount inode list
  2008-07-23  0:41 [PATCH 0/4] XFS: replace the mount inode list with radix tree traversals V2 Dave Chinner
@ 2008-07-23  0:41 ` Dave Chinner
  2008-07-23 20:46   ` Christoph Hellwig
  0 siblings, 1 reply; 31+ messages in thread
From: Dave Chinner @ 2008-07-23  0:41 UTC (permalink / raw)
  To: xfs; +Cc: Dave Chinner

Now we've removed all users of the mount inode list,
we can kill it. This reduces the size of the xfs_inode
by 2 pointers.

Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/xfs/xfs_iget.c  |   42 +-----------------------------------------
 fs/xfs/xfs_inode.h |    8 --------
 fs/xfs/xfs_mount.c |    5 -----
 fs/xfs/xfs_mount.h |    1 -
 4 files changed, 1 insertions(+), 55 deletions(-)

diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index b07604b..f16ca6c 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -76,7 +76,6 @@ xfs_iget_core(
 {
 	struct inode	*old_inode;
 	xfs_inode_t	*ip;
-	xfs_inode_t	*iq;
 	int		error;
 	unsigned long	first_index, mask;
 	xfs_perag_t	*pag;
@@ -260,24 +259,6 @@ finish_inode:
 
 	write_unlock(&pag->pag_ici_lock);
 	radix_tree_preload_end();
-
-	/*
-	 * Link ip to its mount and thread it on the mount's inode list.
-	 */
-	XFS_MOUNT_ILOCK(mp);
-	if ((iq = mp->m_inodes)) {
-		ASSERT(iq->i_mprev->i_mnext == iq);
-		ip->i_mprev = iq->i_mprev;
-		iq->i_mprev->i_mnext = ip;
-		iq->i_mprev = ip;
-		ip->i_mnext = iq;
-	} else {
-		ip->i_mnext = ip;
-		ip->i_mprev = ip;
-	}
-	mp->m_inodes = ip;
-
-	XFS_MOUNT_IUNLOCK(mp);
 	xfs_put_perag(mp, pag);
 
  return_ip:
@@ -490,36 +471,15 @@ xfs_iextract(
 {
 	xfs_mount_t	*mp = ip->i_mount;
 	xfs_perag_t	*pag = xfs_get_perag(mp, ip->i_ino);
-	xfs_inode_t	*iq;
 
 	write_lock(&pag->pag_ici_lock);
 	radix_tree_delete(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino));
 	write_unlock(&pag->pag_ici_lock);
 	xfs_put_perag(mp, pag);
 
-	/*
-	 * Remove from mount's inode list.
-	 */
-	XFS_MOUNT_ILOCK(mp);
-	ASSERT((ip->i_mnext != NULL) && (ip->i_mprev != NULL));
-	iq = ip->i_mnext;
-	iq->i_mprev = ip->i_mprev;
-	ip->i_mprev->i_mnext = iq;
-
-	/*
-	 * Fix up the head pointer if it points to the inode being deleted.
-	 */
-	if (mp->m_inodes == ip) {
-		if (ip == iq) {
-			mp->m_inodes = NULL;
-		} else {
-			mp->m_inodes = iq;
-		}
-	}
-
 	/* Deal with the deleted inodes list */
+	XFS_MOUNT_ILOCK(mp);
 	list_del_init(&ip->i_reclaim);
-
 	mp->m_ireclaims++;
 	XFS_MOUNT_IUNLOCK(mp);
 }
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 7ce41d3..c440b1d 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -192,16 +192,8 @@ typedef struct xfs_icdinode {
 	__uint32_t	di_gen;		/* generation number */
 } xfs_icdinode_t;
 
-typedef struct {
-	struct xfs_inode	*ip_mnext;	/* next inode in mount list */
-	struct xfs_inode	*ip_mprev;	/* ptr to prev inode */
-	struct xfs_mount	*ip_mount;	/* fs mount struct ptr */
-} xfs_iptr_t;
-
 typedef struct xfs_inode {
 	/* Inode linking and identification information. */
-	struct xfs_inode	*i_mnext;	/* next inode in mount list */
-	struct xfs_inode	*i_mprev;	/* ptr to prev inode */
 	struct xfs_mount	*i_mount;	/* fs mount struct ptr */
 	struct list_head	i_reclaim;	/* reclaim list */
 	bhv_vnode_t		*i_vnode;	/* vnode backpointer */
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 0c23f6a..364c075 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1314,11 +1314,6 @@ xfs_unmountfs(xfs_mount_t *mp)
 
 	xfs_freesb(mp);
 
-	/*
-	 * All inodes from this mount point should be freed.
-	 */
-	ASSERT(mp->m_inodes == NULL);
-
 	if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0)
 		uuid_table_remove(&mp->m_sb.sb_uuid);
 
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 6482005..4f257d7 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -247,7 +247,6 @@ typedef struct xfs_mount {
 	xfs_agnumber_t		m_agirotor;	/* last ag dir inode alloced */
 	spinlock_t		m_agirotor_lock;/* .. and lock protecting it */
 	xfs_agnumber_t		m_maxagi;	/* highest inode alloc group */
-	struct xfs_inode	*m_inodes;	/* active inode list */
 	struct list_head	m_del_inodes;	/* inodes to reclaim */
 	mutex_t			m_ilock;	/* inode list mutex */
 	uint			m_ireclaims;	/* count of calls to reclaim*/
-- 
1.5.6

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [PATCH 2/4] XFS: Use the inode tree for finding dirty inodes
  2008-07-23  0:05         ` Dave Chinner
@ 2008-07-23  2:10           ` Mark Goodwin
  2008-07-23  3:46             ` Eric Sandeen
                               ` (3 more replies)
  0 siblings, 4 replies; 31+ messages in thread
From: Mark Goodwin @ 2008-07-23  2:10 UTC (permalink / raw)
  To: Christoph Hellwig, xfs



Dave Chinner wrote:
> On Tue, Jul 22, 2008 at 03:27:33AM -0400, Christoph Hellwig wrote:
>> ...
>> I only fear
>> we'll never get it in with the current review and commit latencies
>> for XFS :(
>
> I can see this being a big issue in the not-too-distant future.....

[getting off-topic for this thread, but anyway ..]
This is already a big issue, obviously, and has been for some time.

Internally, we're attempting to refine our patch acceptance processes,
(e.g. gitify our internal dev tree and mirror it on oss so it's much
easier to push back out to oss). But the QA overhead remains a stubborn
problem. I think we're going to have to ask for QA tests (both regression
and performance) to be written as part of the patch acceptance policy -
under this policy, merely passing existing QA will not be sufficient.
Comments?

We have recently set up external access to a system for QA and
regression testing for Christoph's use .. perhaps that should
be a permanent offering?

Cheers
-- Mark

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 2/4] XFS: Use the inode tree for finding dirty inodes
  2008-07-23  2:10           ` Mark Goodwin
@ 2008-07-23  3:46             ` Eric Sandeen
  2008-07-23  4:04               ` Mark Goodwin
  2008-07-23  4:18             ` Dave Chinner
                               ` (2 subsequent siblings)
  3 siblings, 1 reply; 31+ messages in thread
From: Eric Sandeen @ 2008-07-23  3:46 UTC (permalink / raw)
  To: markgw; +Cc: Christoph Hellwig, xfs

Mark Goodwin wrote:
> 
> Dave Chinner wrote:
>> On Tue, Jul 22, 2008 at 03:27:33AM -0400, Christoph Hellwig wrote:
>>> ...
>>> I only fear
>>> we'll never get it in with the current review and commit latencies
>>> for XFS :(
>> I can see this being a big issue in the not-too-distant future.....
> 
> [getting off-topic for this thread, but anyway ..]
> This is already a big issue, obviously, and has been for some time.
> 
> Internally, we're attempting to refine our patch acceptance processes,
> (e.g. gitify our internal dev tree and mirror it on oss so it's much
> easier to push back out to oss). But the QA overhead remains a stubborn
> problem. I think we're going to have to ask for QA tests (both regression
> and performance) to be written as part of the patch acceptance policy -
> under this policy, merely passing existing QA will not be sufficient.
> Comments?

I think that'll depend very much on what the change is.  For new
functionality, sounds good; for bugfixes with testcases, sounds good.
For general algorithm improvements... how do you write a new QA test for
"Use the inode tree for finding dirty inodes?"  Or for that matter, my
remaining 2 shouting-removal patches ;)

> We have recently set up external access to a system for QA and
> regression testing for Christoph's use .. perhaps that should
> be a permanent offering?

Sounds awesome, for serious contributors.  I'd be happy to use it, too ;)

-Eric

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 2/4] XFS: Use the inode tree for finding dirty inodes
  2008-07-23  3:46             ` Eric Sandeen
@ 2008-07-23  4:04               ` Mark Goodwin
  2008-07-23  4:09                 ` Eric Sandeen
  2008-07-23  4:27                 ` Dave Chinner
  0 siblings, 2 replies; 31+ messages in thread
From: Mark Goodwin @ 2008-07-23  4:04 UTC (permalink / raw)
  To: Eric Sandeen; +Cc: Christoph Hellwig, xfs



Eric Sandeen wrote:
> Mark Goodwin wrote:
>> Dave Chinner wrote:
>>> On Tue, Jul 22, 2008 at 03:27:33AM -0400, Christoph Hellwig wrote:
>>>> ...
>>>> I only fear
>>>> we'll never get it in with the current review and commit latencies
>>>> for XFS :(
>>> I can see this being a big issue in the not-too-distant future.....
>> [getting off-topic for this thread, but anyway ..]
>> This is already a big issue, obviously, and has been for some time.
>>
>> Internally, we're attempting to refine our patch acceptance processes,
>> (e.g. gitify our internal dev tree and mirror it on oss so it's much
>> easier to push back out to oss). But the QA overhead remains a stubborn
>> problem. I think we're going to have to ask for QA tests (both regression
>> and performance) to be written as part of the patch acceptance policy -
>> under this policy, merely passing existing QA will not be sufficient.
>> Comments?
> 
> I think that'll depend very much on what the change is.  For new
> functionality, sounds good; for bugfixes with testcases, sounds good.

and for cleanups, just run existing QA (new test wouldn't normally make 
sense).

> For general algorithm improvements... how do you write a new QA test for
> "Use the inode tree for finding dirty inodes?"

Case by case basis I guess. e.g. write a test that exercises or stresses
the changed algorithm or functionality, especially corner cases (low
space, full AGs, mem pressure, whatever). Performance regression testing
is trickier - need access to suitable h/w and the historical data; so it's
probably best run on dedicated h/w every night against the dev tree.

> Or for that matter, my
> remaining 2 shouting-removal patches ;)

As above, just run QA for cleanups.

> 
>> We have recently set up external access to a system for QA and
>> regression testing for Christoph's use .. perhaps that should
>> be a permanent offering?
> 
> Sounds awesome, for serious contributors.  I'd be happy to use it, too ;)
> 
> -Eric
> 
> 

-- 

  Mark Goodwin                                  markgw@sgi.com
  Engineering Manager for XFS and PCP    Phone: +61-3-99631937
  SGI Australian Software Group           Cell: +61-4-18969583
-------------------------------------------------------------

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 2/4] XFS: Use the inode tree for finding dirty inodes
  2008-07-23  4:04               ` Mark Goodwin
@ 2008-07-23  4:09                 ` Eric Sandeen
  2008-07-23  5:00                   ` Mark Goodwin
  2008-07-23  4:27                 ` Dave Chinner
  1 sibling, 1 reply; 31+ messages in thread
From: Eric Sandeen @ 2008-07-23  4:09 UTC (permalink / raw)
  To: markgw; +Cc: Christoph Hellwig, xfs

Mark Goodwin wrote:

> As above, just run QA for cleanups.

Out of curiosity, will you trust this?  You'll run QA anyway, right?
And do you very often find errors in submitted patches by running QA?

-Eric

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 2/4] XFS: Use the inode tree for finding dirty inodes
  2008-07-23  2:10           ` Mark Goodwin
  2008-07-23  3:46             ` Eric Sandeen
@ 2008-07-23  4:18             ` Dave Chinner
  2008-07-23 15:37             ` Russell Cattelan
  2008-07-23 20:49             ` Christoph Hellwig
  3 siblings, 0 replies; 31+ messages in thread
From: Dave Chinner @ 2008-07-23  4:18 UTC (permalink / raw)
  To: Mark Goodwin; +Cc: Christoph Hellwig, xfs

On Wed, Jul 23, 2008 at 12:10:03PM +1000, Mark Goodwin wrote:
>
>
> Dave Chinner wrote:
>> On Tue, Jul 22, 2008 at 03:27:33AM -0400, Christoph Hellwig wrote:
>>> ...
>>> I only fear
>>> we'll never get it in with the current review and commit latencies
>>> for XFS :(
>>
>> I can see this being a big issue in the not-too-distant future.....
>
> [getting off-topic for this thread, but anyway ..]
> This is already a big issue, obviously, and has been for some time.
>
> Internally, we're attempting to refine our patch acceptance processes,
> (e.g. gitify our internal dev tree and mirror it on oss so it's much
> easier to push back out to oss). But the QA overhead remains a stubborn
> problem. I think we're going to have to ask for QA tests (both regression
> and performance) to be written as part of the patch acceptance policy -
> under this policy, merely passing existing QA will not be sufficient.
> Comments?

In general - unnecessary because most changes submitted don't
change behaviour or interfaces or that subsystem/behaviour
is already tested by the QA suite. For new features with externally
visable interfaces (the rare case) then requiring new tests is
just fine.

In reality - the community will route around SGI and go straight
to Andrew or Linus if this proves to be a burden.

Passing XFSQA is a significant indication that the change is as good
as the developer can do in their own environment; if it passes
review then at that point it needs wider QA, and that comes from
committing to *public repositories* so that and varied workloads and
machines stress the code.

If QA is a burden, it's because you're not allowing the wider
community easy access to the dev code to test at an early
stage. A dev tree is *meant to break*, it's not a prodution
environment. Gett eh code out there early and distribute the
QA workload, don't make it a burden on the developers or
a bottleneck to getting code accepted.

> We have recently set up external access to a system for QA and
> regression testing for Christoph's use .. perhaps that should
> be a permanent offering?

Yes.

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 2/4] XFS: Use the inode tree for finding dirty inodes
  2008-07-23  4:04               ` Mark Goodwin
  2008-07-23  4:09                 ` Eric Sandeen
@ 2008-07-23  4:27                 ` Dave Chinner
  1 sibling, 0 replies; 31+ messages in thread
From: Dave Chinner @ 2008-07-23  4:27 UTC (permalink / raw)
  To: Mark Goodwin; +Cc: Eric Sandeen, Christoph Hellwig, xfs

On Wed, Jul 23, 2008 at 02:04:06PM +1000, Mark Goodwin wrote:
>
> Eric Sandeen wrote:
>> Mark Goodwin wrote:
>> For general algorithm improvements... how do you write a new QA test for
>> "Use the inode tree for finding dirty inodes?"
>
> Case by case basis I guess. e.g. write a test that exercises or stresses
> the changed algorithm or functionality, especially corner cases
> (low space, full AGs, mem pressure, whatever).

Most of these corner cases are already covered by XFSQA.

> Performance regression testing
> is trickier - need access to suitable h/w and the historical data; so it's
> probably best run on dedicated h/w every night against the dev tree.

Performance regressions are something that generally are only
detected some time after the commit occurs - usually when it gets to
the wider community. Once again, it's an argument for further
distributing the QA load by committing quickly.

Indeed, in the case of regressions caused by commmunity
contributions, I think a bugzilla bug needs to be raised to
track it properly....

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 2/4] XFS: Use the inode tree for finding dirty inodes
  2008-07-23  4:09                 ` Eric Sandeen
@ 2008-07-23  5:00                   ` Mark Goodwin
  0 siblings, 0 replies; 31+ messages in thread
From: Mark Goodwin @ 2008-07-23  5:00 UTC (permalink / raw)
  To: Eric Sandeen; +Cc: Christoph Hellwig, xfs



Eric Sandeen wrote:
> Mark Goodwin wrote:
> 
>> As above, just run QA for cleanups.
> 
> Out of curiosity, will you trust this?  You'll run QA anyway, right?

yes we run it anyway, so maybe this (as a requirement) is overkill.

> And do you very often find errors in submitted patches by running QA?
> 

don't know, but it is part of the review process. For cleanups,
bugs/regressions seem to be fairly rare (hence they're subtle and
unexpected). I think you found one such regression a while back,
(after a herculean effort).

Cheers
-- Mark

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 2/4] XFS: Use the inode tree for finding dirty inodes
  2008-07-23  2:10           ` Mark Goodwin
  2008-07-23  3:46             ` Eric Sandeen
  2008-07-23  4:18             ` Dave Chinner
@ 2008-07-23 15:37             ` Russell Cattelan
  2008-07-24  6:02               ` Mark Goodwin
  2008-07-23 20:49             ` Christoph Hellwig
  3 siblings, 1 reply; 31+ messages in thread
From: Russell Cattelan @ 2008-07-23 15:37 UTC (permalink / raw)
  To: markgw; +Cc: xfs

Mark Goodwin wrote:
>
>
> Dave Chinner wrote:
>> On Tue, Jul 22, 2008 at 03:27:33AM -0400, Christoph Hellwig wrote:
>>> ...
>>> I only fear
>>> we'll never get it in with the current review and commit latencies
>>> for XFS :(
>>
>> I can see this being a big issue in the not-too-distant future.....
>
> [getting off-topic for this thread, but anyway ..]
> This is already a big issue, obviously, and has been for some time.
>
> Internally, we're attempting to refine our patch acceptance processes,
> (e.g. gitify our internal dev tree and mirror it on oss so it's much
> easier to push back out to oss). 
I'm sure you have seen this before:
http://oss.sgi.com/cgi-bin/gitweb.cgi?p=cattelan/xfs-import/.git;a=summary'

That is a running mirror of the ptools tree into git. (via the cvs tree)

It would be really nice to move all xfs development to git finally shut down
the whole ptools -> cvs update process.

This would help facilitate creation of more "experimental" trees and/or 
branches
so there would not be such a long delay of getting patches distributed.




> But the QA overhead remains a stubborn
> problem. I think we're going to have to ask for QA tests (both regression
> and performance) to be written as part of the patch acceptance policy -
> under this policy, merely passing existing QA will not be sufficient.
> Comments?
>
> We have recently set up external access to a system for QA and
> regression testing for Christoph's use .. perhaps that should
> be a permanent offering?
>
> Cheers
> -- Mark
>

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 4/4] XFS: remove the mount inode list
  2008-07-23  0:41 ` [PATCH 4/4] XFS: remove the mount inode list Dave Chinner
@ 2008-07-23 20:46   ` Christoph Hellwig
  0 siblings, 0 replies; 31+ messages in thread
From: Christoph Hellwig @ 2008-07-23 20:46 UTC (permalink / raw)
  To: Dave Chinner; +Cc: xfs

On Wed, Jul 23, 2008 at 10:41:13AM +1000, Dave Chinner wrote:
> Now we've removed all users of the mount inode list,
> we can kill it. This reduces the size of the xfs_inode
> by 2 pointers.

With this m_ilock can also become a spinlock.  Might not be worth
depending on how soon you want to kill m_del_inodes.


Signed-off-by: Christoph Hellwig <hch@lst.de>

Index: linux-2.6-xfs/fs/xfs/linux-2.6/xfs_super.c
===================================================================
--- linux-2.6-xfs.orig/fs/xfs/linux-2.6/xfs_super.c	2008-07-23 15:18:52.000000000 +0200
+++ linux-2.6-xfs/fs/xfs/linux-2.6/xfs_super.c	2008-07-23 15:19:28.000000000 +0200
@@ -1554,7 +1554,7 @@ xfs_fs_fill_super(
 		goto out;
 
 	spin_lock_init(&mp->m_sb_lock);
-	mutex_init(&mp->m_ilock);
+	spin_lock_init(&mp->m_ilock);
 	mutex_init(&mp->m_growlock);
 	atomic_set(&mp->m_active_trans, 0);
 	INIT_LIST_HEAD(&mp->m_sync_list);
Index: linux-2.6-xfs/fs/xfs/xfs_iget.c
===================================================================
--- linux-2.6-xfs.orig/fs/xfs/xfs_iget.c	2008-07-23 15:18:52.000000000 +0200
+++ linux-2.6-xfs/fs/xfs/xfs_iget.c	2008-07-23 15:19:28.000000000 +0200
@@ -144,9 +144,9 @@ again:
 			xfs_iflags_clear(ip, XFS_IRECLAIMABLE);
 			read_unlock(&pag->pag_ici_lock);
 
-			XFS_MOUNT_ILOCK(mp);
+			spin_lock(&mp->m_ilock);
 			list_del_init(&ip->i_reclaim);
-			XFS_MOUNT_IUNLOCK(mp);
+			spin_unlock(&mp->m_ilock);
 
 			goto finish_inode;
 
@@ -484,10 +484,10 @@ xfs_iextract(
 	xfs_put_perag(mp, pag);
 
 	/* Deal with the deleted inodes list */
-	XFS_MOUNT_ILOCK(mp);
+	spin_lock(&mp->m_ilock);
 	list_del_init(&ip->i_reclaim);
 	mp->m_ireclaims++;
-	XFS_MOUNT_IUNLOCK(mp);
+	spin_unlock(&mp->m_ilock);
 }
 
 /*
Index: linux-2.6-xfs/fs/xfs/xfs_mount.c
===================================================================
--- linux-2.6-xfs.orig/fs/xfs/xfs_mount.c	2008-07-23 15:18:52.000000000 +0200
+++ linux-2.6-xfs/fs/xfs/xfs_mount.c	2008-07-23 15:19:28.000000000 +0200
@@ -142,7 +142,7 @@ xfs_mount_free(
 
 	spinlock_destroy(&mp->m_ail_lock);
 	spinlock_destroy(&mp->m_sb_lock);
-	mutex_destroy(&mp->m_ilock);
+	spinlock_destroy(&mp->m_ilock);
 	mutex_destroy(&mp->m_growlock);
 	if (mp->m_quotainfo)
 		XFS_QM_DONE(mp);
Index: linux-2.6-xfs/fs/xfs/xfs_mount.h
===================================================================
--- linux-2.6-xfs.orig/fs/xfs/xfs_mount.h	2008-07-23 15:18:52.000000000 +0200
+++ linux-2.6-xfs/fs/xfs/xfs_mount.h	2008-07-23 15:19:28.000000000 +0200
@@ -248,7 +248,7 @@ typedef struct xfs_mount {
 	spinlock_t		m_agirotor_lock;/* .. and lock protecting it */
 	xfs_agnumber_t		m_maxagi;	/* highest inode alloc group */
 	struct list_head	m_del_inodes;	/* inodes to reclaim */
-	mutex_t			m_ilock;	/* inode list mutex */
+	spinlock_t		m_ilock;	/* synchronize m_del_inodes */
 	uint			m_ireclaims;	/* count of calls to reclaim*/
 	uint			m_readio_log;	/* min read size log bytes */
 	uint			m_readio_blocks; /* min read size blocks */
@@ -509,9 +509,6 @@ typedef struct xfs_mod_sb {
 	int64_t		msb_delta;	/* Change to make to specified field */
 } xfs_mod_sb_t;
 
-#define	XFS_MOUNT_ILOCK(mp)	mutex_lock(&((mp)->m_ilock))
-#define	XFS_MOUNT_IUNLOCK(mp)	mutex_unlock(&((mp)->m_ilock))
-
 extern void	xfs_mod_sb(xfs_trans_t *, __int64_t);
 extern int	xfs_log_sbcount(xfs_mount_t *, uint);
 extern int	xfs_mountfs(xfs_mount_t *mp, int);
Index: linux-2.6-xfs/fs/xfs/xfs_vnodeops.c
===================================================================
--- linux-2.6-xfs.orig/fs/xfs/xfs_vnodeops.c	2008-07-23 15:18:52.000000000 +0200
+++ linux-2.6-xfs/fs/xfs/xfs_vnodeops.c	2008-07-23 15:19:28.000000000 +0200
@@ -2834,14 +2834,14 @@ xfs_reclaim(
 		xfs_mount_t	*mp = ip->i_mount;
 
 		/* Protect sync and unpin from us */
-		XFS_MOUNT_ILOCK(mp);
+		spin_lock(&mp->m_ilock);
 		spin_lock(&ip->i_flags_lock);
 		__xfs_iflags_set(ip, XFS_IRECLAIMABLE);
 		vn_to_inode(vp)->i_private = NULL;
 		ip->i_vnode = NULL;
 		spin_unlock(&ip->i_flags_lock);
 		list_add_tail(&ip->i_reclaim, &mp->m_del_inodes);
-		XFS_MOUNT_IUNLOCK(mp);
+		spin_unlock(&mp->m_ilock);
 	}
 	return 0;
 }
@@ -2922,7 +2922,7 @@ xfs_finish_reclaim_all(
 	xfs_inode_t	*ip, *n;
 
 restart:
-	XFS_MOUNT_ILOCK(mp);
+	spin_lock(&mp->m_ilock);
 	list_for_each_entry_safe(ip, n, &mp->m_del_inodes, i_reclaim) {
 		if (noblock) {
 			if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0)
@@ -2933,12 +2933,12 @@ restart:
 				continue;
 			}
 		}
-		XFS_MOUNT_IUNLOCK(mp);
+		spin_unlock(&mp->m_ilock);
 		if (xfs_finish_reclaim(ip, noblock, mode))
 			delay(1);
 		goto restart;
 	}
-	XFS_MOUNT_IUNLOCK(mp);
+	spin_unlock(&mp->m_ilock);
 	return 0;
 }
 

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 2/4] XFS: Use the inode tree for finding dirty inodes
  2008-07-23  2:10           ` Mark Goodwin
                               ` (2 preceding siblings ...)
  2008-07-23 15:37             ` Russell Cattelan
@ 2008-07-23 20:49             ` Christoph Hellwig
  2008-07-24 11:46               ` Dave Chinner
  3 siblings, 1 reply; 31+ messages in thread
From: Christoph Hellwig @ 2008-07-23 20:49 UTC (permalink / raw)
  To: Mark Goodwin; +Cc: Christoph Hellwig, xfs

On Wed, Jul 23, 2008 at 12:10:03PM +1000, Mark Goodwin wrote:
> [getting off-topic for this thread, but anyway ..]
> This is already a big issue, obviously, and has been for some time.
>
> Internally, we're attempting to refine our patch acceptance processes,
> (e.g. gitify our internal dev tree and mirror it on oss so it's much
> easier to push back out to oss). But the QA overhead remains a stubborn
> problem. I think we're going to have to ask for QA tests (both regression
> and performance) to be written as part of the patch acceptance policy -
> under this policy, merely passing existing QA will not be sufficient.
> Comments?

For most patches that just clean things up or change internal
implementations adding new testcases doesn't make much sense.  The only
case is when you suspect/know the testcase don't cover this area enough.

What would be nice is if we'd add more testcases for user reported
regressions.

Btw, does anyone have recent gcov data for XFS?

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 2/4] XFS: Use the inode tree for finding dirty inodes
  2008-07-23 15:37             ` Russell Cattelan
@ 2008-07-24  6:02               ` Mark Goodwin
  2008-07-25  3:55                 ` Russell Cattelan
  0 siblings, 1 reply; 31+ messages in thread
From: Mark Goodwin @ 2008-07-24  6:02 UTC (permalink / raw)
  To: Russell Cattelan; +Cc: xfs



Russell Cattelan wrote:
>> Internally, we're attempting to refine our patch acceptance processes,
>> (e.g. gitify our internal dev tree and mirror it on oss so it's much
>> easier to push back out to oss).
> I'm sure you have seen this before:
> http://oss.sgi.com/cgi-bin/gitweb.cgi?p=cattelan/xfs-import/.git;a=summary'
> That is a running mirror of the ptools tree into git. (via the cvs tree)

yes. But it's git -> ptools scripts that we need, preserving history, etc.
Niv has some scripts for this - they're not production quality yet, but
we're getting there. Once this transitions, it'll be a *lot* easier for
us to pull in patches from external developer branches because we'll all
be using git for checkin.

> It would be really nice to move all xfs development to git finally shut
> down
> the whole ptools -> cvs update process.

once our internal dev tree is git based and internal git->ptools merging
is fully automatic, there is no actual need to shutdown the cvs crons.
It's worked for years and can stay running, no harm.

> This would help facilitate creation of more "experimental" trees and/or
> branches
> so there would not be such a long delay of getting patches distributed.

I think we'd just end up with a git dev branch on oss, maybe with a
daily pull from the internal dev tree (Russell, that would render
your cvs->git mirror obsolete I guess). In any case, patch flow and
turn-around should be greatly improved.

Anyone have comments on any of the above?

Cheers
-- Mark

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 2/4] XFS: Use the inode tree for finding dirty inodes
  2008-07-23 20:49             ` Christoph Hellwig
@ 2008-07-24 11:46               ` Dave Chinner
  0 siblings, 0 replies; 31+ messages in thread
From: Dave Chinner @ 2008-07-24 11:46 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Mark Goodwin, xfs

On Wed, Jul 23, 2008 at 04:49:27PM -0400, Christoph Hellwig wrote:
> Btw, does anyone have recent gcov data for XFS?

Last time I checked (not recent) xfsqa covered about 70% of XFS with
default mkfs and mount options. The code has changed a lot since
then and there's been quite a few additional tests added, so I've
got no idea right now...

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 2/4] XFS: Use the inode tree for finding dirty inodes
  2008-07-24  6:02               ` Mark Goodwin
@ 2008-07-25  3:55                 ` Russell Cattelan
  2008-07-25  4:08                   ` Mark Goodwin
  2008-07-25  6:55                   ` Niv Sardi
  0 siblings, 2 replies; 31+ messages in thread
From: Russell Cattelan @ 2008-07-25  3:55 UTC (permalink / raw)
  To: markgw; +Cc: xfs

Mark Goodwin wrote:
>
>
> Russell Cattelan wrote:
>>> Internally, we're attempting to refine our patch acceptance processes,
>>> (e.g. gitify our internal dev tree and mirror it on oss so it's much
>>> easier to push back out to oss).
>> I'm sure you have seen this before:
>> http://oss.sgi.com/cgi-bin/gitweb.cgi?p=cattelan/xfs-import/.git;a=summary' 
>>
>> That is a running mirror of the ptools tree into git. (via the cvs tree)
>
> yes. But it's git -> ptools scripts that we need, preserving history, 
> etc.
> Niv has some scripts for this - they're not production quality yet, but
> we're getting there. Once this transitions, it'll be a *lot* easier for
> us to pull in patches from external developer branches because we'll all
> be using git for checkin.
Personally I don't see a reason to keep a ptools tree in lock step with
with a git tree. I all for not losing history (and I spent a bit of time 
when
the tree was re-organized to keep the rcs history in tack).
At this point the git tree has full xfs history and I would think this would
be sufficient for what ever code archeology  comes up.
>
>> It would be really nice to move all xfs development to git finally shut
>> down
>> the whole ptools -> cvs update process.
>
> once our internal dev tree is git based and internal git->ptools merging
> is fully automatic, there is no actual need to shutdown the cvs crons.
> It's worked for years and can stay running, no harm.
Ya I'm still amazed those scripts are holding up given nobody is giving
them any TLC.  :-(

>
>> This would help facilitate creation of more "experimental" trees and/or
>> branches
>> so there would not be such a long delay of getting patches distributed.
>
> I think we'd just end up with a git dev branch on oss, maybe with a
> daily pull from the internal dev tree (Russell, that would render
> your cvs->git mirror obsolete I guess). In any case, patch flow and
> turn-around should be greatly improved.
The one thing about about SCM's that they are entirely a pain in the
ass, but one of the most important tools in software engineering.
So whatever happens it should be simple yet sufficient.

>
> Anyone have comments on any of the above?
>
> Cheers
> -- Mark
>

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 2/4] XFS: Use the inode tree for finding dirty inodes
  2008-07-25  3:55                 ` Russell Cattelan
@ 2008-07-25  4:08                   ` Mark Goodwin
  2008-07-25  5:40                     ` Russell Cattelan
  2008-07-25  6:55                   ` Niv Sardi
  1 sibling, 1 reply; 31+ messages in thread
From: Mark Goodwin @ 2008-07-25  4:08 UTC (permalink / raw)
  To: Russell Cattelan; +Cc: xfs



Russell Cattelan wrote:
> Personally I don't see a reason to keep a ptools tree in lock step with
> with a git tree. ...

you might not, but you're not working for SGI anymore :)
We have loads of other trees to merge stuff into other than
those on oss. Many of the internal scripts for managing this
are very intertwined with ptools. The one thing that will
really help with handling externally contributed patches is
for the primary internal SGI dev tree to become GIT based.
But not having git->ptools auto merge is not an option.

PCP is in the same boat, just ask Nathan :)

Looking at this next week ...

Cheers
-- Mark

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 2/4] XFS: Use the inode tree for finding dirty inodes
  2008-07-25  4:08                   ` Mark Goodwin
@ 2008-07-25  5:40                     ` Russell Cattelan
  0 siblings, 0 replies; 31+ messages in thread
From: Russell Cattelan @ 2008-07-25  5:40 UTC (permalink / raw)
  To: markgw; +Cc: xfs

Mark Goodwin wrote:
>
>
> Russell Cattelan wrote:
>> Personally I don't see a reason to keep a ptools tree in lock step with
>> with a git tree. ...
>
> you might not, but you're not working for SGI anymore :)
> We have loads of other trees to merge stuff into other than
> those on oss. 
Ya I'm familiar with the complexity disguising itself as productivity.
(But I will say I experienced a much worse complexity for the sake of
complexity situation)
> Many of the internal scripts for managing this
> are very intertwined with ptools. The one thing that will
> really help with handling externally contributed patches is
> for the primary internal SGI dev tree to become GIT based.
> But not having git->ptools auto merge is not an option.
Actually if auto merge is a requirement I don't really see a shift in
policy. I don't see much difference in a ptool->git vs a git->ptools 
process.

Just because  git is the birthing repo for changes, it seems like that 
is just
drawing more process into things and making change adoption
just as slow if not slower.

It seems like decoupling the process, not just switching SCM's and allowing
experimental  changes to come to light sooner is what people are looking 
for.

>
> PCP is in the same boat, just ask Nathan :)
>
> Looking at this next week ...
>
> Cheers
> -- Mark
>

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 2/4] XFS: Use the inode tree for finding dirty inodes
  2008-07-25  3:55                 ` Russell Cattelan
  2008-07-25  4:08                   ` Mark Goodwin
@ 2008-07-25  6:55                   ` Niv Sardi
  1 sibling, 0 replies; 31+ messages in thread
From: Niv Sardi @ 2008-07-25  6:55 UTC (permalink / raw)
  To: Russell Cattelan; +Cc: markgw, xfs

Russell Cattelan <cattelan@thebarn.com> writes:
> Mark Goodwin wrote:
>> Russell Cattelan wrote:
>>>> Internally, we're attempting to refine our patch acceptance processes,
>>>> (e.g. gitify our internal dev tree and mirror it on oss so it's much
>>>> easier to push back out to oss).
>>> I'm sure you have seen this before:
>>> http://oss.sgi.com/cgi-bin/gitweb.cgi?p=cattelan/xfs-import/.git;a=summary' 

moving from one SCM that is not syncable with the community to another
is not helping at all. While that clone is great to work *with* git, the
commit ids have nothing to do with the community's one, and we stil have
to convert all the work (to fixup commiter names and so forth too) to be
able to be integrated in the community.

>>> That is a running mirror of the ptools tree into git. (via the cvs tree)
>> yes. But it's git -> ptools scripts that we need, preserving
>> history, etc.
>> Niv has some scripts for this - they're not production quality yet, but
>> we're getting there. Once this transitions, it'll be a *lot* easier for
>> us to pull in patches from external developer branches because we'll all
>> be using git for checkin.
> Personally I don't see a reason to keep a ptools tree in lock step with
> with a git tree. I all for not losing history (and I spent a bit of
> time when
> the tree was re-organized to keep the rcs history in tack).
> At this point the git tree has full xfs history and I would think this would
> be sufficient for what ever code archeology  comes up.

for code archeology, we have many other trees. I'm more interested to be
able to answer things like: 'what commits have never made it to linus ?'
(dmapi, patches that had to be slightly changed) and be able to sort all
our mods in 3 categories: merged-upstream (git-patch-id equality),
partly-merged-upstream (commit title equality), unkonwn-upstream.

I have a tree here, ready to be used to replace the internal ptools
tree, it needs some more work to detect the non-merged part of the
partly-merged-upstream, but well, the diff is mostly whitespaces, and I
guess we can discard them.

Hopefully, this can be widely used RSN.

Cheers,
-- 
Niv Sardi

^ permalink raw reply	[flat|nested] 31+ messages in thread

end of thread, other threads:[~2008-07-25  6:54 UTC | newest]

Thread overview: 31+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-07-20 12:19 [PATCH 0/4] XFS: replace the mount inode list with radix tree traversals Dave Chinner
2008-07-20 12:19 ` [PATCH 1/4] XFS: Remove xfs_iflush_all and clean up xfs_finish_reclaim_all() Dave Chinner
2008-07-21  7:58   ` Christoph Hellwig
2008-07-21 11:33     ` Dave Chinner
2008-07-22  4:24       ` Christoph Hellwig
2008-07-20 12:19 ` [PATCH 2/4] XFS: Use the inode tree for finding dirty inodes Dave Chinner
2008-07-22  4:28   ` Christoph Hellwig
2008-07-22  5:30     ` Dave Chinner
2008-07-22  7:27       ` Christoph Hellwig
2008-07-23  0:05         ` Dave Chinner
2008-07-23  2:10           ` Mark Goodwin
2008-07-23  3:46             ` Eric Sandeen
2008-07-23  4:04               ` Mark Goodwin
2008-07-23  4:09                 ` Eric Sandeen
2008-07-23  5:00                   ` Mark Goodwin
2008-07-23  4:27                 ` Dave Chinner
2008-07-23  4:18             ` Dave Chinner
2008-07-23 15:37             ` Russell Cattelan
2008-07-24  6:02               ` Mark Goodwin
2008-07-25  3:55                 ` Russell Cattelan
2008-07-25  4:08                   ` Mark Goodwin
2008-07-25  5:40                     ` Russell Cattelan
2008-07-25  6:55                   ` Niv Sardi
2008-07-23 20:49             ` Christoph Hellwig
2008-07-24 11:46               ` Dave Chinner
2008-07-20 12:19 ` [PATCH 3/4] XFS: Traverse inode trees when releasing dquots Dave Chinner
2008-07-20 12:19 ` [PATCH 4/4] XFS: remove the mount inode list Dave Chinner
2008-07-22  4:29   ` Christoph Hellwig
2008-07-22  5:42     ` Dave Chinner
  -- strict thread matches above, loose matches on Subject: below --
2008-07-23  0:41 [PATCH 0/4] XFS: replace the mount inode list with radix tree traversals V2 Dave Chinner
2008-07-23  0:41 ` [PATCH 4/4] XFS: remove the mount inode list Dave Chinner
2008-07-23 20:46   ` Christoph Hellwig

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox