public inbox for linux-xfs@vger.kernel.org
 help / color / mirror / Atom feed
* [review] Remove the xfs refcache
@ 2007-12-17  3:26 Donald Douwsma
  2007-12-17  4:00 ` Lachlan McIlroy
  2008-02-07  8:35 ` Christoph Hellwig
  0 siblings, 2 replies; 13+ messages in thread
From: Donald Douwsma @ 2007-12-17  3:26 UTC (permalink / raw)
  To: xfs-oss

[-- Attachment #1: Type: text/plain, Size: 92 bytes --]

Remove the xfs_refcache, it was only needed while we were still building for
2.4 kernels.



[-- Attachment #2: remove_xfs_refcache.patch --]
[-- Type: text/plain, Size: 7831 bytes --]


--- a/fs/xfs/linux-2.6/xfs_ksyms.c	2007-12-17 11:55:09.000000000 +1100
+++ b/fs/xfs/linux-2.6/xfs_ksyms.c	2007-12-17 11:34:19.000000000 +1100
@@ -46,7 +46,6 @@
 #include "xfs_error.h"
 #include "xfs_itable.h"
 #include "xfs_rw.h"
-#include "xfs_refcache.h"
 #include "xfs_dir2_data.h"
 #include "xfs_dir2_leaf.h"
 #include "xfs_dir2_block.h"

--- a/fs/xfs/linux-2.6/xfs_linux.h	2007-12-17 11:55:09.000000000 +1100
+++ b/fs/xfs/linux-2.6/xfs_linux.h	2007-12-17 10:30:53.000000000 +1100
@@ -99,7 +99,6 @@
 /*
  * Feature macros (disable/enable)
  */
-#undef  HAVE_REFCACHE	/* reference cache not needed for NFS in 2.6 */
 #define HAVE_SPLICE	/* a splice(2) exists in 2.6, but not in 2.4 */
 #ifdef CONFIG_SMP
 #define HAVE_PERCPU_SB	/* per cpu superblock counters are a 2.6 feature */

--- a/fs/xfs/xfs_inode.h	2007-12-17 11:55:09.000000000 +1100
+++ b/fs/xfs/xfs_inode.h	2007-12-17 10:31:25.000000000 +1100
@@ -240,10 +240,6 @@ typedef struct xfs_inode {
 	atomic_t		i_pincount;	/* inode pin count */
 	wait_queue_head_t	i_ipin_wait;	/* inode pinning wait queue */
 	spinlock_t		i_flags_lock;	/* inode i_flags lock */
-#ifdef HAVE_REFCACHE
-	struct xfs_inode	**i_refcache;	/* ptr to entry in ref cache */
-	struct xfs_inode	*i_release;	/* inode to unref */
-#endif
 	/* Miscellaneous state. */
 	unsigned short		i_flags;	/* see defined flags below */
 	unsigned char		i_update_core;	/* timestamps/size is dirty */

--- a/fs/xfs/xfs_rename.c	2007-12-17 11:55:09.000000000 +1100
+++ b/fs/xfs/xfs_rename.c	2007-12-17 10:33:32.000000000 +1100
@@ -36,7 +36,6 @@
 #include "xfs_bmap.h"
 #include "xfs_error.h"
 #include "xfs_quota.h"
-#include "xfs_refcache.h"
 #include "xfs_utils.h"
 #include "xfs_trans_space.h"
 #include "xfs_vnodeops.h"
@@ -580,10 +579,8 @@ xfs_rename(
 	 * the vnode references.
 	 */
 	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
-	if (target_ip != NULL) {
-		xfs_refcache_purge_ip(target_ip);
+	if (target_ip != NULL)
 		IRELE(target_ip);
-	}
 	/*
 	 * Let interposed file systems know about removed links.
 	 */

--- a/fs/xfs/xfs_vfsops.c	2007-12-17 11:55:09.000000000 +1100
+++ b/fs/xfs/xfs_vfsops.c	2007-12-17 11:34:19.000000000 +1100
@@ -43,7 +43,6 @@
 #include "xfs_error.h"
 #include "xfs_bmap.h"
 #include "xfs_rw.h"
-#include "xfs_refcache.h"
 #include "xfs_buf_item.h"
 #include "xfs_log_priv.h"
 #include "xfs_dir2_trace.h"
@@ -157,7 +156,6 @@ xfs_cleanup(void)
 
 	xfs_cleanup_procfs();
 	xfs_sysctl_unregister();
-	xfs_refcache_destroy();
 	xfs_filestream_uninit();
 	xfs_mru_cache_uninit();
 	xfs_acl_zone_destroy(xfs_acl_zone);
@@ -585,11 +583,6 @@ xfs_unmount(
 					0 : DM_FLAGS_UNWANTED;
 	}
 #endif
-	/*
-	 * First blow any referenced inode from this file system
-	 * out of the reference cache, and delete the timer.
-	 */
-	xfs_refcache_purge_mp(mp);
 
 	/*
 	 * Blow away any referenced inode in the filestreams cache.
@@ -653,7 +646,6 @@ xfs_quiesce_fs(
 {
 	int			count = 0, pincount;
 
-	xfs_refcache_purge_mp(mp);
 	xfs_flush_buftarg(mp->m_ddev_targp, 0);
 	xfs_finish_reclaim_all(mp, 0);
 
@@ -1344,18 +1336,6 @@ xfs_syncsub(
 	}
 
 	/*
-	 * If this is the periodic sync, then kick some entries out of
-	 * the reference cache.  This ensures that idle entries are
-	 * eventually kicked out of the cache.
-	 */
-	if (flags & SYNC_REFCACHE) {
-		if (flags & SYNC_WAIT)
-			xfs_refcache_purge_mp(mp);
-		else
-			xfs_refcache_purge_some(mp);
-	}
-
-	/*
 	 * If asked, update the disk superblock with incore counter values if we
 	 * are using non-persistent counters so that they don't get too far out
 	 * of sync if we crash or get a forced shutdown. We don't want to force

--- a/fs/xfs/xfs_vnodeops.c	2007-12-17 11:55:09.000000000 +1100
+++ b/fs/xfs/xfs_vnodeops.c	2007-12-17 11:34:20.000000000 +1100
@@ -48,7 +48,6 @@
 #include "xfs_quota.h"
 #include "xfs_utils.h"
 #include "xfs_rtalloc.h"
-#include "xfs_refcache.h"
 #include "xfs_trans_space.h"
 #include "xfs_log_priv.h"
 #include "xfs_filestream.h"
@@ -1541,12 +1540,6 @@ xfs_release(
 			xfs_flush_pages(ip, 0, -1, XFS_B_ASYNC, FI_NONE);
 	}
 
-#ifdef HAVE_REFCACHE
-	/* If we are in the NFS reference cache then don't do this now */
-	if (ip->i_refcache)
-		return 0;
-#endif
-
 	if (ip->i_d.di_nlink != 0) {
 		if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
 		     ((ip->i_size > 0) || (VN_CACHED(vp) > 0 ||
@@ -2476,14 +2469,6 @@ xfs_remove(
 	}
 
 	/*
-	 * Before we drop our extra reference to the inode, purge it
-	 * from the refcache if it is there.  By waiting until afterwards
-	 * to do the IRELE, we ensure that we won't go inactive in the
-	 * xfs_refcache_purge_ip routine (although that would be OK).
-	 */
-	xfs_refcache_purge_ip(ip);
-
-	/*
 	 * If we are using filestreams, kill the stream association.
 	 * If the file is still open it may get a new one but that
 	 * will get killed on last close in xfs_close() so we don't
@@ -2522,14 +2507,6 @@ xfs_remove(
 	cancel_flags |= XFS_TRANS_ABORT;
 	xfs_trans_cancel(tp, cancel_flags);
 
-	/*
-	 * Before we drop our extra reference to the inode, purge it
-	 * from the refcache if it is there.  By waiting until afterwards
-	 * to do the IRELE, we ensure that we won't go inactive in the
-	 * xfs_refcache_purge_ip routine (although that would be OK).
-	 */
-	xfs_refcache_purge_ip(ip);
-
 	IRELE(ip);
 
 	goto std_return;
@@ -3487,16 +3464,7 @@ xfs_rwunlock(
 {
  	if (S_ISDIR(ip->i_d.di_mode))
   		return;
-	if (locktype == VRWLOCK_WRITE) {
-		/*
-		 * In the write case, we may have added a new entry to
-		 * the reference cache.  This might store a pointer to
-		 * an inode to be released in this inode.  If it is there,
-		 * clear the pointer and release the inode after unlocking
-		 * this one.
-		 */
-		xfs_refcache_iunlock(ip, XFS_IOLOCK_EXCL);
-	} else {
+	if (locktype != VRWLOCK_WRITE) {
 		ASSERT((locktype == VRWLOCK_READ) ||
 		       (locktype == VRWLOCK_WRITE_DIRECT));
 		xfs_iunlock(ip, XFS_IOLOCK_SHARED);

--- a/fs/xfs/xfs_refcache.h	2007-12-17 11:55:19.000000000 +1100
+++ b/fs/xfs/xfs_refcache.h	2006-06-16 23:54:00.000000000 +1000
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_REFCACHE_H__
-#define __XFS_REFCACHE_H__
-
-#ifdef HAVE_REFCACHE
-/*
- * Maximum size (in inodes) for the NFS reference cache
- */
-#define XFS_REFCACHE_SIZE_MAX	512
-
-struct xfs_inode;
-struct xfs_mount;
-
-extern void xfs_refcache_insert(struct xfs_inode *);
-extern void xfs_refcache_purge_ip(struct xfs_inode *);
-extern void xfs_refcache_purge_mp(struct xfs_mount *);
-extern void xfs_refcache_purge_some(struct xfs_mount *);
-extern void xfs_refcache_resize(int);
-extern void xfs_refcache_destroy(void);
-
-extern void xfs_refcache_iunlock(struct xfs_inode *, uint);
-
-#else
-
-#define xfs_refcache_insert(ip)		do { } while (0)
-#define xfs_refcache_purge_ip(ip)	do { } while (0)
-#define xfs_refcache_purge_mp(mp)	do { } while (0)
-#define xfs_refcache_purge_some(mp)	do { } while (0)
-#define xfs_refcache_resize(size)	do { } while (0)
-#define xfs_refcache_destroy()		do { } while (0)
-
-#define xfs_refcache_iunlock(ip, flags)	xfs_iunlock(ip, flags)
-
-#endif
-
-#endif	/* __XFS_REFCACHE_H__ */

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [review] Remove the xfs refcache
  2007-12-17  3:26 [review] Remove the xfs refcache Donald Douwsma
@ 2007-12-17  4:00 ` Lachlan McIlroy
  2007-12-17  4:12   ` Donald Douwsma
                     ` (2 more replies)
  2008-02-07  8:35 ` Christoph Hellwig
  1 sibling, 3 replies; 13+ messages in thread
From: Lachlan McIlroy @ 2007-12-17  4:00 UTC (permalink / raw)
  To: Donald Douwsma; +Cc: xfs-oss

Hey hold on there buddy!  We may need to reactivate this code to fix some
performance issues.  I've acttually got this code working and proven that
it is one way to fix some of the NAS/NFS issues we have.

The little comment that reads "reference cache not needed for NFS in 2.6"
is wrong - we do need it or something like it.

Donald Douwsma wrote:
> Remove the xfs_refcache, it was only needed while we were still building 
> for
> 2.4 kernels.
> 
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [review] Remove the xfs refcache
  2007-12-17  4:00 ` Lachlan McIlroy
@ 2007-12-17  4:12   ` Donald Douwsma
  2007-12-17  5:48   ` Eric Sandeen
  2007-12-17  7:14   ` Christoph Hellwig
  2 siblings, 0 replies; 13+ messages in thread
From: Donald Douwsma @ 2007-12-17  4:12 UTC (permalink / raw)
  To: lachlan; +Cc: xfs-oss

Lachlan McIlroy wrote:
> Hey hold on there buddy!  We may need to reactivate this code to fix some
> performance issues.  I've acttually got this code working and proven that
> it is one way to fix some of the NAS/NFS issues we have.
> 
> The little comment that reads "reference cache not needed for NFS in 2.6"
> is wrong - we do need it or something like it.

Cool :)

I'm just trying to clean up any 2.4 build leftovers before things go
upstream for .25. Bear in mind that xfs_refcache.c is only present in
ptools/CVS, it was excluded for mainline when it wasn't needed for the
2.6 builds.

Don

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [review] Remove the xfs refcache
  2007-12-17  4:00 ` Lachlan McIlroy
  2007-12-17  4:12   ` Donald Douwsma
@ 2007-12-17  5:48   ` Eric Sandeen
  2007-12-17  7:15     ` Christoph Hellwig
  2007-12-17  7:14   ` Christoph Hellwig
  2 siblings, 1 reply; 13+ messages in thread
From: Eric Sandeen @ 2007-12-17  5:48 UTC (permalink / raw)
  To: lachlan; +Cc: Donald Douwsma, xfs-oss

Lachlan McIlroy wrote:
> Hey hold on there buddy!  We may need to reactivate this code to fix some
> performance issues.  I've acttually got this code working and proven that
> it is one way to fix some of the NAS/NFS issues we have.
> 
> The little comment that reads "reference cache not needed for NFS in 2.6"
> is wrong - we do need it or something like it.

So, the whole reason this was not built for 2.6 was that ostensibly the
2.6 kernel already did this sort of caching for nfs.  But, last time I
looked I didn't see anything obvious.  Anybody know? :)

-Eric


> Donald Douwsma wrote:
>> Remove the xfs_refcache, it was only needed while we were still building 
>> for
>> 2.4 kernels.
>>
>>
> 
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [review] Remove the xfs refcache
  2007-12-17  4:00 ` Lachlan McIlroy
  2007-12-17  4:12   ` Donald Douwsma
  2007-12-17  5:48   ` Eric Sandeen
@ 2007-12-17  7:14   ` Christoph Hellwig
  2007-12-17  7:39     ` Greg Banks
  2007-12-18  4:12     ` Lachlan McIlroy
  2 siblings, 2 replies; 13+ messages in thread
From: Christoph Hellwig @ 2007-12-17  7:14 UTC (permalink / raw)
  To: Lachlan McIlroy; +Cc: Donald Douwsma, xfs-oss, gnb

On Mon, Dec 17, 2007 at 03:00:04PM +1100, Lachlan McIlroy wrote:
> Hey hold on there buddy!  We may need to reactivate this code to fix some
> performance issues.  I've acttually got this code working and proven that
> it is one way to fix some of the NAS/NFS issues we have.
>
> The little comment that reads "reference cache not needed for NFS in 2.6"
> is wrong - we do need it or something like it.

Not in XFS, though.  This thing needs to be done genericly in NFSD.
Greg has been working on an open files cache in nfsd which should be
helping this.

Adding a cache like this back into XFS will get my veto (at least for
mainline where I have a bit of a say :))

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [review] Remove the xfs refcache
  2007-12-17  5:48   ` Eric Sandeen
@ 2007-12-17  7:15     ` Christoph Hellwig
  0 siblings, 0 replies; 13+ messages in thread
From: Christoph Hellwig @ 2007-12-17  7:15 UTC (permalink / raw)
  To: Eric Sandeen; +Cc: lachlan, Donald Douwsma, xfs-oss

On Sun, Dec 16, 2007 at 11:48:39PM -0600, Eric Sandeen wrote:
> Lachlan McIlroy wrote:
> > Hey hold on there buddy!  We may need to reactivate this code to fix some
> > performance issues.  I've acttually got this code working and proven that
> > it is one way to fix some of the NAS/NFS issues we have.
> > 
> > The little comment that reads "reference cache not needed for NFS in 2.6"
> > is wrong - we do need it or something like it.
> 
> So, the whole reason this was not built for 2.6 was that ostensibly the
> 2.6 kernel already did this sort of caching for nfs.  But, last time I
> looked I didn't see anything obvious.  Anybody know? :)

nfsd in 2.6 keeps the inodes around, which is 90% of what the open files
cache did.  Now what it also does is to not release preallocations
everytime but keep them while the inode is in the refcache.  For that
we'll need some additional support, and the best would be an open files
cache like the one Greg has been hacking on for quite a while.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [review] Remove the xfs refcache
  2007-12-17  7:14   ` Christoph Hellwig
@ 2007-12-17  7:39     ` Greg Banks
  2007-12-18  4:12     ` Lachlan McIlroy
  1 sibling, 0 replies; 13+ messages in thread
From: Greg Banks @ 2007-12-17  7:39 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Lachlan McIlroy, Donald Douwsma, xfs-oss

On Mon, Dec 17, 2007 at 07:14:27AM +0000, Christoph Hellwig wrote:
> On Mon, Dec 17, 2007 at 03:00:04PM +1100, Lachlan McIlroy wrote:
> > Hey hold on there buddy!  We may need to reactivate this code to fix some
> > performance issues.  I've acttually got this code working and proven that
> > it is one way to fix some of the NAS/NFS issues we have.
> >
> > The little comment that reads "reference cache not needed for NFS in 2.6"
> > is wrong - we do need it or something like it.
> 
> Not in XFS, though.  This thing needs to be done genericly in NFSD.
> Greg has been working on an open files cache in nfsd which should be
> helping this.

BTW, status on that: The current implementation has mutated into
a filehandle-to-dentry mapping cache, because that helps some
metadata-heavy workloads too.  I've been testing it these last few
weeks at a customer site, and it's been useful, but there are still
some issues to be worked out before I can post it.

> Adding a cache like this back into XFS will get my veto (at least for
> mainline where I have a bit of a say :))

Greg.
-- 
Greg Banks, R&D Software Engineer, SGI Australian Software Group.
Apparently, I'm Bedevere.  Which MPHG character are you?
I don't speak for SGI.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [review] Remove the xfs refcache
  2007-12-17  7:14   ` Christoph Hellwig
  2007-12-17  7:39     ` Greg Banks
@ 2007-12-18  4:12     ` Lachlan McIlroy
  2007-12-18  7:44       ` David Chinner
  1 sibling, 1 reply; 13+ messages in thread
From: Lachlan McIlroy @ 2007-12-18  4:12 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Donald Douwsma, xfs-oss, gnb

Christoph Hellwig wrote:
> On Mon, Dec 17, 2007 at 03:00:04PM +1100, Lachlan McIlroy wrote:
>> Hey hold on there buddy!  We may need to reactivate this code to fix some
>> performance issues.  I've acttually got this code working and proven that
>> it is one way to fix some of the NAS/NFS issues we have.
>>
>> The little comment that reads "reference cache not needed for NFS in 2.6"
>> is wrong - we do need it or something like it.
> 
> Not in XFS, though.  This thing needs to be done genericly in NFSD.
> Greg has been working on an open files cache in nfsd which should be
> helping this.
> 
> Adding a cache like this back into XFS will get my veto (at least for
> mainline where I have a bit of a say :))
> 

Greg's NFS OFC will provide better performance for strictly NFS workloads
and if all we are trying to fix here is the NFS issues then I agree with
you that the OFC should not go into XFS.

Since I have been able to reproduce some of our NAS/NFS performance problems
without NFS (that is demonstrate that the problems are in XFS) it makes some
sense to fix these in XFS.  I have observed that for some non-NFS workloads
we see a significant reduction in log traffic with the OFC in XFS so for
reasons beyond NFS there may be a need to reactivate the refcache code.  For
the moment we are still analysing the pros/cons.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [review] Remove the xfs refcache
  2007-12-18  4:12     ` Lachlan McIlroy
@ 2007-12-18  7:44       ` David Chinner
  2007-12-19  3:43         ` David Chinner
  0 siblings, 1 reply; 13+ messages in thread
From: David Chinner @ 2007-12-18  7:44 UTC (permalink / raw)
  To: Lachlan McIlroy; +Cc: Christoph Hellwig, Donald Douwsma, xfs-oss, gnb

On Tue, Dec 18, 2007 at 03:12:02PM +1100, Lachlan McIlroy wrote:
> Since I have been able to reproduce some of our NAS/NFS performance problems
> without NFS (that is demonstrate that the problems are in XFS) it makes some
> sense to fix these in XFS.  I have observed that for some non-NFS workloads
> we see a significant reduction in log traffic with the OFC in XFS so for
> reasons beyond NFS there may be a need to reactivate the refcache code.  For
> the moment we are still analysing the pros/cons.

Reactivating the ref cache is fundamentally the wrong thing to do.
Most of these problems come from the mismatch of inode life cycles
between Linux and XFS and this is the basic problem we need to solve.

For example - do the open-write-close related performance issues go
away if you remove the xfs_free_eofblocks() call in xfs_release()?
i.e. are we just being stupid about the way we deal with closing
of file descriptors?

This should work because the linux inode will remain around with a
ref-count of 1 on the unused list due to the dentry pinning it
in place. Only when the dentry gets reclaimed (e.g. memory pressure,
unlink, unmount, etc) will the truncate occur, and hence repeated
single file open-write-close based workloads (like the nfsd) don't
issue a truncate transaction and trash the EOF preallocation on
every close....

And look at the code - the *only thing* the refcache does is avoid
the truncate in xfs_release(). So, the patch below is the equivalent
of re-introducing the refcache into XFS but uses the linux inode
life cycle to keep references around.

FWIW, this means that EOF pre-allocations will not get trimmed
immediately which may have disk usage implications for users with
small quotas, those that create lots of small files, or there are
lots of written inodes with prealocated space cached in memory
when a crash occurs.

Cheers,

Dave.
-- 
Dave Chinner
Principal Engineer
SGI Australian Software Group


Don't truncate EOF blocks on ->release

Avoid truncating EOF blocks on final close of an filp and
delay it till the inode is reclaimed. While this puts more
pressure on xfs_inactive() during reclaim, it means that
we avoid lots of needless transactions on open-write-close
type workloads (eg as done by the nfsd code).

Note that this means that while inodes are cached in memory
they may be consuming more blocks than their size may indicate
and this space will not be reclaimed if the machine crashes.

Signed-off-by: Dave Chinner <dgc@sgi.com>
---
 fs/xfs/xfs_vnodeops.c |   24 ------------------------
 1 file changed, 24 deletions(-)

Index: 2.6.x-xfs-new/fs/xfs/xfs_vnodeops.c
===================================================================
--- 2.6.x-xfs-new.orig/fs/xfs/xfs_vnodeops.c	2007-12-10 12:04:17.000000000 +1100
+++ 2.6.x-xfs-new/fs/xfs/xfs_vnodeops.c	2007-12-18 18:17:57.188146022 +1100
@@ -1504,7 +1504,6 @@ xfs_release(
 {
 	bhv_vnode_t	*vp = XFS_ITOV(ip);
 	xfs_mount_t	*mp = ip->i_mount;
-	int		error;
 
 	if (!VN_ISREG(vp) || (ip->i_d.di_mode == 0))
 		return 0;
@@ -1540,29 +1539,6 @@ xfs_release(
 		if (truncated && VN_DIRTY(vp) && ip->i_delayed_blks > 0)
 			xfs_flush_pages(ip, 0, -1, XFS_B_ASYNC, FI_NONE);
 	}
-
-#ifdef HAVE_REFCACHE
-	/* If we are in the NFS reference cache then don't do this now */
-	if (ip->i_refcache)
-		return 0;
-#endif
-
-	if (ip->i_d.di_nlink != 0) {
-		if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
-		     ((ip->i_size > 0) || (VN_CACHED(vp) > 0 ||
-		       ip->i_delayed_blks > 0)) &&
-		     (ip->i_df.if_flags & XFS_IFEXTENTS))  &&
-		    (!(ip->i_d.di_flags &
-				(XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
-			error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK);
-			if (error)
-				return error;
-			/* Update linux inode block count after free above */
-			vn_to_inode(vp)->i_blocks = XFS_FSB_TO_BB(mp,
-				ip->i_d.di_nblocks + ip->i_delayed_blks);
-		}
-	}
-
 	return 0;
 }
 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [review] Remove the xfs refcache
  2007-12-18  7:44       ` David Chinner
@ 2007-12-19  3:43         ` David Chinner
  0 siblings, 0 replies; 13+ messages in thread
From: David Chinner @ 2007-12-19  3:43 UTC (permalink / raw)
  To: David Chinner
  Cc: Lachlan McIlroy, Christoph Hellwig, Donald Douwsma, xfs-oss, gnb

On Tue, Dec 18, 2007 at 06:44:05PM +1100, David Chinner wrote:
> On Tue, Dec 18, 2007 at 03:12:02PM +1100, Lachlan McIlroy wrote:
> > Since I have been able to reproduce some of our NAS/NFS performance problems
> > without NFS (that is demonstrate that the problems are in XFS) it makes some
> > sense to fix these in XFS.  I have observed that for some non-NFS workloads
> > we see a significant reduction in log traffic with the OFC in XFS so for
> > reasons beyond NFS there may be a need to reactivate the refcache code.  For
> > the moment we are still analysing the pros/cons.
> 
> Reactivating the ref cache is fundamentally the wrong thing to do.
> Most of these problems come from the mismatch of inode life cycles
> between Linux and XFS and this is the basic problem we need to solve.
> 
> For example - do the open-write-close related performance issues go
> away if you remove the xfs_free_eofblocks() call in xfs_release()?
> i.e. are we just being stupid about the way we deal with closing
> of file descriptors?
> 
> This should work because the linux inode will remain around with a
> ref-count of 1 on the unused list due to the dentry pinning it
> in place. Only when the dentry gets reclaimed (e.g. memory pressure,
> unlink, unmount, etc) will the truncate occur, and hence repeated
> single file open-write-close based workloads (like the nfsd) don't
> issue a truncate transaction and trash the EOF preallocation on
> every close....
> 
> And look at the code - the *only thing* the refcache does is avoid
> the truncate in xfs_release(). So, the patch below is the equivalent
> of re-introducing the refcache into XFS but uses the linux inode
> life cycle to keep references around.
> 
> FWIW, this means that EOF pre-allocations will not get trimmed
> immediately which may have disk usage implications for users with
> small quotas, those that create lots of small files, or there are
> lots of written inodes with prealocated space cached in memory
> when a crash occurs.

FYI - numbers to back this up.

As an example of where the failure to truncate EOF blocks (i.e.
speculative preallocation) is bad, try creating several thousand
small files (say 1 byte) and seeing how long they take to sync
to disk.

With EOF truncation, we get all the data blocks allocated adjacently
so the elevator merges them together and we see large I/Os going
to disk (i.e. 512k I/os going to disk where 128 different file datai
writes have been merged).

Without EOF truncation, these files retain their speculative allocation
(default 64k) and so when written out we get a stream of 4k I/Os
separated by 64k. That is, one seek per inode written out instead of
it being large sequential I/O convering 128 files per I/O.

To demonstrate, sequntial creation of 1 byte files in a 30s period
followed by a (timed) sync:

With EOF truncation:

                      Creates                |       Deletes
Loads    Files  rate  usr   sys   intr  csw/s|  rate  usr   sys   intr  csw/s
-----  -------  ---- ----- ----- -----  -----|  ---- ----- ----- -----  -----
    1    39312  1070   6.8  91.8   4.3   1959   1572   0.9 107.1   0.4   1109
    2    68458  1627   9.9 155.4   2.6   1316   2535   1.7 207.5   0.7   1157

Without EOF truncation:

                      Creates                |       Deletes
Loads    Files  rate  usr   sys   intr  csw/s|  rate  usr   sys   intr  csw/s
-----  -------  ---- ----- ----- -----  -----|  ---- ----- ----- -----  -----
    1    42691   461   3.0  37.7   2.2   1535   1579   1.0 123.2   0.6   1105
    2    72785   530   3.3  44.4   2.8   1684   1774  37.8 179.7   5.1   2754

Note that without EOF truncation we create 5-10% more files in the
30s period this test ran for (due to it being CPU bound and not
issuing empty EOF truncation transactions), but the overall rate
includes the time it takes to write the data to disk as well. The
data write is far slower without EOF truncation....

Hence we see that the overall create+data write rate suffers
*greatly* due to the lack of EOF truncation, and hence why
avoiding EOF truncation on file close for local I/O is generally
considered a bad thing.

Cheers,

Dave.
-- 
Dave Chinner
Principal Engineer
SGI Australian Software Group

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [review] Remove the xfs refcache
  2007-12-17  3:26 [review] Remove the xfs refcache Donald Douwsma
  2007-12-17  4:00 ` Lachlan McIlroy
@ 2008-02-07  8:35 ` Christoph Hellwig
  2008-02-11 13:00   ` Donald Douwsma
  1 sibling, 1 reply; 13+ messages in thread
From: Christoph Hellwig @ 2008-02-07  8:35 UTC (permalink / raw)
  To: Donald Douwsma; +Cc: xfs-oss

On Mon, Dec 17, 2007 at 02:26:30PM +1100, Donald Douwsma wrote:
> Remove the xfs_refcache, it was only needed while we were still building for
> 2.4 kernels.

Given that we finally agreed that this form of refcache shouldn't come
back can you commit the patch?

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [review] Remove the xfs refcache
  2008-02-07  8:35 ` Christoph Hellwig
@ 2008-02-11 13:00   ` Donald Douwsma
  2008-02-11 16:32     ` Christoph Hellwig
  0 siblings, 1 reply; 13+ messages in thread
From: Donald Douwsma @ 2008-02-11 13:00 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: xfs-oss

Christoph Hellwig wrote:
> On Mon, Dec 17, 2007 at 02:26:30PM +1100, Donald Douwsma wrote:
>> Remove the xfs_refcache, it was only needed while we were still building for
>> 2.4 kernels.
> 
> Given that we finally agreed that this form of refcache shouldn't come
> back can you commit the patch?

Done. I'll see if we can get it committed with the Makefile cleanups, that
should be the end of the 2.4 crud.

Don

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [review] Remove the xfs refcache
  2008-02-11 13:00   ` Donald Douwsma
@ 2008-02-11 16:32     ` Christoph Hellwig
  0 siblings, 0 replies; 13+ messages in thread
From: Christoph Hellwig @ 2008-02-11 16:32 UTC (permalink / raw)
  To: Donald Douwsma; +Cc: xfs-oss

On Tue, Feb 12, 2008 at 12:00:54AM +1100, Donald Douwsma wrote:
> > Given that we finally agreed that this form of refcache shouldn't come
> > back can you commit the patch?
> 
> Done. I'll see if we can get it committed with the Makefile cleanups, that
> should be the end of the 2.4 crud.

Thanks!

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2008-02-11 16:32 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-12-17  3:26 [review] Remove the xfs refcache Donald Douwsma
2007-12-17  4:00 ` Lachlan McIlroy
2007-12-17  4:12   ` Donald Douwsma
2007-12-17  5:48   ` Eric Sandeen
2007-12-17  7:15     ` Christoph Hellwig
2007-12-17  7:14   ` Christoph Hellwig
2007-12-17  7:39     ` Greg Banks
2007-12-18  4:12     ` Lachlan McIlroy
2007-12-18  7:44       ` David Chinner
2007-12-19  3:43         ` David Chinner
2008-02-07  8:35 ` Christoph Hellwig
2008-02-11 13:00   ` Donald Douwsma
2008-02-11 16:32     ` Christoph Hellwig

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox