linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Christoph Lameter <clameter@sgi.com>
To: akpm@linux-foundation.org
Cc: linux-kernel@vger.kernel.org,
	Alexander Viro <viro@ftp.linux.org.uk>,
	Christoph Hellwig <hch@infradead.org>
Cc: linux-fsdevel@vger.kernel.org
Cc: Mel Gorman <mel@skynet.ie>
Cc: andi@firstfloor.org
Cc: Rik van Riel <riel@redhat.com>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: mpm@selenic.com
Subject: [patch 11/21] inodes: Support generic defragmentation
Date: Fri, 09 May 2008 20:08:42 -0700	[thread overview]
Message-ID: <20080510030917.178534099@sgi.com> (raw)
In-Reply-To: 20080510030831.796641881@sgi.com

[-- Attachment #1: 0025-inodes-Support-generic-defragmentation.patch --]
[-- Type: text/plain, Size: 5171 bytes --]

This implements the ability to remove the inodes in a particular slab
from inode caches. In order to remove an inode we may have to write out
the pages of the inode and the inode itself, and remove the dentries
referring to the inode.

Provide generic functionality that can be used by filesystems that have
their own inode caches to also tie into the defragmentation functions
that are made available here.

Cc: Alexander Viro <viro@ftp.linux.org.uk>
Cc: Christoph Hellwig <hch@infradead.org>
Reviewed-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Christoph Lameter <clameter@sgi.com>
---
 fs/inode.c         |  123 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/fs.h |    6 ++
 2 files changed, 129 insertions(+)

Index: linux-2.6/fs/inode.c
===================================================================
--- linux-2.6.orig/fs/inode.c	2008-05-07 20:27:10.563908386 -0700
+++ linux-2.6/fs/inode.c	2008-05-07 20:48:14.473081107 -0700
@@ -1363,6 +1363,128 @@ static int __init set_ihash_entries(char
 __setup("ihash_entries=", set_ihash_entries);
 
 /*
+ * Obtain a refcount on a list of struct inodes pointed to by v. If the
+ * inode is in the process of being freed then zap the v[] entry so that
+ * we skip the freeing attempts later.
+ *
+ * This is a generic function for the ->get slab defrag callback.
+ */
+void *get_inodes(struct kmem_cache *s, int nr, void **v)
+{
+	int i;
+
+	spin_lock(&inode_lock);
+	for (i = 0; i < nr; i++) {
+		struct inode *inode = v[i];
+
+		if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE))
+			v[i] = NULL;
+		else
+			__iget(inode);
+	}
+	spin_unlock(&inode_lock);
+	return NULL;
+}
+EXPORT_SYMBOL(get_inodes);
+
+/*
+ * Variant for filesystems that embed struct inode in their own fs inode.
+ * offset is the offset of the struct inode within the fs inode.
+ *
+ * Every v[] entry is advanced by offset so that it points to the struct
+ * inode, then get_inodes() takes the refcounts. Because v[] is converted
+ * in place, it can also be passed to the kick() callback without further
+ * processing.
+ */
+void *fs_get_inodes(struct kmem_cache *s, int nr, void **v,
+						unsigned long offset)
+{
+	int idx = 0;
+
+	while (idx < nr)
+		v[idx++] += offset;
+
+	return get_inodes(s, nr, v);
+}
+EXPORT_SYMBOL(fs_get_inodes);
+
+/*
+ * Generic callback function for slab defrag ->kick methods. Takes the
+ * array of inodes on which fs_get_inodes() or get_inodes() obtained
+ * refcounts (NULL entries are skipped) and tries to free them.
+ */
+void kick_inodes(struct kmem_cache *s, int nr, void **v, void *private)
+{
+	struct inode *inode;
+	int i;
+	int abort = 0;		/* set when can_unuse() fails: stop freeing */
+	LIST_HEAD(freeable);	/* inodes handed to dispose_list() */
+	int active;		/* sb's MS_ACTIVE flag, read before iput() */
+
+	for (i = 0; i < nr; i++) {
+		inode = v[i];
+		if (!inode)
+			continue;
+
+		if (inode_has_buffers(inode) || inode->i_data.nrpages) {
+			if (remove_inode_buffers(inode))
+				/*
+				 * Should we really be doing this? Or
+				 * limit this if there are only a few pages?
+				 *
+				 * Possibly an expensive operation but we
+				 * cannot reclaim the inode if the pages
+				 * are still present.
+				 */
+				invalidate_mapping_pages(&inode->i_data,
+								0, -1);
+		}
+
+		/* Invalidate children and dentry */
+		if (S_ISDIR(inode->i_mode)) {
+			struct dentry *d = d_find_alias(inode);
+
+			if (d) {
+				d_invalidate(d);
+				dput(d);
+			}
+		}
+
+		if (inode->i_state & I_DIRTY)
+			write_inode_now(inode, 1);
+
+		d_prune_aliases(inode);
+	}
+
+	mutex_lock(&iprune_mutex);
+	for (i = 0; i < nr; i++) {
+		inode = v[i];
+
+		if (!inode)
+			/* inode is already being freed */
+			continue;
+
+		active = inode->i_sb->s_flags & MS_ACTIVE;
+		iput(inode);	/* NOTE(review): inode still used after this */
+		if (abort || !active)
+			continue;
+
+		spin_lock(&inode_lock);
+		abort =  !can_unuse(inode);
+
+		if (!abort) {
+			list_move(&inode->i_list, &freeable);
+			inode->i_state |= I_FREEING;
+			inodes_stat.nr_unused--;
+		}
+		spin_unlock(&inode_lock);
+	}
+	dispose_list(&freeable);
+	mutex_unlock(&iprune_mutex);
+}
+EXPORT_SYMBOL(kick_inodes);
+
+/*
  * Initialize the waitqueues and inode hash table.
  */
 void __init inode_init_early(void)
@@ -1401,6 +1523,7 @@ void __init inode_init(void)
 					 SLAB_MEM_SPREAD),
 					 init_once);
 	register_shrinker(&icache_shrinker);
+	kmem_cache_setup_defrag(inode_cachep, get_inodes, kick_inodes);
 
 	/* Hash may have been set up in inode_init_early */
 	if (!hashdist)
Index: linux-2.6/include/linux/fs.h
===================================================================
--- linux-2.6.orig/include/linux/fs.h	2008-05-07 20:27:10.573910205 -0700
+++ linux-2.6/include/linux/fs.h	2008-05-07 20:30:15.153909886 -0700
@@ -1826,6 +1826,12 @@ static inline void insert_inode_hash(str
 	__insert_inode_hash(inode, inode->i_ino);
 }
 
+/* Generic inode slab defrag ->get/->kick helpers, defined in fs/inode.c */
+extern void kick_inodes(struct kmem_cache *s, int nr, void **v, void *private);
+extern void *get_inodes(struct kmem_cache *s, int nr, void **v);
+extern void *fs_get_inodes(struct kmem_cache *s, int nr, void **v,
+						unsigned long offset);
+
 extern struct file * get_empty_filp(void);
 extern void file_move(struct file *f, struct list_head *list);
 extern void file_kill(struct file *f);

-- 

  parent reply	other threads:[~2008-05-10  3:09 UTC|newest]

Thread overview: 93+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-05-10  3:08 [patch 00/21] Slab Fragmentation Reduction V12 Christoph Lameter
2008-05-10  3:08 ` [patch 01/21] slub: Add defrag_ratio field and sysfs support Christoph Lameter
2008-05-10  3:08 ` [patch 02/21] slub: Replace ctor field with ops field in /sys/slab/* Christoph Lameter
2008-05-10  3:08 ` [patch 03/21] slub: Add get() and kick() methods Christoph Lameter
2008-05-10  3:08 ` [patch 04/21] slub: Sort slab cache list and establish maximum objects for defrag slabs Christoph Lameter
2008-05-10  3:08 ` [patch 05/21] slub: Slab defrag core Christoph Lameter
2008-05-10  3:08 ` [patch 06/21] slub: Add KICKABLE to avoid repeated kick() attempts Christoph Lameter
2008-05-10  3:08 ` [patch 07/21] slub: Extend slabinfo to support -D and -F options Christoph Lameter
2008-05-10  3:08 ` [patch 08/21] slub: add defrag statistics Christoph Lameter
2008-05-10  3:08 ` [patch 09/21] slub: Trigger defragmentation from memory reclaim Christoph Lameter
2008-05-10  3:08 ` [patch 10/21] buffer heads: Support slab defrag Christoph Lameter
2008-05-12  0:24   ` David Chinner
2008-05-15 17:42     ` Christoph Lameter
2008-05-15 23:10       ` David Chinner
2008-05-16 17:01         ` Christoph Lameter
2008-05-19  5:45           ` David Chinner
2008-05-19 16:44             ` Christoph Lameter
2008-05-20  0:25               ` David Chinner
2008-05-20  6:56                 ` Evgeniy Polyakov
2008-05-20 21:46                   ` David Chinner
2008-05-20 22:25                     ` Evgeniy Polyakov
2008-05-20 23:19                       ` David Chinner
2008-05-20 23:28                         ` Andrew Morton
2008-05-21  6:15                           ` Evgeniy Polyakov
2008-05-21  6:24                             ` Andrew Morton
2008-05-21 17:52                               ` iput() in reclaim context Hugh Dickins
2008-05-21 17:58                                 ` Evgeniy Polyakov
2008-05-21 18:12                                 ` Andrew Morton
2008-05-20 23:22                       ` [patch 10/21] buffer heads: Support slab defrag Evgeniy Polyakov
2008-05-20 23:30                         ` David Chinner
2008-05-21  6:20                           ` Evgeniy Polyakov
2008-05-21  1:56                         ` Christoph Lameter
2008-05-20 22:53             ` Jamie Lokier
2008-05-10  3:08 ` Christoph Lameter [this message]
2008-05-10  3:08 ` [patch 12/21] Filesystem: Ext2 filesystem defrag Christoph Lameter
2008-05-10  3:08 ` [patch 13/21] Filesystem: Ext3 " Christoph Lameter
2008-05-10  3:08 ` [patch 14/21] Filesystem: Ext4 " Christoph Lameter
2008-05-10  3:08 ` [patch 15/21] Filesystem: XFS slab defragmentation Christoph Lameter
2008-05-10  6:55   ` Christoph Hellwig
2008-05-10  3:08 ` [patch 16/21] Filesystem: /proc filesystem support for slab defrag Christoph Lameter
2008-05-10  3:08 ` [patch 17/21] Filesystem: Slab defrag: Reiserfs support Christoph Lameter
2008-05-10  3:08 ` [patch 18/21] Filesystem: Socket inode defragmentation Christoph Lameter
2008-05-13 13:28   ` Evgeniy Polyakov
2008-05-15 17:40     ` Christoph Lameter
2008-05-15 18:23       ` Evgeniy Polyakov
2008-05-10  3:08 ` [patch 19/21] dentries: Add constructor Christoph Lameter
2008-05-10  3:08 ` [patch 20/21] dentries: dentry defragmentation Christoph Lameter
2008-05-10  3:08 ` [patch 21/21] slab defrag: Obsolete SLAB Christoph Lameter
2008-05-10  9:53   ` Andi Kleen
2008-05-11  2:15     ` Rik van Riel
2008-05-12  7:38       ` KOSAKI Motohiro
2008-05-12  7:54         ` Pekka Enberg
2008-05-12 10:08           ` Andi Kleen
2008-05-12 10:23             ` Pekka Enberg
2008-05-14 17:30               ` Christoph Lameter
2008-05-14 17:29           ` Christoph Lameter
2008-05-14 17:49             ` Andi Kleen
2008-05-14 18:03               ` Christoph Lameter
2008-05-14 18:18                 ` Matt Mackall
2008-05-14 19:21                   ` Christoph Lameter
2008-05-14 19:49                     ` Matt Mackall
2008-05-14 20:33                       ` Christoph Lameter
2008-05-14 21:02                         ` Matt Mackall
2008-05-14 21:26                           ` Christoph Lameter
2008-05-14 21:54                             ` Matt Mackall
2008-05-15 17:15                               ` Christoph Lameter
2008-05-15  3:26                 ` Zhang, Yanmin
2008-05-15 17:05                   ` Christoph Lameter
2008-05-15 17:49                     ` Matthew Wilcox
2008-05-15 17:58                       ` Christoph Lameter
2008-05-15 18:13                         ` Matthew Wilcox
2008-05-15 18:43                           ` Christoph Lameter
2008-05-15 18:51                             ` Matthew Wilcox
2008-05-15 19:09                               ` Christoph Lameter
2008-05-15 19:29                                 ` Matthew Wilcox
2008-05-15 20:14                                   ` Matthew Wilcox
2008-05-15 20:30                                     ` Pekka Enberg
2008-05-16 19:17                                     ` Christoph Lameter
2008-05-16 19:06                                   ` Christoph Lameter
2008-05-15 18:19                       ` Eric Dumazet
2008-05-15 18:29                       ` Vegard Nossum
2008-05-16  5:16                     ` Zhang, Yanmin
2008-05-14 18:05               ` Christoph Lameter
2008-05-14 20:46                 ` Christoph Lameter
2008-05-14 20:58                   ` Matthew Wilcox
2008-05-14 21:00                     ` Christoph Lameter
2008-05-14 21:21                       ` Matthew Wilcox
2008-05-14 21:33                         ` Christoph Lameter
2008-05-14 21:43                           ` Matthew Wilcox
2008-05-14 21:53                             ` Christoph Lameter
2008-05-14 22:00                               ` Matthew Wilcox
2008-05-14 22:32                                 ` Christoph Lameter
2008-05-14 22:34                                 ` Christoph Lameter

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080510030917.178534099@sgi.com \
    --to=clameter@sgi.com \
    --cc=akpm@linux-foundation.org \
    --cc=hch@infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=viro@ftp.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).