From: Dave Chinner <david@fromorbit.com>
To: linux-kernel@vger.kernel.org
Cc: linux-fsdevel@vger.kernel.org, linux-mm@kvack.org, xfs@oss.sgi.com
Subject: [PATCH 1/5] inode: Make unused inode LRU per superblock
Date: Tue, 25 May 2010 18:53:04 +1000 [thread overview]
Message-ID: <1274777588-21494-2-git-send-email-david@fromorbit.com> (raw)
In-Reply-To: <1274777588-21494-1-git-send-email-david@fromorbit.com>
From: Dave Chinner <dchinner@redhat.com>
The inode unused list is currently a global LRU. This does not match
the other global filesystem cache - the dentry cache - which uses
per-superblock LRU lists. Hence we have related filesystem object
types using different LRU reclamation schemes.
To enable a per-superblock filesystem cache shrinker, both of these
caches need to have per-sb unused object LRU lists. Hence this patch
converts the global inode LRU to per-sb LRUs.
The patch only does rudimentary per-sb proportioning in the shrinker
infrastructure, as this gets removed when the per-sb shrinker
callouts are introduced later on.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/fs-writeback.c | 2 +-
fs/inode.c | 87 +++++++++++++++++++++++++++++++++++++++-----
fs/super.c | 1 +
include/linux/fs.h | 4 ++
include/linux/writeback.h | 1 -
5 files changed, 83 insertions(+), 12 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 5c4161f..b1e76ef 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -565,7 +565,7 @@ select_queue:
/*
* The inode is clean, unused
*/
- list_move(&inode->i_list, &inode_unused);
+ list_move(&inode->i_list, &inode->i_sb->s_inode_lru);
}
}
inode_sync_complete(inode);
diff --git a/fs/inode.c b/fs/inode.c
index 2bee20a..3caa758 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -25,6 +25,7 @@
#include <linux/mount.h>
#include <linux/async.h>
#include <linux/posix_acl.h>
+#include "internal.h"
/*
* This is needed for the following functions:
@@ -74,7 +75,6 @@ static unsigned int i_hash_shift __read_mostly;
*/
LIST_HEAD(inode_in_use);
-LIST_HEAD(inode_unused);
static struct hlist_head *inode_hashtable __read_mostly;
/*
@@ -292,6 +292,7 @@ void __iget(struct inode *inode)
if (!(inode->i_state & (I_DIRTY|I_SYNC)))
list_move(&inode->i_list, &inode_in_use);
inodes_stat.nr_unused--;
+ inode->i_sb->s_nr_inodes_unused--;
}
/**
@@ -386,6 +387,7 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose)
invalidate_inode_buffers(inode);
if (!atomic_read(&inode->i_count)) {
list_move(&inode->i_list, dispose);
+ inode->i_sb->s_nr_inodes_unused--;
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_FREEING;
count++;
@@ -444,32 +446,31 @@ static int can_unuse(struct inode *inode)
*
* Any inodes which are pinned purely because of attached pagecache have their
* pagecache removed. We expect the final iput() on that inode to add it to
- * the front of the inode_unused list. So look for it there and if the
+ * the front of the sb->s_inode_lru list. So look for it there and if the
* inode is still freeable, proceed. The right inode is found 99.9% of the
* time in testing on a 4-way.
*
* If the inode has metadata buffers attached to mapping->private_list then
* try to remove them.
*/
-static void prune_icache(int nr_to_scan)
+static void shrink_icache_sb(struct super_block *sb, int *nr_to_scan)
{
LIST_HEAD(freeable);
int nr_pruned = 0;
int nr_scanned;
unsigned long reap = 0;
- down_read(&iprune_sem);
spin_lock(&inode_lock);
- for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
+ for (nr_scanned = *nr_to_scan; nr_scanned >= 0; nr_scanned--) {
struct inode *inode;
- if (list_empty(&inode_unused))
+ if (list_empty(&sb->s_inode_lru))
break;
- inode = list_entry(inode_unused.prev, struct inode, i_list);
+ inode = list_entry(sb->s_inode_lru.prev, struct inode, i_list);
if (inode->i_state || atomic_read(&inode->i_count)) {
- list_move(&inode->i_list, &inode_unused);
+ list_move(&inode->i_list, &sb->s_inode_lru);
continue;
}
if (inode_has_buffers(inode) || inode->i_data.nrpages) {
@@ -481,7 +482,7 @@ static void prune_icache(int nr_to_scan)
iput(inode);
spin_lock(&inode_lock);
- if (inode != list_entry(inode_unused.next,
+ if (inode != list_entry(sb->s_inode_lru.next,
struct inode, i_list))
continue; /* wrong inode or list_empty */
if (!can_unuse(inode))
@@ -493,13 +494,77 @@ static void prune_icache(int nr_to_scan)
nr_pruned++;
}
inodes_stat.nr_unused -= nr_pruned;
+ sb->s_nr_inodes_unused -= nr_pruned;
if (current_is_kswapd())
__count_vm_events(KSWAPD_INODESTEAL, reap);
else
__count_vm_events(PGINODESTEAL, reap);
spin_unlock(&inode_lock);
+ *nr_to_scan = nr_scanned;
dispose_list(&freeable);
+}
+
+static void prune_icache(int count)
+{
+ struct super_block *sb, *n;
+ int w_count;
+ int unused = inodes_stat.nr_unused;
+ int prune_ratio;
+ int pruned;
+
+ if (unused == 0 || count == 0)
+ return;
+ down_read(&iprune_sem);
+ if (count >= unused)
+ prune_ratio = 1;
+ else
+ prune_ratio = unused / count;
+ spin_lock(&sb_lock);
+ list_for_each_entry_safe(sb, n, &super_blocks, s_list) {
+ if (list_empty(&sb->s_instances))
+ continue;
+ if (sb->s_nr_inodes_unused == 0)
+ continue;
+ sb->s_count++;
+ /* Now, we reclaim unused inodes with fairness.
+ * We reclaim the same percentage from each superblock.
+ * We calculate the number of inodes to scan on this sb
+ * as follows, but the implementation is arranged to avoid
+ * overflows:
+ * number of inodes to scan on this sb =
+ * count * (number of unused inodes on this sb /
+ * number of unused inodes in the machine)
+ */
+ spin_unlock(&sb_lock);
+ if (prune_ratio != 1)
+ w_count = (sb->s_nr_inodes_unused / prune_ratio) + 1;
+ else
+ w_count = sb->s_nr_inodes_unused;
+ pruned = w_count;
+ /*
+ * We need to be sure this filesystem isn't being unmounted,
+ * otherwise we could race with generic_shutdown_super(), and
+ * end up holding a reference to an inode while the filesystem
+ * is unmounted. So we try to get s_umount, and make sure
+ * s_root isn't NULL.
+ */
+ if (down_read_trylock(&sb->s_umount)) {
+ if ((sb->s_root != NULL) &&
+ (!list_empty(&sb->s_inode_lru))) {
+ shrink_icache_sb(sb, &w_count);
+ pruned -= w_count;
+ }
+ up_read(&sb->s_umount);
+ }
+ spin_lock(&sb_lock);
+ count -= pruned;
+ __put_super(sb);
+ /* more work left to do? */
+ if (count <= 0)
+ break;
+ }
+ spin_unlock(&sb_lock);
up_read(&iprune_sem);
}
@@ -1238,8 +1303,9 @@ int generic_detach_inode(struct inode *inode)
if (!hlist_unhashed(&inode->i_hash)) {
if (!(inode->i_state & (I_DIRTY|I_SYNC)))
- list_move(&inode->i_list, &inode_unused);
+ list_move(&inode->i_list, &sb->s_inode_lru);
inodes_stat.nr_unused++;
+ sb->s_nr_inodes_unused++;
if (sb->s_flags & MS_ACTIVE) {
spin_unlock(&inode_lock);
return 0;
@@ -1252,6 +1318,7 @@ int generic_detach_inode(struct inode *inode)
WARN_ON(inode->i_state & I_NEW);
inode->i_state &= ~I_WILL_FREE;
inodes_stat.nr_unused--;
+ sb->s_nr_inodes_unused--;
hlist_del_init(&inode->i_hash);
}
list_del_init(&inode->i_list);
diff --git a/fs/super.c b/fs/super.c
index 69688b1..c554c53 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -60,6 +60,7 @@ static struct super_block *alloc_super(struct file_system_type *type)
INIT_HLIST_HEAD(&s->s_anon);
INIT_LIST_HEAD(&s->s_inodes);
INIT_LIST_HEAD(&s->s_dentry_lru);
+ INIT_LIST_HEAD(&s->s_inode_lru);
init_rwsem(&s->s_umount);
mutex_init(&s->s_lock);
lockdep_set_class(&s->s_umount, &type->s_umount_key);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b336cb9..7b90c43 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1346,6 +1346,10 @@ struct super_block {
struct list_head s_dentry_lru; /* unused dentry lru */
int s_nr_dentry_unused; /* # of dentry on lru */
+ /* s_inode_lru and s_nr_inodes_unused are protected by inode_lock */
+ struct list_head s_inode_lru; /* unused inode lru */
+ int s_nr_inodes_unused; /* # of inodes on lru */
+
struct block_device *s_bdev;
struct backing_dev_info *s_bdi;
struct mtd_info *s_mtd;
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index cc97d6c..a74837e 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -11,7 +11,6 @@ struct backing_dev_info;
extern spinlock_t inode_lock;
extern struct list_head inode_in_use;
-extern struct list_head inode_unused;
/*
* fs/fs-writeback.c
--
1.5.6.5
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
WARNING: multiple messages have this Message-ID (diff)
From: Dave Chinner <david@fromorbit.com>
To: linux-kernel@vger.kernel.org
Cc: linux-fsdevel@vger.kernel.org, linux-mm@kvack.org, xfs@oss.sgi.com
Subject: [PATCH 1/5] inode: Make unused inode LRU per superblock
Date: Tue, 25 May 2010 18:53:04 +1000 [thread overview]
Message-ID: <1274777588-21494-2-git-send-email-david@fromorbit.com> (raw)
In-Reply-To: <1274777588-21494-1-git-send-email-david@fromorbit.com>
From: Dave Chinner <dchinner@redhat.com>
The inode unused list is currently a global LRU. This does not match
the other global filesystem cache - the dentry cache - which uses
per-superblock LRU lists. Hence we have related filesystem object
types using different LRU reclamation schemes.
To enable a per-superblock filesystem cache shrinker, both of these
caches need to have per-sb unused object LRU lists. Hence this patch
converts the global inode LRU to per-sb LRUs.
The patch only does rudimentary per-sb proportioning in the shrinker
infrastructure, as this gets removed when the per-sb shrinker
callouts are introduced later on.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/fs-writeback.c | 2 +-
fs/inode.c | 87 +++++++++++++++++++++++++++++++++++++++-----
fs/super.c | 1 +
include/linux/fs.h | 4 ++
include/linux/writeback.h | 1 -
5 files changed, 83 insertions(+), 12 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 5c4161f..b1e76ef 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -565,7 +565,7 @@ select_queue:
/*
* The inode is clean, unused
*/
- list_move(&inode->i_list, &inode_unused);
+ list_move(&inode->i_list, &inode->i_sb->s_inode_lru);
}
}
inode_sync_complete(inode);
diff --git a/fs/inode.c b/fs/inode.c
index 2bee20a..3caa758 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -25,6 +25,7 @@
#include <linux/mount.h>
#include <linux/async.h>
#include <linux/posix_acl.h>
+#include "internal.h"
/*
* This is needed for the following functions:
@@ -74,7 +75,6 @@ static unsigned int i_hash_shift __read_mostly;
*/
LIST_HEAD(inode_in_use);
-LIST_HEAD(inode_unused);
static struct hlist_head *inode_hashtable __read_mostly;
/*
@@ -292,6 +292,7 @@ void __iget(struct inode *inode)
if (!(inode->i_state & (I_DIRTY|I_SYNC)))
list_move(&inode->i_list, &inode_in_use);
inodes_stat.nr_unused--;
+ inode->i_sb->s_nr_inodes_unused--;
}
/**
@@ -386,6 +387,7 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose)
invalidate_inode_buffers(inode);
if (!atomic_read(&inode->i_count)) {
list_move(&inode->i_list, dispose);
+ inode->i_sb->s_nr_inodes_unused--;
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_FREEING;
count++;
@@ -444,32 +446,31 @@ static int can_unuse(struct inode *inode)
*
* Any inodes which are pinned purely because of attached pagecache have their
* pagecache removed. We expect the final iput() on that inode to add it to
- * the front of the inode_unused list. So look for it there and if the
+ * the front of the sb->s_inode_lru list. So look for it there and if the
* inode is still freeable, proceed. The right inode is found 99.9% of the
* time in testing on a 4-way.
*
* If the inode has metadata buffers attached to mapping->private_list then
* try to remove them.
*/
-static void prune_icache(int nr_to_scan)
+static void shrink_icache_sb(struct super_block *sb, int *nr_to_scan)
{
LIST_HEAD(freeable);
int nr_pruned = 0;
int nr_scanned;
unsigned long reap = 0;
- down_read(&iprune_sem);
spin_lock(&inode_lock);
- for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
+ for (nr_scanned = *nr_to_scan; nr_scanned >= 0; nr_scanned--) {
struct inode *inode;
- if (list_empty(&inode_unused))
+ if (list_empty(&sb->s_inode_lru))
break;
- inode = list_entry(inode_unused.prev, struct inode, i_list);
+ inode = list_entry(sb->s_inode_lru.prev, struct inode, i_list);
if (inode->i_state || atomic_read(&inode->i_count)) {
- list_move(&inode->i_list, &inode_unused);
+ list_move(&inode->i_list, &sb->s_inode_lru);
continue;
}
if (inode_has_buffers(inode) || inode->i_data.nrpages) {
@@ -481,7 +482,7 @@ static void prune_icache(int nr_to_scan)
iput(inode);
spin_lock(&inode_lock);
- if (inode != list_entry(inode_unused.next,
+ if (inode != list_entry(sb->s_inode_lru.next,
struct inode, i_list))
continue; /* wrong inode or list_empty */
if (!can_unuse(inode))
@@ -493,13 +494,77 @@ static void prune_icache(int nr_to_scan)
nr_pruned++;
}
inodes_stat.nr_unused -= nr_pruned;
+ sb->s_nr_inodes_unused -= nr_pruned;
if (current_is_kswapd())
__count_vm_events(KSWAPD_INODESTEAL, reap);
else
__count_vm_events(PGINODESTEAL, reap);
spin_unlock(&inode_lock);
+ *nr_to_scan = nr_scanned;
dispose_list(&freeable);
+}
+
+static void prune_icache(int count)
+{
+ struct super_block *sb, *n;
+ int w_count;
+ int unused = inodes_stat.nr_unused;
+ int prune_ratio;
+ int pruned;
+
+ if (unused == 0 || count == 0)
+ return;
+ down_read(&iprune_sem);
+ if (count >= unused)
+ prune_ratio = 1;
+ else
+ prune_ratio = unused / count;
+ spin_lock(&sb_lock);
+ list_for_each_entry_safe(sb, n, &super_blocks, s_list) {
+ if (list_empty(&sb->s_instances))
+ continue;
+ if (sb->s_nr_inodes_unused == 0)
+ continue;
+ sb->s_count++;
+ /* Now, we reclaim unused inodes with fairness.
+ * We reclaim the same percentage from each superblock.
+ * We calculate the number of inodes to scan on this sb
+ * as follows, but the implementation is arranged to avoid
+ * overflows:
+ * number of inodes to scan on this sb =
+ * count * (number of unused inodes on this sb /
+ * number of unused inodes in the machine)
+ */
+ spin_unlock(&sb_lock);
+ if (prune_ratio != 1)
+ w_count = (sb->s_nr_inodes_unused / prune_ratio) + 1;
+ else
+ w_count = sb->s_nr_inodes_unused;
+ pruned = w_count;
+ /*
+ * We need to be sure this filesystem isn't being unmounted,
+ * otherwise we could race with generic_shutdown_super(), and
+ * end up holding a reference to an inode while the filesystem
+ * is unmounted. So we try to get s_umount, and make sure
+ * s_root isn't NULL.
+ */
+ if (down_read_trylock(&sb->s_umount)) {
+ if ((sb->s_root != NULL) &&
+ (!list_empty(&sb->s_inode_lru))) {
+ shrink_icache_sb(sb, &w_count);
+ pruned -= w_count;
+ }
+ up_read(&sb->s_umount);
+ }
+ spin_lock(&sb_lock);
+ count -= pruned;
+ __put_super(sb);
+ /* more work left to do? */
+ if (count <= 0)
+ break;
+ }
+ spin_unlock(&sb_lock);
up_read(&iprune_sem);
}
@@ -1238,8 +1303,9 @@ int generic_detach_inode(struct inode *inode)
if (!hlist_unhashed(&inode->i_hash)) {
if (!(inode->i_state & (I_DIRTY|I_SYNC)))
- list_move(&inode->i_list, &inode_unused);
+ list_move(&inode->i_list, &sb->s_inode_lru);
inodes_stat.nr_unused++;
+ sb->s_nr_inodes_unused++;
if (sb->s_flags & MS_ACTIVE) {
spin_unlock(&inode_lock);
return 0;
@@ -1252,6 +1318,7 @@ int generic_detach_inode(struct inode *inode)
WARN_ON(inode->i_state & I_NEW);
inode->i_state &= ~I_WILL_FREE;
inodes_stat.nr_unused--;
+ sb->s_nr_inodes_unused--;
hlist_del_init(&inode->i_hash);
}
list_del_init(&inode->i_list);
diff --git a/fs/super.c b/fs/super.c
index 69688b1..c554c53 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -60,6 +60,7 @@ static struct super_block *alloc_super(struct file_system_type *type)
INIT_HLIST_HEAD(&s->s_anon);
INIT_LIST_HEAD(&s->s_inodes);
INIT_LIST_HEAD(&s->s_dentry_lru);
+ INIT_LIST_HEAD(&s->s_inode_lru);
init_rwsem(&s->s_umount);
mutex_init(&s->s_lock);
lockdep_set_class(&s->s_umount, &type->s_umount_key);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b336cb9..7b90c43 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1346,6 +1346,10 @@ struct super_block {
struct list_head s_dentry_lru; /* unused dentry lru */
int s_nr_dentry_unused; /* # of dentry on lru */
+ /* s_inode_lru and s_nr_inodes_unused are protected by inode_lock */
+ struct list_head s_inode_lru; /* unused inode lru */
+ int s_nr_inodes_unused; /* # of inodes on lru */
+
struct block_device *s_bdev;
struct backing_dev_info *s_bdi;
struct mtd_info *s_mtd;
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index cc97d6c..a74837e 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -11,7 +11,6 @@ struct backing_dev_info;
extern spinlock_t inode_lock;
extern struct list_head inode_in_use;
-extern struct list_head inode_unused;
/*
* fs/fs-writeback.c
--
1.5.6.5
WARNING: multiple messages have this Message-ID (diff)
From: Dave Chinner <david@fromorbit.com>
To: linux-kernel@vger.kernel.org
Cc: linux-fsdevel@vger.kernel.org, linux-mm@kvack.org, xfs@oss.sgi.com
Subject: [PATCH 1/5] inode: Make unused inode LRU per superblock
Date: Tue, 25 May 2010 18:53:04 +1000 [thread overview]
Message-ID: <1274777588-21494-2-git-send-email-david@fromorbit.com> (raw)
In-Reply-To: <1274777588-21494-1-git-send-email-david@fromorbit.com>
From: Dave Chinner <dchinner@redhat.com>
The inode unused list is currently a global LRU. This does not match
the other global filesystem cache - the dentry cache - which uses
per-superblock LRU lists. Hence we have related filesystem object
types using different LRU reclamation schemes.
To enable a per-superblock filesystem cache shrinker, both of these
caches need to have per-sb unused object LRU lists. Hence this patch
converts the global inode LRU to per-sb LRUs.
The patch only does rudimentary per-sb proportioning in the shrinker
infrastructure, as this gets removed when the per-sb shrinker
callouts are introduced later on.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/fs-writeback.c | 2 +-
fs/inode.c | 87 +++++++++++++++++++++++++++++++++++++++-----
fs/super.c | 1 +
include/linux/fs.h | 4 ++
include/linux/writeback.h | 1 -
5 files changed, 83 insertions(+), 12 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 5c4161f..b1e76ef 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -565,7 +565,7 @@ select_queue:
/*
* The inode is clean, unused
*/
- list_move(&inode->i_list, &inode_unused);
+ list_move(&inode->i_list, &inode->i_sb->s_inode_lru);
}
}
inode_sync_complete(inode);
diff --git a/fs/inode.c b/fs/inode.c
index 2bee20a..3caa758 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -25,6 +25,7 @@
#include <linux/mount.h>
#include <linux/async.h>
#include <linux/posix_acl.h>
+#include "internal.h"
/*
* This is needed for the following functions:
@@ -74,7 +75,6 @@ static unsigned int i_hash_shift __read_mostly;
*/
LIST_HEAD(inode_in_use);
-LIST_HEAD(inode_unused);
static struct hlist_head *inode_hashtable __read_mostly;
/*
@@ -292,6 +292,7 @@ void __iget(struct inode *inode)
if (!(inode->i_state & (I_DIRTY|I_SYNC)))
list_move(&inode->i_list, &inode_in_use);
inodes_stat.nr_unused--;
+ inode->i_sb->s_nr_inodes_unused--;
}
/**
@@ -386,6 +387,7 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose)
invalidate_inode_buffers(inode);
if (!atomic_read(&inode->i_count)) {
list_move(&inode->i_list, dispose);
+ inode->i_sb->s_nr_inodes_unused--;
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_FREEING;
count++;
@@ -444,32 +446,31 @@ static int can_unuse(struct inode *inode)
*
* Any inodes which are pinned purely because of attached pagecache have their
* pagecache removed. We expect the final iput() on that inode to add it to
- * the front of the inode_unused list. So look for it there and if the
+ * the front of the sb->s_inode_lru list. So look for it there and if the
* inode is still freeable, proceed. The right inode is found 99.9% of the
* time in testing on a 4-way.
*
* If the inode has metadata buffers attached to mapping->private_list then
* try to remove them.
*/
-static void prune_icache(int nr_to_scan)
+static void shrink_icache_sb(struct super_block *sb, int *nr_to_scan)
{
LIST_HEAD(freeable);
int nr_pruned = 0;
int nr_scanned;
unsigned long reap = 0;
- down_read(&iprune_sem);
spin_lock(&inode_lock);
- for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
+ for (nr_scanned = *nr_to_scan; nr_scanned >= 0; nr_scanned--) {
struct inode *inode;
- if (list_empty(&inode_unused))
+ if (list_empty(&sb->s_inode_lru))
break;
- inode = list_entry(inode_unused.prev, struct inode, i_list);
+ inode = list_entry(sb->s_inode_lru.prev, struct inode, i_list);
if (inode->i_state || atomic_read(&inode->i_count)) {
- list_move(&inode->i_list, &inode_unused);
+ list_move(&inode->i_list, &sb->s_inode_lru);
continue;
}
if (inode_has_buffers(inode) || inode->i_data.nrpages) {
@@ -481,7 +482,7 @@ static void prune_icache(int nr_to_scan)
iput(inode);
spin_lock(&inode_lock);
- if (inode != list_entry(inode_unused.next,
+ if (inode != list_entry(sb->s_inode_lru.next,
struct inode, i_list))
continue; /* wrong inode or list_empty */
if (!can_unuse(inode))
@@ -493,13 +494,77 @@ static void prune_icache(int nr_to_scan)
nr_pruned++;
}
inodes_stat.nr_unused -= nr_pruned;
+ sb->s_nr_inodes_unused -= nr_pruned;
if (current_is_kswapd())
__count_vm_events(KSWAPD_INODESTEAL, reap);
else
__count_vm_events(PGINODESTEAL, reap);
spin_unlock(&inode_lock);
+ *nr_to_scan = nr_scanned;
dispose_list(&freeable);
+}
+
+static void prune_icache(int count)
+{
+ struct super_block *sb, *n;
+ int w_count;
+ int unused = inodes_stat.nr_unused;
+ int prune_ratio;
+ int pruned;
+
+ if (unused == 0 || count == 0)
+ return;
+ down_read(&iprune_sem);
+ if (count >= unused)
+ prune_ratio = 1;
+ else
+ prune_ratio = unused / count;
+ spin_lock(&sb_lock);
+ list_for_each_entry_safe(sb, n, &super_blocks, s_list) {
+ if (list_empty(&sb->s_instances))
+ continue;
+ if (sb->s_nr_inodes_unused == 0)
+ continue;
+ sb->s_count++;
+ /* Now, we reclaim unused inodes with fairness.
+ * We reclaim the same percentage from each superblock.
+ * We calculate the number of inodes to scan on this sb
+ * as follows, but the implementation is arranged to avoid
+ * overflows:
+ * number of inodes to scan on this sb =
+ * count * (number of unused inodes on this sb /
+ * number of unused inodes in the machine)
+ */
+ spin_unlock(&sb_lock);
+ if (prune_ratio != 1)
+ w_count = (sb->s_nr_inodes_unused / prune_ratio) + 1;
+ else
+ w_count = sb->s_nr_inodes_unused;
+ pruned = w_count;
+ /*
+ * We need to be sure this filesystem isn't being unmounted,
+ * otherwise we could race with generic_shutdown_super(), and
+ * end up holding a reference to an inode while the filesystem
+ * is unmounted. So we try to get s_umount, and make sure
+ * s_root isn't NULL.
+ */
+ if (down_read_trylock(&sb->s_umount)) {
+ if ((sb->s_root != NULL) &&
+ (!list_empty(&sb->s_inode_lru))) {
+ shrink_icache_sb(sb, &w_count);
+ pruned -= w_count;
+ }
+ up_read(&sb->s_umount);
+ }
+ spin_lock(&sb_lock);
+ count -= pruned;
+ __put_super(sb);
+ /* more work left to do? */
+ if (count <= 0)
+ break;
+ }
+ spin_unlock(&sb_lock);
up_read(&iprune_sem);
}
@@ -1238,8 +1303,9 @@ int generic_detach_inode(struct inode *inode)
if (!hlist_unhashed(&inode->i_hash)) {
if (!(inode->i_state & (I_DIRTY|I_SYNC)))
- list_move(&inode->i_list, &inode_unused);
+ list_move(&inode->i_list, &sb->s_inode_lru);
inodes_stat.nr_unused++;
+ sb->s_nr_inodes_unused++;
if (sb->s_flags & MS_ACTIVE) {
spin_unlock(&inode_lock);
return 0;
@@ -1252,6 +1318,7 @@ int generic_detach_inode(struct inode *inode)
WARN_ON(inode->i_state & I_NEW);
inode->i_state &= ~I_WILL_FREE;
inodes_stat.nr_unused--;
+ sb->s_nr_inodes_unused--;
hlist_del_init(&inode->i_hash);
}
list_del_init(&inode->i_list);
diff --git a/fs/super.c b/fs/super.c
index 69688b1..c554c53 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -60,6 +60,7 @@ static struct super_block *alloc_super(struct file_system_type *type)
INIT_HLIST_HEAD(&s->s_anon);
INIT_LIST_HEAD(&s->s_inodes);
INIT_LIST_HEAD(&s->s_dentry_lru);
+ INIT_LIST_HEAD(&s->s_inode_lru);
init_rwsem(&s->s_umount);
mutex_init(&s->s_lock);
lockdep_set_class(&s->s_umount, &type->s_umount_key);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b336cb9..7b90c43 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1346,6 +1346,10 @@ struct super_block {
struct list_head s_dentry_lru; /* unused dentry lru */
int s_nr_dentry_unused; /* # of dentry on lru */
+ /* s_inode_lru and s_nr_inodes_unused are protected by inode_lock */
+ struct list_head s_inode_lru; /* unused inode lru */
+ int s_nr_inodes_unused; /* # of inodes on lru */
+
struct block_device *s_bdev;
struct backing_dev_info *s_bdi;
struct mtd_info *s_mtd;
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index cc97d6c..a74837e 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -11,7 +11,6 @@ struct backing_dev_info;
extern spinlock_t inode_lock;
extern struct list_head inode_in_use;
-extern struct list_head inode_unused;
/*
* fs/fs-writeback.c
--
1.5.6.5
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2010-05-25 8:51 UTC|newest]
Thread overview: 132+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-05-25 8:53 [PATCH 0/5] Per superblock shrinkers V2 Dave Chinner
2010-05-25 8:53 ` Dave Chinner
2010-05-25 8:53 ` Dave Chinner
2010-05-25 8:53 ` Dave Chinner [this message]
2010-05-25 8:53 ` [PATCH 1/5] inode: Make unused inode LRU per superblock Dave Chinner
2010-05-25 8:53 ` Dave Chinner
2010-05-26 16:17 ` Nick Piggin
2010-05-26 16:17 ` Nick Piggin
2010-05-26 16:17 ` Nick Piggin
2010-05-26 23:01 ` Dave Chinner
2010-05-26 23:01 ` Dave Chinner
2010-05-26 23:01 ` Dave Chinner
2010-05-26 23:01 ` Dave Chinner
2010-05-27 2:04 ` Nick Piggin
2010-05-27 2:04 ` Nick Piggin
2010-05-27 2:04 ` Nick Piggin
2010-05-27 2:04 ` Nick Piggin
2010-05-27 4:02 ` Dave Chinner
2010-05-27 4:02 ` Dave Chinner
2010-05-27 4:02 ` Dave Chinner
2010-05-27 4:02 ` Dave Chinner
2010-05-27 4:23 ` Nick Piggin
2010-05-27 4:23 ` Nick Piggin
2010-05-27 4:23 ` Nick Piggin
2010-05-27 4:23 ` Nick Piggin
2010-05-27 20:32 ` Andrew Morton
2010-05-27 20:32 ` Andrew Morton
2010-05-27 20:32 ` Andrew Morton
2010-05-27 22:54 ` Dave Chinner
2010-05-27 22:54 ` Dave Chinner
2010-05-27 22:54 ` Dave Chinner
2010-05-28 10:07 ` Nick Piggin
2010-05-28 10:07 ` Nick Piggin
2010-05-28 10:07 ` Nick Piggin
2010-05-25 8:53 ` [PATCH 2/5] mm: add context argument to shrinker callback Dave Chinner
2010-05-25 8:53 ` Dave Chinner
2010-05-25 8:53 ` Dave Chinner
2010-05-25 8:53 ` [PATCH 3/5] superblock: introduce per-sb cache shrinker infrastructure Dave Chinner
2010-05-25 8:53 ` Dave Chinner
2010-05-25 8:53 ` Dave Chinner
2010-05-25 8:53 ` Dave Chinner
2010-05-26 16:41 ` Nick Piggin
2010-05-26 16:41 ` Nick Piggin
2010-05-26 16:41 ` Nick Piggin
2010-05-26 16:41 ` Nick Piggin
2010-05-26 23:12 ` Dave Chinner
2010-05-26 23:12 ` Dave Chinner
2010-05-26 23:12 ` Dave Chinner
2010-05-26 23:12 ` Dave Chinner
2010-05-27 1:53 ` [PATCH 3/5 v2] " Dave Chinner
2010-05-27 1:53 ` Dave Chinner
2010-05-27 1:53 ` Dave Chinner
2010-05-27 4:01 ` Al Viro
2010-05-27 4:01 ` Al Viro
2010-05-27 4:01 ` Al Viro
2010-05-27 6:17 ` Dave Chinner
2010-05-27 6:17 ` Dave Chinner
2010-05-27 6:17 ` Dave Chinner
2010-05-27 6:46 ` Nick Piggin
2010-05-27 6:46 ` Nick Piggin
2010-05-27 6:46 ` Nick Piggin
2010-05-27 2:19 ` [PATCH 3/5] " Nick Piggin
2010-05-27 2:19 ` Nick Piggin
2010-05-27 2:19 ` Nick Piggin
2010-05-27 2:19 ` Nick Piggin
2010-05-27 4:07 ` Dave Chinner
2010-05-27 4:07 ` Dave Chinner
2010-05-27 4:07 ` Dave Chinner
2010-05-27 4:24 ` Nick Piggin
2010-05-27 4:24 ` Nick Piggin
2010-05-27 4:24 ` Nick Piggin
2010-05-27 6:35 ` Nick Piggin
2010-05-27 6:35 ` Nick Piggin
2010-05-27 6:35 ` Nick Piggin
2010-05-27 6:35 ` Nick Piggin
2010-05-27 22:40 ` Dave Chinner
2010-05-27 22:40 ` Dave Chinner
2010-05-27 22:40 ` Dave Chinner
2010-05-27 22:40 ` Dave Chinner
2010-05-28 5:19 ` Nick Piggin
2010-05-28 5:19 ` Nick Piggin
2010-05-28 5:19 ` Nick Piggin
2010-05-28 5:19 ` Nick Piggin
2010-05-31 6:39 ` Dave Chinner
2010-05-31 6:39 ` Dave Chinner
2010-05-31 6:39 ` Dave Chinner
2010-05-31 6:39 ` Dave Chinner
2010-05-31 7:28 ` Nick Piggin
2010-05-31 7:28 ` Nick Piggin
2010-05-31 7:28 ` Nick Piggin
2010-05-31 7:28 ` Nick Piggin
2010-05-27 20:32 ` Andrew Morton
2010-05-27 20:32 ` Andrew Morton
2010-05-27 20:32 ` Andrew Morton
2010-05-27 23:01 ` Dave Chinner
2010-05-27 23:01 ` Dave Chinner
2010-05-27 23:01 ` Dave Chinner
2010-05-25 8:53 ` [PATCH 4/5] superblock: add filesystem shrinker operations Dave Chinner
2010-05-25 8:53 ` Dave Chinner
2010-05-25 8:53 ` Dave Chinner
2010-05-25 8:53 ` Dave Chinner
2010-05-27 20:32 ` Andrew Morton
2010-05-27 20:32 ` Andrew Morton
2010-05-27 20:32 ` Andrew Morton
2010-05-25 8:53 ` [PATCH 5/5] xfs: make use of new shrinker callout Dave Chinner
2010-05-25 8:53 ` Dave Chinner
2010-05-25 8:53 ` Dave Chinner
2010-05-26 16:44 ` [PATCH 0/5] Per superblock shrinkers V2 Nick Piggin
2010-05-26 16:44 ` Nick Piggin
2010-05-26 16:44 ` Nick Piggin
2010-05-27 20:32 ` Andrew Morton
2010-05-27 20:32 ` Andrew Morton
2010-05-27 20:32 ` Andrew Morton
2010-05-28 0:30 ` Dave Chinner
2010-05-28 0:30 ` Dave Chinner
2010-05-28 0:30 ` Dave Chinner
2010-05-28 7:42 ` Artem Bityutskiy
2010-05-28 7:42 ` Artem Bityutskiy
2010-05-28 7:42 ` Artem Bityutskiy
2010-05-28 7:42 ` Artem Bityutskiy
2010-07-02 12:13 ` Christoph Hellwig
2010-07-02 12:13 ` Christoph Hellwig
2010-07-02 12:13 ` Christoph Hellwig
2010-07-12 2:41 ` Dave Chinner
2010-07-12 2:41 ` Dave Chinner
2010-07-12 2:41 ` Dave Chinner
2010-07-12 2:52 ` Christoph Hellwig
2010-07-12 2:52 ` Christoph Hellwig
2010-07-12 2:52 ` Christoph Hellwig
-- strict thread matches above, loose matches on Subject: below --
2010-05-14 7:24 [PATCH 0/5] Per-superblock shrinkers Dave Chinner
2010-05-14 7:24 ` [PATCH 1/5] inode: Make unused inode LRU per superblock Dave Chinner
2010-05-14 7:24 ` Dave Chinner
2010-05-14 7:24 ` Dave Chinner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1274777588-21494-2-git-send-email-david@fromorbit.com \
--to=david@fromorbit.com \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=xfs@oss.sgi.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.