From: Dave Chinner <david@fromorbit.com>
To: linux-fsdevel@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Subject: [PATCH 18/18] fs: Reduce inode I_FREEING and factor inode disposal
Date: Fri, 8 Oct 2010 16:21:32 +1100 [thread overview]
Message-ID: <1286515292-15882-19-git-send-email-david@fromorbit.com> (raw)
In-Reply-To: <1286515292-15882-1-git-send-email-david@fromorbit.com>
From: Dave Chinner <dchinner@redhat.com>
Inode reclaim can push many inodes into the I_FREEING state before
it actually frees them. During the time it gathers these inodes, it
can call iput(), invalidate_mapping_pages, be preempted, etc. As a
result, holding inodes in I_FREEING can cause pauses.
After the inode scalability work, there is not a big reason to batch
up inodes to reclaim them, so we can dispose of them as they are found
from the LRU. With similar reasoning, we can do the same during
unmount, completely removing the need for the dispose_list()
function.
Further, iput_final() does the same inode cleanup as reclaim and
unmount, so convert them all to use a single function for destroying
inodes. This is written such that the callers can optimise list
removals to avoid unnecessary lock round trips when removing inodes
from lists.
Based on a patch originally from Nick Piggin.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/inode.c | 150 +++++++++++++++++++++++++-----------------------------------
1 files changed, 63 insertions(+), 87 deletions(-)
diff --git a/fs/inode.c b/fs/inode.c
index c778ec4..03ddd19 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -29,6 +29,8 @@
/*
* Locking rules.
*
+ * inode->i_lock is *always* the innermost lock.
+ *
* inode->i_lock protects:
* i_ref i_state
* inode_hash_bucket lock protects:
@@ -46,8 +48,15 @@
*
* sb inode lock
* inode_lru_lock
- * wb->b_lock
- * inode->i_lock
+ * wb->b_lock
+ * inode->i_lock
+ *
+ * wb->b_lock
+ * sb_lock (pin sb for writeback)
+ * inode->i_lock
+ *
+ * inode_lru_lock
+ * inode->i_lock
*/
/*
* This is needed for the following functions:
@@ -434,13 +443,12 @@ void __insert_inode_hash(struct inode *inode, unsigned long hashval)
EXPORT_SYMBOL(__insert_inode_hash);
/**
- * __remove_inode_hash - remove an inode from the hash
+ * remove_inode_hash - remove an inode from the hash
* @inode: inode to unhash
*
- * Remove an inode from the superblock. inode->i_lock must be
- * held.
+ * Remove an inode from the superblock.
*/
-static void __remove_inode_hash(struct inode *inode)
+void remove_inode_hash(struct inode *inode)
{
struct inode_hash_bucket *b;
@@ -449,17 +457,6 @@ static void __remove_inode_hash(struct inode *inode)
hlist_bl_del_init(&inode->i_hash);
spin_unlock_bucket(b);
}
-
-/**
- * remove_inode_hash - remove an inode from the hash
- * @inode: inode to unhash
- *
- * Remove an inode from the superblock.
- */
-void remove_inode_hash(struct inode *inode)
-{
- __remove_inode_hash(inode);
-}
EXPORT_SYMBOL(remove_inode_hash);
void end_writeback(struct inode *inode)
@@ -494,37 +491,53 @@ static void evict(struct inode *inode)
}
/*
- * dispose_list - dispose of the contents of a local list
- * @head: the head of the list to free
+ * Free the inode passed in, removing it from the lists it is still connected
+ * to but avoiding unnecessary lock round-trips for the lists it is no longer
+ * on.
*
- * Dispose-list gets a local list with local inodes in it, so it doesn't
- * need to worry about list corruption and SMP locks.
+ * An inode must already be marked I_FREEING so that we avoid the inode being
+ * moved back onto lists if we race with other code that manipulates the lists
+ * (e.g. writeback_single_inode).
*/
-static void dispose_list(struct list_head *head)
+static void dispose_one_inode(struct inode *inode)
{
- while (!list_empty(head)) {
- struct inode *inode;
+ BUG_ON(!(inode->i_state & I_FREEING));
- inode = list_first_entry(head, struct inode, i_lru);
- list_del_init(&inode->i_lru);
+ /*
+ * move the inode off the IO lists and LRU once
+ * I_FREEING is set so that it won't get moved back on
+ * there if it is dirty.
+ */
+ if (!list_empty(&inode->i_io)) {
+ struct backing_dev_info *bdi = inode_to_bdi(inode);
- evict(inode);
+ spin_lock(&bdi->wb.b_lock);
+ list_del_init(&inode->i_io);
+ spin_unlock(&bdi->wb.b_lock);
+ }
+
+ if (!list_empty(&inode->i_lru))
+ inode_lru_list_del(inode);
- __remove_inode_hash(inode);
+ if (!list_empty(&inode->i_sb_list)) {
spin_lock(&inode->i_sb->s_inodes_lock);
list_del_init(&inode->i_sb_list);
spin_unlock(&inode->i_sb->s_inodes_lock);
-
- wake_up_inode(inode);
- destroy_inode(inode);
}
+
+ evict(inode);
+
+ remove_inode_hash(inode);
+ wake_up_inode(inode);
+ BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
+ destroy_inode(inode);
}
+
/*
* Invalidate all inodes for a device.
*/
-static int invalidate_list(struct super_block *sb, struct list_head *head,
- struct list_head *dispose)
+static int invalidate_list(struct super_block *sb, struct list_head *head)
{
struct list_head *next;
int busy = 0;
@@ -553,30 +566,22 @@ static int invalidate_list(struct super_block *sb, struct list_head *head,
}
invalidate_inode_buffers(inode);
if (!inode->i_ref) {
- struct backing_dev_info *bdi = inode_to_bdi(inode);
-
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_FREEING;
spin_unlock(&inode->i_lock);
- /*
- * move the inode off the IO lists and LRU once
- * I_FREEING is set so that it won't get moved back on
- * there if it is dirty.
- */
- spin_lock(&bdi->wb.b_lock);
- list_del_init(&inode->i_io);
- spin_unlock(&bdi->wb.b_lock);
+ /* save a lock round trip by removing the inode here. */
+ list_del_init(&inode->i_sb_list);
+ spin_unlock(&sb->s_inodes_lock);
- spin_lock(&inode_lru_lock);
- list_move(&inode->i_lru, dispose);
- spin_unlock(&inode_lru_lock);
+ dispose_one_inode(inode);
- percpu_counter_dec(&nr_inodes_unused);
+ spin_lock(&sb->s_inodes_lock);
continue;
}
spin_unlock(&inode->i_lock);
busy = 1;
+
}
return busy;
}
@@ -592,15 +597,12 @@ static int invalidate_list(struct super_block *sb, struct list_head *head,
int invalidate_inodes(struct super_block *sb)
{
int busy;
- LIST_HEAD(throw_away);
down_write(&iprune_sem);
spin_lock(&sb->s_inodes_lock);
fsnotify_unmount_inodes(&sb->s_inodes);
- busy = invalidate_list(sb, &sb->s_inodes, &throw_away);
+ busy = invalidate_list(sb, &sb->s_inodes);
spin_unlock(&sb->s_inodes_lock);
-
- dispose_list(&throw_away);
up_write(&iprune_sem);
return busy;
@@ -636,7 +638,6 @@ static int can_unuse(struct inode *inode)
*/
static void prune_icache(int nr_to_scan)
{
- LIST_HEAD(freeable);
int nr_scanned;
unsigned long reap = 0;
@@ -644,7 +645,6 @@ static void prune_icache(int nr_to_scan)
spin_lock(&inode_lru_lock);
for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
struct inode *inode;
- struct backing_dev_info *bdi;
if (list_empty(&inode_lru))
break;
@@ -691,18 +691,15 @@ static void prune_icache(int nr_to_scan)
inode->i_state |= I_FREEING;
spin_unlock(&inode->i_lock);
- /*
- * move the inode off the IO lists and LRU once
- * I_FREEING is set so that it won't get moved back on
- * there if it is dirty.
- */
- bdi = inode_to_bdi(inode);
- spin_lock(&bdi->wb.b_lock);
- list_del_init(&inode->i_io);
- spin_unlock(&bdi->wb.b_lock);
-
- list_move(&inode->i_lru, &freeable);
+ /* save a lock round trip by removing the inode here. */
+ list_del_init(&inode->i_lru);
percpu_counter_dec(&nr_inodes_unused);
+ spin_unlock(&inode_lru_lock);
+
+ dispose_one_inode(inode);
+ cond_resched();
+
+ spin_lock(&inode_lru_lock);
}
if (current_is_kswapd())
__count_vm_events(KSWAPD_INODESTEAL, reap);
@@ -710,7 +707,6 @@ static void prune_icache(int nr_to_scan)
__count_vm_events(PGINODESTEAL, reap);
spin_unlock(&inode_lru_lock);
- dispose_list(&freeable);
up_read(&iprune_sem);
}
@@ -1449,7 +1445,6 @@ static void iput_final(struct inode *inode)
{
struct super_block *sb = inode->i_sb;
const struct super_operations *op = inode->i_sb->s_op;
- struct backing_dev_info *bdi = inode_to_bdi(inode);
int drop;
assert_spin_locked(&inode->i_lock);
@@ -1475,35 +1470,16 @@ static void iput_final(struct inode *inode)
inode->i_state |= I_WILL_FREE;
spin_unlock(&inode->i_lock);
write_inode_now(inode, 1);
+ remove_inode_hash(inode);
spin_lock(&inode->i_lock);
WARN_ON(inode->i_state & I_NEW);
inode->i_state &= ~I_WILL_FREE;
- __remove_inode_hash(inode);
}
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_FREEING;
spin_unlock(&inode->i_lock);
- /*
- * move the inode off the IO lists and LRU once I_FREEING is set so
- * that it won't get moved back on there if it is dirty.
- * around.
- */
- spin_lock(&bdi->wb.b_lock);
- list_del_init(&inode->i_io);
- spin_unlock(&bdi->wb.b_lock);
-
- inode_lru_list_del(inode);
-
- spin_lock(&sb->s_inodes_lock);
- list_del_init(&inode->i_sb_list);
- spin_unlock(&sb->s_inodes_lock);
-
- evict(inode);
- remove_inode_hash(inode);
- wake_up_inode(inode);
- BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
- destroy_inode(inode);
+ dispose_one_inode(inode);
}
/**
--
1.7.1
next prev parent reply other threads:[~2010-10-08 5:22 UTC|newest]
Thread overview: 162+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-10-08 5:21 fs: Inode cache scalability V2 Dave Chinner
2010-10-08 5:21 ` [PATCH 01/18] kernel: add bl_list Dave Chinner
2010-10-08 8:18 ` Andi Kleen
2010-10-08 10:33 ` Dave Chinner
2010-10-08 5:21 ` [PATCH 02/18] fs: Convert nr_inodes and nr_unused to per-cpu counters Dave Chinner
2010-10-08 7:01 ` Christoph Hellwig
2010-10-08 5:21 ` [PATCH 03/18] fs: keep inode with backing-dev Dave Chinner
2010-10-08 7:01 ` Christoph Hellwig
2010-10-08 7:27 ` Dave Chinner
2010-10-08 5:21 ` [PATCH 04/18] fs: Implement lazy LRU updates for inodes Dave Chinner
2010-10-08 7:08 ` Christoph Hellwig
2010-10-08 7:31 ` Dave Chinner
2010-10-08 9:08 ` Al Viro
2010-10-08 9:51 ` Dave Chinner
2010-10-08 5:21 ` [PATCH 05/18] fs: inode split IO and LRU lists Dave Chinner
2010-10-08 7:14 ` Christoph Hellwig
2010-10-08 7:38 ` Dave Chinner
2010-10-08 9:16 ` Al Viro
2010-10-08 9:58 ` Dave Chinner
2010-10-08 5:21 ` [PATCH 06/18] fs: Clean up inode reference counting Dave Chinner
2010-10-08 7:20 ` Christoph Hellwig
2010-10-08 7:46 ` Dave Chinner
2010-10-08 8:15 ` Christoph Hellwig
2010-10-08 5:21 ` [PATCH 07/18] exofs: use iput() for inode reference count decrements Dave Chinner
2010-10-08 7:21 ` Christoph Hellwig
2010-10-16 7:56 ` Nick Piggin
2010-10-16 16:29 ` Christoph Hellwig
2010-10-17 15:41 ` Boaz Harrosh
2010-10-08 5:21 ` [PATCH 08/18] fs: add inode reference coutn read accessor Dave Chinner
2010-10-08 7:24 ` Christoph Hellwig
2010-10-08 5:21 ` [PATCH 09/18] fs: rework icount to be a locked variable Dave Chinner
2010-10-08 7:27 ` Christoph Hellwig
2010-10-08 7:50 ` Dave Chinner
2010-10-08 8:17 ` Christoph Hellwig
2010-10-08 13:16 ` Chris Mason
2010-10-08 9:32 ` Al Viro
2010-10-08 10:15 ` Dave Chinner
2010-10-08 13:14 ` Chris Mason
2010-10-08 13:53 ` Christoph Hellwig
2010-10-08 14:09 ` Dave Chinner
2010-10-08 5:21 ` [PATCH 10/18] fs: Factor inode hash operations into functions Dave Chinner
2010-10-08 7:29 ` Christoph Hellwig
2010-10-08 9:41 ` Al Viro
2010-10-08 5:21 ` [PATCH 11/18] fs: Introduce per-bucket inode hash locks Dave Chinner
2010-10-08 7:33 ` Christoph Hellwig
2010-10-08 7:51 ` Dave Chinner
2010-10-08 9:49 ` Al Viro
2010-10-08 9:51 ` Christoph Hellwig
2010-10-08 13:43 ` Christoph Hellwig
2010-10-08 14:17 ` Dave Chinner
2010-10-08 18:54 ` Christoph Hellwig
2010-10-16 7:57 ` Nick Piggin
2010-10-16 16:16 ` Christoph Hellwig
2010-10-16 17:12 ` Nick Piggin
2010-10-17 0:45 ` Christoph Hellwig
2010-10-17 2:06 ` Nick Piggin
2010-10-17 0:46 ` Dave Chinner
2010-10-17 2:25 ` Nick Piggin
2010-10-18 16:16 ` Andi Kleen
2010-10-18 16:21 ` Christoph Hellwig
2010-10-19 7:00 ` Nick Piggin
2010-10-19 16:50 ` Christoph Hellwig
2010-10-20 3:11 ` Nick Piggin
2010-10-24 15:44 ` Thomas Gleixner
2010-10-24 21:17 ` Nick Piggin
2010-10-25 4:41 ` Thomas Gleixner
2010-10-25 7:04 ` Thomas Gleixner
2010-10-26 0:12 ` Nick Piggin
2010-10-26 0:06 ` Nick Piggin
2010-10-08 5:21 ` [PATCH 12/18] fs: add a per-superblock lock for the inode list Dave Chinner
2010-10-08 7:35 ` Christoph Hellwig
2010-10-08 5:21 ` [PATCH 13/18] fs: split locking of inode writeback and LRU lists Dave Chinner
2010-10-08 7:42 ` Christoph Hellwig
2010-10-08 8:00 ` Dave Chinner
2010-10-08 8:18 ` Christoph Hellwig
2010-10-16 7:57 ` Nick Piggin
2010-10-16 16:20 ` Christoph Hellwig
2010-10-16 17:19 ` Nick Piggin
2010-10-17 1:00 ` Dave Chinner
2010-10-17 2:20 ` Nick Piggin
2010-10-08 5:21 ` [PATCH 14/18] fs: Protect inode->i_state with th einode->i_lock Dave Chinner
2010-10-08 7:49 ` Christoph Hellwig
2010-10-08 8:04 ` Dave Chinner
2010-10-08 8:18 ` Christoph Hellwig
2010-10-16 7:57 ` Nick Piggin
2010-10-16 16:19 ` Christoph Hellwig
2010-10-09 8:05 ` Christoph Hellwig
2010-10-09 14:52 ` Matthew Wilcox
2010-10-10 2:01 ` Dave Chinner
2010-10-08 5:21 ` [PATCH 15/18] fs: introduce a per-cpu last_ino allocator Dave Chinner
2010-10-08 7:53 ` Christoph Hellwig
2010-10-08 8:05 ` Dave Chinner
2010-10-08 8:22 ` Andi Kleen
2010-10-08 8:44 ` Christoph Hellwig
2010-10-08 9:58 ` Al Viro
2010-10-08 10:09 ` Andi Kleen
2010-10-08 10:19 ` Al Viro
2010-10-08 10:20 ` Eric Dumazet
2010-10-08 9:56 ` Al Viro
2010-10-08 10:03 ` Christoph Hellwig
2010-10-08 10:20 ` Eric Dumazet
2010-10-08 13:48 ` Christoph Hellwig
2010-10-08 14:06 ` Eric Dumazet
2010-10-08 19:10 ` Christoph Hellwig
2010-10-09 17:14 ` Matthew Wilcox
2010-10-16 7:57 ` Nick Piggin
2010-10-16 16:22 ` Christoph Hellwig
2010-10-16 17:21 ` Nick Piggin
2010-10-08 5:21 ` [PATCH 16/18] fs: Make iunique independent of inode_lock Dave Chinner
2010-10-08 7:55 ` Christoph Hellwig
2010-10-08 8:06 ` Dave Chinner
2010-10-08 8:19 ` Christoph Hellwig
2010-10-08 5:21 ` [PATCH 17/18] fs: icache remove inode_lock Dave Chinner
2010-10-08 8:03 ` Christoph Hellwig
2010-10-08 8:09 ` Dave Chinner
2010-10-13 7:20 ` Nick Piggin
2010-10-13 7:27 ` Nick Piggin
2010-10-13 11:28 ` Christoph Hellwig
2010-10-13 12:03 ` Nick Piggin
2010-10-13 12:20 ` Christoph Hellwig
2010-10-13 12:25 ` Nick Piggin
2010-10-13 10:42 ` Eric Dumazet
2010-10-13 12:07 ` Nick Piggin
2010-10-13 11:25 ` Christoph Hellwig
2010-10-13 12:30 ` Nick Piggin
2010-10-13 23:23 ` Dave Chinner
2010-10-14 9:06 ` Nick Piggin
2010-10-14 9:13 ` Nick Piggin
2010-10-14 14:41 ` Christoph Hellwig
2010-10-15 0:14 ` Nick Piggin
2010-10-15 3:13 ` Dave Chinner
2010-10-15 3:30 ` Nick Piggin
2010-10-15 3:44 ` Nick Piggin
2010-10-15 6:41 ` Nick Piggin
2010-10-15 10:59 ` Dave Chinner
2010-10-15 13:03 ` Nick Piggin
2010-10-15 13:29 ` Nick Piggin
2010-10-15 17:33 ` Nick Piggin
2010-10-15 17:52 ` Christoph Hellwig
2010-10-15 18:02 ` Nick Piggin
2010-10-15 18:14 ` Nick Piggin
2010-10-16 2:09 ` Nick Piggin
2010-10-15 14:11 ` Nick Piggin
2010-10-15 20:50 ` Nick Piggin
2010-10-15 20:56 ` Nick Piggin
2010-10-15 4:04 ` Nick Piggin
2010-10-15 11:33 ` Dave Chinner
2010-10-15 13:14 ` Nick Piggin
2010-10-15 15:38 ` Nick Piggin
2010-10-16 7:57 ` Nick Piggin
2010-10-08 5:21 ` Dave Chinner [this message]
2010-10-08 8:11 ` [PATCH 18/18] fs: Reduce inode I_FREEING and factor inode disposal Christoph Hellwig
2010-10-08 10:18 ` Al Viro
2010-10-08 10:52 ` Dave Chinner
2010-10-08 12:10 ` Al Viro
2010-10-08 13:55 ` Dave Chinner
2010-10-09 17:22 ` Matthew Wilcox
2010-10-09 8:08 ` [PATCH 19/18] fs: split __inode_add_to_list Christoph Hellwig
2010-10-12 10:47 ` Dave Chinner
2010-10-12 11:31 ` Christoph Hellwig
2010-10-12 12:05 ` Dave Chinner
2010-10-09 11:18 ` [PATCH 20/18] fs: do not assign default i_ino in new_inode Christoph Hellwig
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1286515292-15882-19-git-send-email-david@fromorbit.com \
--to=david@fromorbit.com \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).