From mboxrd@z Thu Jan 1 00:00:00 1970
From: Christian Stroetmann
Subject: Re: [PATCH 04/21] fs: Implement lazy LRU updates for inodes
Date: Thu, 21 Oct 2010 04:14:23 +0200
Message-ID: <4CBFA1FF.5030109@ontolinux.com>
References: <1287622186-1935-1-git-send-email-david@fromorbit.com>
 <1287622186-1935-5-git-send-email-david@fromorbit.com>
In-Reply-To: <1287622186-1935-5-git-send-email-david@fromorbit.com>
To: Dave Chinner
Cc: linux-fsdevel

Just two typos, inline below.

On 21.10.2010 02:49, Dave Chinner wrote:
> From: Nick Piggin
>
> Convert the inode LRU to use lazy updates to reduce lock and
> cacheline traffic. We avoid moving inodes around in the LRU list
> during iget/iput operations so these frequent operations don't need
> to access the LRUs. Instead, we defer the refcount checks to
> reclaim-time and use a per-inode state flag, I_REFERENCED, to tell
> reclaim that iget has touched the inode in the past. This means that
> only reclaim should be touching the LRU with any frequency, hence
> significantly reducing lock acquisitions and the amount contention
> on LRU updates.
>
> This also removes the inode_in_use list, which means we now only
> have one list for tracking the inode LRU status. This makes it much
> simpler to split out the LRU list operations under it's own lock.
>
> Signed-off-by: Nick Piggin
> Signed-off-by: Dave Chinner
> Reviewed-by: Christoph Hellwig
> ---
>  fs/fs-writeback.c         |   14 +++---
>  fs/inode.c                |  111 +++++++++++++++++++++++++++-----------------
>  fs/internal.h             |    6 +++
>  include/linux/fs.h        |   13 +++---
>  include/linux/writeback.h |    1 -
>  5 files changed, 88 insertions(+), 57 deletions(-)
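
As an aside, the lazy scheme described above is easy to model outside
the kernel. Here is a minimal user-space sketch (my own simplified
types and names -- toy_inode, toy_iput_final, toy_prune -- not the
kernel code): dropping the last reference only sets a referenced bit,
and the LRU list is only walked and reordered by the reclaim pass,
which gives flagged inodes one extra pass.

#include <stdio.h>
#include <stdlib.h>

#define I_REFERENCED 0x1

struct toy_inode {
	int ino;
	unsigned state;			/* models inode->i_state */
	struct toy_inode *next;		/* singly-linked LRU, newest first */
};

static struct toy_inode *lru_head;

/* Last reference dropped: just flag the inode, don't touch the LRU. */
static void toy_iput_final(struct toy_inode *inode)
{
	inode->state |= I_REFERENCED;
}

/* Reclaim pass: referenced inodes get a second chance. */
static void toy_prune(void)
{
	struct toy_inode **pp = &lru_head;

	while (*pp) {
		struct toy_inode *inode = *pp;

		if (inode->state & I_REFERENCED) {
			inode->state &= ~I_REFERENCED;
			pp = &inode->next;	/* skip: one more pass */
			continue;
		}
		*pp = inode->next;		/* unlink and "evict" */
		printf("evicted inode %d\n", inode->ino);
		free(inode);
	}
}

int main(void)
{
	for (int i = 0; i < 4; i++) {
		struct toy_inode *inode = calloc(1, sizeof(*inode));

		if (!inode)
			abort();
		inode->ino = i;
		inode->next = lru_head;
		lru_head = inode;
	}
	toy_iput_final(lru_head);	/* inode 3 was "used recently" */
	toy_prune();			/* evicts 0, 1, 2; inode 3 survives */
	toy_prune();			/* now inode 3 goes too */
	return 0;
}

Anyway, on to the diff: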
> diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
> index 58a95b7..33e9857 100644
> --- a/fs/fs-writeback.c
> +++ b/fs/fs-writeback.c
> @@ -408,16 +408,16 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
>  		 * completion.
>  		 */
>  		redirty_tail(inode);
> -	} else if (atomic_read(&inode->i_count)) {
> -		/*
> -		 * The inode is clean, inuse
> -		 */
> -		list_move(&inode->i_list, &inode_in_use);
>  	} else {
>  		/*
> -		 * The inode is clean, unused
> +		 * The inode is clean. If it is unused, then make sure
> +		 * that it is put on the LRU correctly as iput_final()
> +		 * does not move dirty inodes to the LRU and dirty
> +		 * inodes are removed from the LRU during scanning.
>  		 */
> -		list_move(&inode->i_list, &inode_unused);
> +		list_del_init(&inode->i_list);
> +		if (!atomic_read(&inode->i_count))
> +			inode_lru_list_add(inode);
>  	}
>  }
>  inode_sync_complete(inode);
> diff --git a/fs/inode.c b/fs/inode.c
> index b3b6a4b..f47ec71 100644
> --- a/fs/inode.c
> +++ b/fs/inode.c
> @@ -72,7 +72,6 @@ static unsigned int i_hash_shift __read_mostly;
>   * allowing for low-overhead inode sync() operations.
>   */
>
> -LIST_HEAD(inode_in_use);
>  LIST_HEAD(inode_unused);
>  static struct hlist_head *inode_hashtable __read_mostly;
>
> @@ -291,6 +290,7 @@ void inode_init_once(struct inode *inode)
>  	INIT_HLIST_NODE(&inode->i_hash);
>  	INIT_LIST_HEAD(&inode->i_dentry);
>  	INIT_LIST_HEAD(&inode->i_devices);
> +	INIT_LIST_HEAD(&inode->i_list);
>  	INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
>  	spin_lock_init(&inode->i_data.tree_lock);
>  	spin_lock_init(&inode->i_data.i_mmap_lock);
> @@ -317,12 +317,21 @@ static void init_once(void *foo)
>   */
>  void __iget(struct inode *inode)
>  {
> -	if (atomic_inc_return(&inode->i_count) != 1)
> -		return;
> +	atomic_inc(&inode->i_count);
> +}
> +
> +void inode_lru_list_add(struct inode *inode)
> +{
> +	list_add(&inode->i_list, &inode_unused);
> +	percpu_counter_inc(&nr_inodes_unused);
> +}
>
> -	if (!(inode->i_state & (I_DIRTY|I_SYNC)))
> -		list_move(&inode->i_list, &inode_in_use);
> -	percpu_counter_dec(&nr_inodes_unused);
> +void inode_lru_list_del(struct inode *inode)
> +{
> +	if (!list_empty(&inode->i_list)) {
> +		list_del_init(&inode->i_list);
> +		percpu_counter_dec(&nr_inodes_unused);
> +	}
>  }
>
>  void end_writeback(struct inode *inode)
> @@ -367,7 +376,7 @@ static void dispose_list(struct list_head *head)
>  		struct inode *inode;
>
>  		inode = list_first_entry(head, struct inode, i_list);
> -		list_del(&inode->i_list);
> +		list_del_init(&inode->i_list);
>
>  		evict(inode);
>
> @@ -410,9 +419,9 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose)
>  			continue;
>  		invalidate_inode_buffers(inode);
>  		if (!atomic_read(&inode->i_count)) {
> -			list_move(&inode->i_list, dispose);
>  			WARN_ON(inode->i_state & I_NEW);
>  			inode->i_state |= I_FREEING;
> +			list_move(&inode->i_list, dispose);
>  			percpu_counter_dec(&nr_inodes_unused);
>  			continue;
>  		}
> @@ -447,31 +456,21 @@ int invalidate_inodes(struct super_block *sb)
>  }
>  EXPORT_SYMBOL(invalidate_inodes);
>
> -static int can_unuse(struct inode *inode)
> -{
> -	if (inode->i_state)
> -		return 0;
> -	if (inode_has_buffers(inode))
> -		return 0;
> -	if (atomic_read(&inode->i_count))
> -		return 0;
> -	if (inode->i_data.nrpages)
> -		return 0;
> -	return 1;
> -}
> -
>  /*
> - * Scan `goal' inodes on the unused list for freeable ones. They are moved to
> - * a temporary list and then are freed outside inode_lock by dispose_list().
> + * Scan `goal' inodes on the unused list for freeable ones. They are moved to a
> + * temporary list and then are freed outside inode_lock by dispose_list().
>   *
>   * Any inodes which are pinned purely because of attached pagecache have their
> - * pagecache removed. We expect the final iput() on that inode to add it to
> - * the front of the inode_unused list. So look for it there and if the
> - * inode is still freeable, proceed. The right inode is found 99.9% of the
> - * time in testing on a 4-way.
> + * pagecache removed. If the inode has metadata buffers attached to
> + * mapping->private_list then try to remove them.
>   *
> - * If the inode has metadata buffers attached to mapping->private_list then
> - * try to remove them.
> + * If the inode has the I_REFERENCED flag set, it means that it has been used

s/set, it means/set, then it means/

> + * recently - the flag is set in iput_final(). When we encounter such an inode,
> + * clear the flag and move it to the back of the LRU so it gets another pass
> + * through the LRU before it gets reclaimed. This is necessary because of the
> + * fact we are doing lazy LRU updates to minimise lock contention so the LRU
> + * does not have strict ordering. Hence we don't want to reclaim inodes with
> + * this flag set because they are the inodes that are out of order.
>   */
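
One detail worth spelling out before the hunk below: the new
(inode->i_state & ~I_REFERENCED) test treats any state bit other than
I_REFERENCED as "busy". A tiny standalone check of that mask trick
(the flag values here are made up for illustration, not the kernel's
real ones):

#include <assert.h>
#include <stdio.h>

#define I_DIRTY      0x01	/* hypothetical value */
#define I_FREEING    0x02	/* hypothetical value */
#define I_REFERENCED 0x08	/* hypothetical value */

/* Reclaimable only if no state bit other than I_REFERENCED is set. */
static int busy(unsigned state)
{
	return (state & ~I_REFERENCED) != 0;
}

int main(void)
{
	assert(!busy(0));			/* clean, unreferenced */
	assert(!busy(I_REFERENCED));		/* clean, gets a 2nd pass */
	assert(busy(I_DIRTY));			/* dirty -> skip */
	assert(busy(I_DIRTY | I_REFERENCED));	/* still dirty -> skip */
	puts("mask checks ok");
	return 0;
}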
>  static void prune_icache(int nr_to_scan)
>  {
> @@ -489,8 +488,21 @@ static void prune_icache(int nr_to_scan)
>
>  		inode = list_entry(inode_unused.prev, struct inode, i_list);
>
> -		if (inode->i_state || atomic_read(&inode->i_count)) {
> +		/*
> +		 * Referenced or dirty inodes are still in use. Give them
> +		 * another pass through the LRU as we canot reclaim them now.
> +		 */
> +		if (atomic_read(&inode->i_count) ||
> +		    (inode->i_state & ~I_REFERENCED)) {
> +			list_del_init(&inode->i_list);
> +			percpu_counter_dec(&nr_inodes_unused);
> +			continue;
> +		}
> +
> +		/* recently referenced inodes get one more pass */
> +		if (inode->i_state & I_REFERENCED) {
>  			list_move(&inode->i_list, &inode_unused);
> +			inode->i_state &= ~I_REFERENCED;
>  			continue;
>  		}
>  		if (inode_has_buffers(inode) || inode->i_data.nrpages) {
> @@ -500,13 +512,19 @@ static void prune_icache(int nr_to_scan)
>  			reap += invalidate_mapping_pages(&inode->i_data,
>  								0, -1);
>  			iput(inode);
> -			spin_lock(&inode_lock);
>
> -			if (inode != list_entry(inode_unused.next,
> -						struct inode, i_list))
> -				continue;	/* wrong inode or list_empty */
> -			if (!can_unuse(inode))
> -				continue;
> +			/*
> +			 * Rather than try to determine if we can still use the
> +			 * inode after calling iput(), leave the inode where it
> +			 * is on the LRU. If we race with another reclaimer,

s/reclaimer, that reclaimer/reclaimer, then that reclaimer/

> +			 * that reclaimer will either see a reference count
> +			 * or the I_REFERENCED flag, and move the inode to the
> +			 * back of the LRU. If we don't race, then we'll see
> +			 * the I_REFERENCED flag on the next pass and do the
> +			 * same. Either way, we won't spin on it in this loop.
> +			 */
> +			spin_lock(&inode_lock);
> +			continue;
[...]