From mboxrd@z Thu Jan 1 00:00:00 1970 From: Christian Stroetmann Subject: Re: [PATCH 15/20] fs: split locking of inode writeback and LRU lists Date: Tue, 19 Oct 2010 01:53:32 +0200 Message-ID: <4CBCDDFC.9040000@ontolinux.com> References: <1287382856-29529-1-git-send-email-david@fromorbit.com> <1287382856-29529-16-git-send-email-david@fromorbit.com> Mime-Version: 1.0 Content-Type: text/plain; charset=ISO-8859-15; format=flowed Content-Transfer-Encoding: 7bit Cc: linux-fsdevel To: Dave Chinner Return-path: Received: from moutng.kundenserver.de ([212.227.17.10]:49315 "EHLO moutng.kundenserver.de" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S932155Ab0JRXzR (ORCPT ); Mon, 18 Oct 2010 19:55:17 -0400 In-Reply-To: <1287382856-29529-16-git-send-email-david@fromorbit.com> Sender: linux-fsdevel-owner@vger.kernel.org List-ID: Just only typos On the 18.10.2010 08:20, Dave Chinner wrote: > From: Dave Chinner > > Given that the inode LRU and IO lists are split apart, they do not > need to be protected by the same lock. So in preparation for removal > of the inode_lock, add new locks for them. The writeback lists are > only ever accessed in the context of a bdi, so add a per-BDI lock to > protect manipulations of these lists. > > For the inode LRU, introduce a simple global lock to protect it. > While this could be made per-sb, it is unclear yet as to what is the > next step for optimising/parallelising reclaim of inodes. Rather > than optimise now, leave it as a global list and lock until further > analysis can be done. > > Because there will now be a situation where the inode is on > different lists protected by different locks during the freeing of > the inode (i.e. not an atomic state transition), we need to ensure > that we set the I_FREEING state flag before we start removing inodes > from the IO and LRU lists. This ensures that if we race with other > threads during freeing, they will notice the I_FREEING flag is set > and be able to take appropriate action to avoid problems. > > Based on a patch originally from Nick Piggin. > > Signed-off-by: Dave Chinner > Reviewed-by: Christoph Hellwig > --- > fs/block_dev.c | 5 +++ > fs/fs-writeback.c | 51 +++++++++++++++++++++++++++++++++--- > fs/inode.c | 61 ++++++++++++++++++++++++++++++++++++++----- > fs/internal.h | 5 +++ > include/linux/backing-dev.h | 3 ++ > mm/backing-dev.c | 19 +++++++++++++ > 6 files changed, 133 insertions(+), 11 deletions(-) > > diff --git a/fs/block_dev.c b/fs/block_dev.c > index 11ad53d..7909775 100644 > --- a/fs/block_dev.c > +++ b/fs/block_dev.c > @@ -56,10 +56,15 @@ EXPORT_SYMBOL(I_BDEV); > static void bdev_inode_switch_bdi(struct inode *inode, > struct backing_dev_info *dst) > { > + struct backing_dev_info *old = inode->i_data.backing_dev_info; > + > spin_lock(&inode_lock); > + bdi_lock_two(old, dst); > inode->i_data.backing_dev_info = dst; > if (!list_empty(&inode->i_wb_list)) > list_move(&inode->i_wb_list,&dst->wb.b_dirty); > + spin_unlock(&old->wb.b_lock); > + spin_unlock(&dst->wb.b_lock); > spin_unlock(&inode_lock); > } > > diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c > index 676e048..f159141 100644 > --- a/fs/fs-writeback.c > +++ b/fs/fs-writeback.c > @@ -157,6 +157,18 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi) > } > > /* > + * Remove the inode from the writeback list it is on. > + */ > +void inode_wb_list_del(struct inode *inode) > +{ > + struct backing_dev_info *bdi = inode_to_bdi(inode); > + > + spin_lock(&bdi->wb.b_lock); > + list_del_init(&inode->i_wb_list); > + spin_unlock(&bdi->wb.b_lock); > +} > + > +/* > * Redirty an inode: set its when-it-was dirtied timestamp and move it to the > * furthest end of its superblock's dirty-inode list. > * > @@ -169,6 +181,7 @@ static void redirty_tail(struct inode *inode) > { > struct bdi_writeback *wb =&inode_to_bdi(inode)->wb; > > + assert_spin_locked(&wb->b_lock); > if (!list_empty(&wb->b_dirty)) { > struct inode *tail; > > @@ -186,6 +199,7 @@ static void requeue_io(struct inode *inode) > { > struct bdi_writeback *wb =&inode_to_bdi(inode)->wb; > > + assert_spin_locked(&wb->b_lock); > list_move(&inode->i_wb_list,&wb->b_more_io); > } > > @@ -269,6 +283,7 @@ static void move_expired_inodes(struct list_head *delaying_queue, > */ > static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this) > { > + assert_spin_locked(&wb->b_lock); > list_splice_init(&wb->b_more_io,&wb->b_io); > move_expired_inodes(&wb->b_dirty,&wb->b_io, older_than_this); > } > @@ -311,6 +326,7 @@ static void inode_wait_for_writeback(struct inode *inode) > static int > writeback_single_inode(struct inode *inode, struct writeback_control *wbc) > { > + struct backing_dev_info *bdi = inode_to_bdi(inode); > struct address_space *mapping = inode->i_mapping; > unsigned dirty; > int ret; > @@ -330,7 +346,9 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) > * completed a full scan of b_io. > */ > if (wbc->sync_mode != WB_SYNC_ALL) { > + spin_lock(&bdi->wb.b_lock); > requeue_io(inode); > + spin_unlock(&bdi->wb.b_lock); > return 0; > } > > @@ -385,6 +403,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) > * sometimes bales out without doing anything. > */ > inode->i_state |= I_DIRTY_PAGES; > + spin_lock(&bdi->wb.b_lock); > if (wbc->nr_to_write<= 0) { > /* > * slice used up: queue for next turn > @@ -400,6 +419,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) > */ > redirty_tail(inode); > } > + spin_unlock(&bdi->wb.b_lock); > } else if (inode->i_state& I_DIRTY) { > /* > * Filesystems can dirty the inode during writeback > @@ -407,7 +427,9 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) > * submission or metadata updates after data IO > * completion. > */ > + spin_lock(&bdi->wb.b_lock); > redirty_tail(inode); > + spin_unlock(&bdi->wb.b_lock); > } else { > /* > * The inode is clean. If it is unused, then make sure > @@ -415,7 +437,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) > * does not move dirty inodes to the LRU and dirty > * inodes are removed from the LRU during scanning. > */ > - list_del_init(&inode->i_wb_list); > + inode_wb_list_del(inode); > if (!inode->i_ref) > inode_lru_list_add(inode); > } > @@ -463,6 +485,7 @@ static bool pin_sb_for_writeback(struct super_block *sb) > static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb, > struct writeback_control *wbc, bool only_this_sb) > { > + assert_spin_locked(&wb->b_lock); > while (!list_empty(&wb->b_io)) { > long pages_skipped; > struct inode *inode = list_entry(wb->b_io.prev, > @@ -478,7 +501,6 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb, > redirty_tail(inode); > continue; > } > - > /* > * The inode belongs to a different superblock. > * Bounce back to the caller to unpin this and > @@ -487,7 +509,15 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb, > return 0; > } > > - if (inode->i_state& (I_NEW | I_WILL_FREE)) { > + /* > + * We can see I_FREEING here when the inod isin the process of is in > + * being reclaimed. In that case the freer is waiting on the > + * wb->b_lock that we currently hold to remove the inode from > + * the writeback list. So we don't spin on it here, requeue it > + * and move on to the next inode, which will allow the other > + * thread to free the inode when we drop the lock. > + */ [...]