From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from aserp1040.oracle.com ([141.146.126.69]:18710 "EHLO aserp1040.oracle.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752242AbcEIW5s (ORCPT ); Mon, 9 May 2016 18:57:48 -0400 Date: Mon, 9 May 2016 15:59:33 -0700 From: Liu Bo To: fdmanana@kernel.org Cc: linux-btrfs@vger.kernel.org Subject: Re: [PATCH 2/3] Btrfs: don't wait for unrelated IO to finish before relocation Message-ID: <20160509225933.GF4954@localhost.localdomain> Reply-To: bo.li.liu@oracle.com References: <1461546072-11154-1-git-send-email-fdmanana@kernel.org> <1461546072-11154-3-git-send-email-fdmanana@kernel.org> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii In-Reply-To: <1461546072-11154-3-git-send-email-fdmanana@kernel.org> Sender: linux-btrfs-owner@vger.kernel.org List-ID: On Mon, Apr 25, 2016 at 02:01:11AM +0100, fdmanana@kernel.org wrote: > From: Filipe Manana > > Before the relocation process of a block group starts, it sets the block > group to readonly mode, then flushes all delalloc writes and then finally > it waits for all ordered extents to complete. This last step includes > waiting for ordered extents destinated at extents allocated in other block > groups, making us waste unecessary time. > > So fix this by waiting only for ordered extents that fall into the block > group's range. > > Signed-off-by: Filipe Manana > --- > fs/btrfs/dev-replace.c | 4 ++-- > fs/btrfs/extent-tree.c | 11 +++++++---- > fs/btrfs/ioctl.c | 2 +- > fs/btrfs/ordered-data.c | 26 +++++++++++++++++++------- > fs/btrfs/ordered-data.h | 6 ++++-- > fs/btrfs/relocation.c | 4 +++- > fs/btrfs/super.c | 2 +- > fs/btrfs/transaction.c | 2 +- > 8 files changed, 38 insertions(+), 19 deletions(-) > > diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c > index a1d6652..a0c1016 100644 > --- a/fs/btrfs/dev-replace.c > +++ b/fs/btrfs/dev-replace.c > @@ -401,7 +401,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root, > if (ret) > btrfs_err(root->fs_info, "kobj add dev failed %d\n", ret); > > - btrfs_wait_ordered_roots(root->fs_info, -1); > + btrfs_wait_ordered_roots(root->fs_info, -1, 0, (u64)-1); > > /* force writing the updated state information to disk */ > trans = btrfs_start_transaction(root, 0); > @@ -493,7 +493,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, > mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); > return ret; > } > - btrfs_wait_ordered_roots(root->fs_info, -1); > + btrfs_wait_ordered_roots(root->fs_info, -1, 0, (u64)-1); > > trans = btrfs_start_transaction(root, 0); > if (IS_ERR(trans)) { > diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c > index 53e1297..0544f70 100644 > --- a/fs/btrfs/extent-tree.c > +++ b/fs/btrfs/extent-tree.c > @@ -4141,7 +4141,7 @@ commit_trans: > > if (need_commit > 0) { > btrfs_start_delalloc_roots(fs_info, 0, -1); > - btrfs_wait_ordered_roots(fs_info, -1); > + btrfs_wait_ordered_roots(fs_info, -1, 0, (u64)-1); > } > > trans = btrfs_join_transaction(root); > @@ -4583,7 +4583,8 @@ static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root, > */ > btrfs_start_delalloc_roots(root->fs_info, 0, nr_items); > if (!current->journal_info) > - btrfs_wait_ordered_roots(root->fs_info, nr_items); > + btrfs_wait_ordered_roots(root->fs_info, nr_items, > + 0, (u64)-1); > } > } > > @@ -4632,7 +4633,8 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, > if (trans) > return; > if (wait_ordered) > - btrfs_wait_ordered_roots(root->fs_info, items); > + btrfs_wait_ordered_roots(root->fs_info, items, > + 0, (u64)-1); > return; > } > > @@ -4671,7 +4673,8 @@ skip_async: > > loops++; > if (wait_ordered && !trans) { > - btrfs_wait_ordered_roots(root->fs_info, items); > + btrfs_wait_ordered_roots(root->fs_info, items, > + 0, (u64)-1); > } else { > time_left = schedule_timeout_killable(1); > if (time_left) > diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c > index 053e677..4d71273 100644 > --- a/fs/btrfs/ioctl.c > +++ b/fs/btrfs/ioctl.c > @@ -681,7 +681,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, > if (ret) > goto dec_and_free; > > - btrfs_wait_ordered_extents(root, -1); > + btrfs_wait_ordered_extents(root, -1, 0, (u64)-1); > > btrfs_init_block_rsv(&pending_snapshot->block_rsv, > BTRFS_BLOCK_RSV_TEMP); > diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c > index 41101b2..86c1cbb 100644 > --- a/fs/btrfs/ordered-data.c > +++ b/fs/btrfs/ordered-data.c > @@ -661,14 +661,15 @@ static void btrfs_run_ordered_extent_work(struct btrfs_work *work) > * wait for all the ordered extents in a root. This is done when balancing > * space between drives. > */ > -int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr) > +int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr, > + const u64 range_start, const u64 range_len) > { > - struct list_head splice, works; > + LIST_HEAD(splice); > + LIST_HEAD(skipped); > + LIST_HEAD(works); > struct btrfs_ordered_extent *ordered, *next; > int count = 0; > - > - INIT_LIST_HEAD(&splice); > - INIT_LIST_HEAD(&works); > + const u64 range_end = range_start + range_len; > > mutex_lock(&root->ordered_extent_mutex); > spin_lock(&root->ordered_extent_lock); > @@ -676,6 +677,14 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr) > while (!list_empty(&splice) && nr) { > ordered = list_first_entry(&splice, struct btrfs_ordered_extent, > root_extent_list); > + > + if (range_end <= ordered->start || > + ordered->start + ordered->disk_len <= range_start) { > + list_move_tail(&ordered->root_extent_list, &skipped); > + cond_resched_lock(&root->ordered_extent_lock); > + continue; > + } > + > list_move_tail(&ordered->root_extent_list, > &root->ordered_extents); > atomic_inc(&ordered->refs); > @@ -694,6 +703,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr) > nr--; > count++; > } > + list_splice_tail(&skipped, &root->ordered_extents); It seems that 'skipped' is not necessary, we don't have to keep the order or something, however it's not a big deal. Others look sane to me. Reviewed-by: Liu Bo Thanks, -liubo > list_splice_tail(&splice, &root->ordered_extents); > spin_unlock(&root->ordered_extent_lock); > > @@ -708,7 +718,8 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr) > return count; > } > > -int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr) > +int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr, > + const u64 range_start, const u64 range_len) > { > struct btrfs_root *root; > struct list_head splice; > @@ -729,7 +740,8 @@ int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr) > &fs_info->ordered_roots); > spin_unlock(&fs_info->ordered_root_lock); > > - done = btrfs_wait_ordered_extents(root, nr); > + done = btrfs_wait_ordered_extents(root, nr, > + range_start, range_len); > btrfs_put_fs_root(root); > total_done += done; > > diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h > index f29e08e..82c011b 100644 > --- a/fs/btrfs/ordered-data.h > +++ b/fs/btrfs/ordered-data.h > @@ -197,8 +197,10 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, > struct btrfs_ordered_extent *ordered); > int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, > u32 *sum, int len); > -int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr); > -int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr); > +int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr, > + const u64 range_start, const u64 range_len); > +int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr, > + const u64 range_start, const u64 range_len); > void btrfs_get_logged_extents(struct inode *inode, > struct list_head *logged_list, > const loff_t start, > diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c > index f689be2..d768364 100644 > --- a/fs/btrfs/relocation.c > +++ b/fs/btrfs/relocation.c > @@ -4258,7 +4258,9 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) > err = ret; > goto out; > } > - ret = btrfs_wait_ordered_roots(fs_info, -1); > + ret = btrfs_wait_ordered_roots(fs_info, -1, > + rc->block_group->key.objectid, > + rc->block_group->key.offset); > /* > * We might have waited for an ordered extent to complete which was > * destined at an extent allocated from our block group. The extent item > diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c > index 00b8f37..89d1347 100644 > --- a/fs/btrfs/super.c > +++ b/fs/btrfs/super.c > @@ -1160,7 +1160,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait) > return 0; > } > > - btrfs_wait_ordered_roots(fs_info, -1); > + btrfs_wait_ordered_roots(fs_info, -1, 0, (u64)-1); > > trans = btrfs_attach_transaction_barrier(root); > if (IS_ERR(trans)) { > diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c > index 43885e5..f0bb54a 100644 > --- a/fs/btrfs/transaction.c > +++ b/fs/btrfs/transaction.c > @@ -1821,7 +1821,7 @@ static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) > static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info) > { > if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT)) > - btrfs_wait_ordered_roots(fs_info, -1); > + btrfs_wait_ordered_roots(fs_info, -1, 0, (u64)-1); > } > > static inline void > -- > 2.7.0.rc3 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html