* [PATCH 0/3] flush delalloc by multi-task
@ 2012-10-25 9:20 Miao Xie
2012-10-25 9:28 ` [PATCH 1/3] Btrfs: make delalloc inodes be flushed " Miao Xie
` (4 more replies)
0 siblings, 5 replies; 10+ messages in thread
From: Miao Xie @ 2012-10-25 9:20 UTC (permalink / raw)
To: Linux Btrfs
This patchset introduces multi-task delalloc flush, which makes the delalloc
flush faster. Besides that, it also fixes the problem that we join
the same transaction handler more than 2 times.
Implementation:
- Create a new worker pool.
- Queue the inodes with pending delalloc into the work queue of the worker pool
when we want to force them to disk, and then wait until all the
works we submitted are done.
- The ordered extents can also be queued into this work queue. The process is
similar to the second step.
Miao Xie (3):
Btrfs: make delalloc inodes be flushed by multi-task
Btrfs: make ordered operations be handled by multi-task
Btrfs: make ordered extent be flushed by multi-task
fs/btrfs/ctree.h | 14 +++++++
fs/btrfs/disk-io.c | 7 ++++
fs/btrfs/inode.c | 78 ++++++++++++++++++++++++++++++++++++++---
fs/btrfs/ordered-data.c | 87 ++++++++++++++++++++++++++++++++++-------------
fs/btrfs/ordered-data.h | 7 +++-
fs/btrfs/relocation.c | 6 +++-
fs/btrfs/transaction.c | 24 ++++++++++---
7 files changed, 185 insertions(+), 38 deletions(-)
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH 1/3] Btrfs: make delalloc inodes be flushed by multi-task
2012-10-25 9:20 [PATCH 0/3] flush delalloc by multi-task Miao Xie
@ 2012-10-25 9:28 ` Miao Xie
2012-10-25 9:31 ` [PATCH 2/3] Btrfs: make ordered operations be handled " Miao Xie
` (3 subsequent siblings)
4 siblings, 0 replies; 10+ messages in thread
From: Miao Xie @ 2012-10-25 9:28 UTC (permalink / raw)
To: Linux Btrfs
This patch introduces a new worker pool named "flush_workers". If we
want to force all the inodes with pending delalloc to disk, we can
queue those inodes into the work queue of the worker pool; in this way,
those inodes will be flushed by multiple tasks.
Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
---
fs/btrfs/ctree.h | 14 ++++++++
fs/btrfs/disk-io.c | 7 ++++
fs/btrfs/inode.c | 78 ++++++++++++++++++++++++++++++++++++++++++++----
fs/btrfs/relocation.c | 6 +++-
fs/btrfs/transaction.c | 6 +++-
5 files changed, 103 insertions(+), 8 deletions(-)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 34c5a44..cd0c6d6 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1333,6 +1333,7 @@ struct btrfs_fs_info {
struct btrfs_workers generic_worker;
struct btrfs_workers workers;
struct btrfs_workers delalloc_workers;
+ struct btrfs_workers flush_workers;
struct btrfs_workers endio_workers;
struct btrfs_workers endio_meta_workers;
struct btrfs_workers endio_meta_write_workers;
@@ -3271,6 +3272,19 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans,
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
struct list_head *list, int search_commit);
/* inode.c */
+struct btrfs_delalloc_work {
+ struct inode *inode;
+ int wait;
+ int delay_iput;
+ struct completion completion;
+ struct list_head list;
+ struct btrfs_work work;
+};
+
+struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode,
+ int wait, int delay_iput);
+void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work);
+
struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
size_t pg_offset, u64 start, u64 len,
int create);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 7cda519..bd70c28 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2279,6 +2279,10 @@ int open_ctree(struct super_block *sb,
fs_info->thread_pool_size,
&fs_info->generic_worker);
+ btrfs_init_workers(&fs_info->flush_workers, "flush_delalloc",
+ fs_info->thread_pool_size,
+ &fs_info->generic_worker);
+
btrfs_init_workers(&fs_info->submit_workers, "submit",
min_t(u64, fs_devices->num_devices,
fs_info->thread_pool_size),
@@ -2350,6 +2354,7 @@ int open_ctree(struct super_block *sb,
ret |= btrfs_start_workers(&fs_info->delayed_workers);
ret |= btrfs_start_workers(&fs_info->caching_workers);
ret |= btrfs_start_workers(&fs_info->readahead_workers);
+ ret |= btrfs_start_workers(&fs_info->flush_workers);
if (ret) {
err = -ENOMEM;
goto fail_sb_buffer;
@@ -2667,6 +2672,7 @@ fail_sb_buffer:
btrfs_stop_workers(&fs_info->submit_workers);
btrfs_stop_workers(&fs_info->delayed_workers);
btrfs_stop_workers(&fs_info->caching_workers);
+ btrfs_stop_workers(&fs_info->flush_workers);
fail_alloc:
fail_iput:
btrfs_mapping_tree_free(&fs_info->mapping_tree);
@@ -3339,6 +3345,7 @@ int close_ctree(struct btrfs_root *root)
btrfs_stop_workers(&fs_info->delayed_workers);
btrfs_stop_workers(&fs_info->caching_workers);
btrfs_stop_workers(&fs_info->readahead_workers);
+ btrfs_stop_workers(&fs_info->flush_workers);
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
if (btrfs_test_opt(root, CHECK_INTEGRITY))
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index f92def2..290cd77 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -71,6 +71,7 @@ static const struct file_operations btrfs_dir_file_operations;
static struct extent_io_ops btrfs_extent_io_ops;
static struct kmem_cache *btrfs_inode_cachep;
+static struct kmem_cache *btrfs_delalloc_work_cachep;
struct kmem_cache *btrfs_trans_handle_cachep;
struct kmem_cache *btrfs_transaction_cachep;
struct kmem_cache *btrfs_path_cachep;
@@ -7199,6 +7200,8 @@ void btrfs_destroy_cachep(void)
kmem_cache_destroy(btrfs_path_cachep);
if (btrfs_free_space_cachep)
kmem_cache_destroy(btrfs_free_space_cachep);
+ if (btrfs_delalloc_work_cachep)
+ kmem_cache_destroy(btrfs_delalloc_work_cachep);
}
int btrfs_init_cachep(void)
@@ -7233,6 +7236,13 @@ int btrfs_init_cachep(void)
if (!btrfs_free_space_cachep)
goto fail;
+ btrfs_delalloc_work_cachep = kmem_cache_create("btrfs_delalloc_work",
+ sizeof(struct btrfs_delalloc_work), 0,
+ SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
+ NULL);
+ if (!btrfs_delalloc_work_cachep)
+ goto fail;
+
return 0;
fail:
btrfs_destroy_cachep();
@@ -7443,6 +7453,49 @@ out_notrans:
return ret;
}
+static void btrfs_run_delalloc_work(struct btrfs_work *work)
+{
+ struct btrfs_delalloc_work *delalloc_work;
+
+ delalloc_work = container_of(work, struct btrfs_delalloc_work,
+ work);
+ if (delalloc_work->wait)
+ btrfs_wait_ordered_range(delalloc_work->inode, 0, (u64)-1);
+ else
+ filemap_flush(delalloc_work->inode->i_mapping);
+
+ if (delalloc_work->delay_iput)
+ btrfs_add_delayed_iput(delalloc_work->inode);
+ else
+ iput(delalloc_work->inode);
+ complete(&delalloc_work->completion);
+}
+
+struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode,
+ int wait, int delay_iput)
+{
+ struct btrfs_delalloc_work *work;
+
+ work = kmem_cache_zalloc(btrfs_delalloc_work_cachep, GFP_NOFS);
+ if (!work)
+ return NULL;
+
+ init_completion(&work->completion);
+ INIT_LIST_HEAD(&work->list);
+ work->inode = inode;
+ work->wait = wait;
+ work->delay_iput = delay_iput;
+ work->work.func = btrfs_run_delalloc_work;
+
+ return work;
+}
+
+void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work)
+{
+ wait_for_completion(&work->completion);
+ kmem_cache_free(btrfs_delalloc_work_cachep, work);
+}
+
/*
* some fairly slow code that needs optimization. This walks the list
* of all the inodes with pending delalloc and forces them to disk.
@@ -7452,10 +7505,15 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
struct list_head *head = &root->fs_info->delalloc_inodes;
struct btrfs_inode *binode;
struct inode *inode;
+ struct btrfs_delalloc_work *work, *next;
+ struct list_head works;
+ int ret = 0;
if (root->fs_info->sb->s_flags & MS_RDONLY)
return -EROFS;
+ INIT_LIST_HEAD(&works);
+
spin_lock(&root->fs_info->delalloc_lock);
while (!list_empty(head)) {
binode = list_entry(head->next, struct btrfs_inode,
@@ -7465,11 +7523,14 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
list_del_init(&binode->delalloc_inodes);
spin_unlock(&root->fs_info->delalloc_lock);
if (inode) {
- filemap_flush(inode->i_mapping);
- if (delay_iput)
- btrfs_add_delayed_iput(inode);
- else
- iput(inode);
+ work = btrfs_alloc_delalloc_work(inode, 0, delay_iput);
+ if (!work) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ list_add_tail(&work->list, &works);
+ btrfs_queue_worker(&root->fs_info->flush_workers,
+ &work->work);
}
cond_resched();
spin_lock(&root->fs_info->delalloc_lock);
@@ -7488,7 +7549,12 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
atomic_read(&root->fs_info->async_delalloc_pages) == 0));
}
atomic_dec(&root->fs_info->async_submit_draining);
- return 0;
+out:
+ list_for_each_entry_safe(work, next, &works, list) {
+ list_del_init(&work->list);
+ btrfs_wait_and_free_delalloc_work(work);
+ }
+ return ret;
}
static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 776f0aa..5bef816 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -4057,7 +4057,11 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
(unsigned long long)rc->block_group->key.objectid,
(unsigned long long)rc->block_group->flags);
- btrfs_start_delalloc_inodes(fs_info->tree_root, 0);
+ ret = btrfs_start_delalloc_inodes(fs_info->tree_root, 0);
+ if (ret < 0) {
+ err = ret;
+ goto out;
+ }
btrfs_wait_ordered_extents(fs_info->tree_root, 0);
while (1) {
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 77db875..4aed529 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1499,7 +1499,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
WARN_ON(cur_trans != trans->transaction);
if (flush_on_commit || snap_pending) {
- btrfs_start_delalloc_inodes(root, 1);
+ ret = btrfs_start_delalloc_inodes(root, 1);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto cleanup_transaction;
+ }
btrfs_wait_ordered_extents(root, 1);
}
--
1.6.5.2
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 2/3] Btrfs: make ordered operations be handled by multi-task
2012-10-25 9:20 [PATCH 0/3] flush delalloc by multi-task Miao Xie
2012-10-25 9:28 ` [PATCH 1/3] Btrfs: make delalloc inodes be flushed " Miao Xie
@ 2012-10-25 9:31 ` Miao Xie
2012-10-25 9:41 ` [PATCH 3/3] Btrfs: make ordered extent be flushed " Miao Xie
` (2 subsequent siblings)
4 siblings, 0 replies; 10+ messages in thread
From: Miao Xie @ 2012-10-25 9:31 UTC (permalink / raw)
To: Linux Btrfs
The processing of the ordered operations is similar to the delalloc inode flush, so
we handle them with the flush workers.
Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
---
fs/btrfs/ordered-data.c | 46 ++++++++++++++++++++++++++++++----------------
fs/btrfs/ordered-data.h | 2 +-
fs/btrfs/transaction.c | 18 ++++++++++++++----
3 files changed, 45 insertions(+), 21 deletions(-)
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 7772f02..ab2a3c0 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -519,13 +519,17 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput)
* extra check to make sure the ordered operation list really is empty
* before we return
*/
-void btrfs_run_ordered_operations(struct btrfs_root *root, int wait)
+int btrfs_run_ordered_operations(struct btrfs_root *root, int wait)
{
struct btrfs_inode *btrfs_inode;
struct inode *inode;
struct list_head splice;
+ struct list_head works;
+ struct btrfs_delalloc_work *work, *next;
+ int ret = 0;
INIT_LIST_HEAD(&splice);
+ INIT_LIST_HEAD(&works);
mutex_lock(&root->fs_info->ordered_operations_mutex);
spin_lock(&root->fs_info->ordered_extent_lock);
@@ -533,6 +537,7 @@ again:
list_splice_init(&root->fs_info->ordered_operations, &splice);
while (!list_empty(&splice)) {
+
btrfs_inode = list_entry(splice.next, struct btrfs_inode,
ordered_operations);
@@ -549,15 +554,26 @@ again:
list_add_tail(&BTRFS_I(inode)->ordered_operations,
&root->fs_info->ordered_operations);
}
+
+ if (!inode)
+ continue;
spin_unlock(&root->fs_info->ordered_extent_lock);
- if (inode) {
- if (wait)
- btrfs_wait_ordered_range(inode, 0, (u64)-1);
- else
- filemap_flush(inode->i_mapping);
- btrfs_add_delayed_iput(inode);
+ work = btrfs_alloc_delalloc_work(inode, wait, 1);
+ if (!work) {
+ if (list_empty(&BTRFS_I(inode)->ordered_operations))
+ list_add_tail(&btrfs_inode->ordered_operations,
+ &splice);
+ spin_lock(&root->fs_info->ordered_extent_lock);
+ list_splice_tail(&splice,
+ &root->fs_info->ordered_operations);
+ spin_unlock(&root->fs_info->ordered_extent_lock);
+ ret = -ENOMEM;
+ goto out;
}
+ list_add_tail(&work->list, &works);
+ btrfs_queue_worker(&root->fs_info->flush_workers,
+ &work->work);
cond_resched();
spin_lock(&root->fs_info->ordered_extent_lock);
@@ -566,7 +582,13 @@ again:
goto again;
spin_unlock(&root->fs_info->ordered_extent_lock);
+out:
+ list_for_each_entry_safe(work, next, &works, list) {
+ list_del_init(&work->list);
+ btrfs_wait_and_free_delalloc_work(work);
+ }
mutex_unlock(&root->fs_info->ordered_operations_mutex);
+ return ret;
}
/*
@@ -934,15 +956,6 @@ void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
if (last_mod < root->fs_info->last_trans_committed)
return;
- /*
- * the transaction is already committing. Just start the IO and
- * don't bother with all of this list nonsense
- */
- if (trans && root->fs_info->running_transaction->blocked) {
- btrfs_wait_ordered_range(inode, 0, (u64)-1);
- return;
- }
-
spin_lock(&root->fs_info->ordered_extent_lock);
if (list_empty(&BTRFS_I(inode)->ordered_operations)) {
list_add_tail(&BTRFS_I(inode)->ordered_operations,
@@ -959,6 +972,7 @@ int __init ordered_data_init(void)
NULL);
if (!btrfs_ordered_extent_cache)
return -ENOMEM;
+
return 0;
}
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index dd27a0b..e8dcec6 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -186,7 +186,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
struct btrfs_ordered_extent *ordered);
int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum);
-void btrfs_run_ordered_operations(struct btrfs_root *root, int wait);
+int btrfs_run_ordered_operations(struct btrfs_root *root, int wait);
void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct inode *inode);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 4aed529..621790e 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1414,15 +1414,21 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
struct btrfs_transaction *cur_trans = trans->transaction;
struct btrfs_transaction *prev_trans = NULL;
DEFINE_WAIT(wait);
- int ret = -EIO;
+ int ret;
int should_grow = 0;
unsigned long now = get_seconds();
int flush_on_commit = btrfs_test_opt(root, FLUSHONCOMMIT);
- btrfs_run_ordered_operations(root, 0);
+ ret = btrfs_run_ordered_operations(root, 0);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto cleanup_transaction;
+ }
- if (cur_trans->aborted)
+ if (cur_trans->aborted) {
+ ret = cur_trans->aborted;
goto cleanup_transaction;
+ }
/* make a pass through all the delayed refs we have so far
* any runnings procs may add more while we are here
@@ -1525,7 +1531,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
* it here and no for sure that nothing new will be added
* to the list
*/
- btrfs_run_ordered_operations(root, 1);
+ ret = btrfs_run_ordered_operations(root, 1);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto cleanup_transaction;
+ }
prepare_to_wait(&cur_trans->writer_wait, &wait,
TASK_UNINTERRUPTIBLE);
--
1.6.5.2
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 3/3] Btrfs: make ordered extent be flushed by multi-task
2012-10-25 9:20 [PATCH 0/3] flush delalloc by multi-task Miao Xie
2012-10-25 9:28 ` [PATCH 1/3] Btrfs: make delalloc inodes be flushed " Miao Xie
2012-10-25 9:31 ` [PATCH 2/3] Btrfs: make ordered operations be handled " Miao Xie
@ 2012-10-25 9:41 ` Miao Xie
2012-10-25 11:53 ` [PATCH 0/3] flush delalloc " Liu Bo
2012-11-01 7:43 ` Miao Xie
4 siblings, 0 replies; 10+ messages in thread
From: Miao Xie @ 2012-10-25 9:41 UTC (permalink / raw)
To: Linux Btrfs
Though the processing of the ordered extents is a bit different from the delalloc inode
flush, we can see it as a subset of the delalloc inode flush, so we also handle
them with the flush workers.
Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
---
fs/btrfs/ordered-data.c | 41 +++++++++++++++++++++++++++++++++--------
fs/btrfs/ordered-data.h | 5 ++++-
2 files changed, 37 insertions(+), 9 deletions(-)
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index ab2a3c0..eecc20f 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -211,6 +211,8 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
init_waitqueue_head(&entry->wait);
INIT_LIST_HEAD(&entry->list);
INIT_LIST_HEAD(&entry->root_extent_list);
+ INIT_LIST_HEAD(&entry->work_list);
+ init_completion(&entry->completion);
trace_btrfs_ordered_extent_add(inode, entry);
@@ -464,18 +466,28 @@ void btrfs_remove_ordered_extent(struct inode *inode,
wake_up(&entry->wait);
}
+static void btrfs_run_ordered_extent_work(struct btrfs_work *work)
+{
+ struct btrfs_ordered_extent *ordered;
+
+ ordered = container_of(work, struct btrfs_ordered_extent, flush_work);
+ btrfs_start_ordered_extent(ordered->inode, ordered, 1);
+ complete(&ordered->completion);
+}
+
/*
* wait for all the ordered extents in a root. This is done when balancing
* space between drives.
*/
void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput)
{
- struct list_head splice;
+ struct list_head splice, works;
struct list_head *cur;
- struct btrfs_ordered_extent *ordered;
+ struct btrfs_ordered_extent *ordered, *next;
struct inode *inode;
INIT_LIST_HEAD(&splice);
+ INIT_LIST_HEAD(&works);
spin_lock(&root->fs_info->ordered_extent_lock);
list_splice_init(&root->fs_info->ordered_extents, &splice);
@@ -494,19 +506,32 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput)
spin_unlock(&root->fs_info->ordered_extent_lock);
if (inode) {
- btrfs_start_ordered_extent(inode, ordered, 1);
- btrfs_put_ordered_extent(ordered);
- if (delay_iput)
- btrfs_add_delayed_iput(inode);
- else
- iput(inode);
+ ordered->flush_work.func = btrfs_run_ordered_extent_work;
+ list_add_tail(&ordered->work_list, &works);
+ btrfs_queue_worker(&root->fs_info->flush_workers,
+ &ordered->flush_work);
} else {
btrfs_put_ordered_extent(ordered);
}
+ cond_resched();
spin_lock(&root->fs_info->ordered_extent_lock);
}
spin_unlock(&root->fs_info->ordered_extent_lock);
+
+ list_for_each_entry_safe(ordered, next, &works, work_list) {
+ list_del_init(&ordered->work_list);
+ wait_for_completion(&ordered->completion);
+
+ inode = ordered->inode;
+ btrfs_put_ordered_extent(ordered);
+ if (delay_iput)
+ btrfs_add_delayed_iput(inode);
+ else
+ iput(inode);
+
+ cond_resched();
+ }
}
/*
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index e8dcec6..efc7c29 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -128,8 +128,11 @@ struct btrfs_ordered_extent {
struct list_head root_extent_list;
struct btrfs_work work;
-};
+ struct completion completion;
+ struct btrfs_work flush_work;
+ struct list_head work_list;
+};
/*
* calculates the total size you need to allocate for an ordered sum
--
1.6.5.2
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH 0/3] flush delalloc by multi-task
2012-10-25 9:20 [PATCH 0/3] flush delalloc by multi-task Miao Xie
` (2 preceding siblings ...)
2012-10-25 9:41 ` [PATCH 3/3] Btrfs: make ordered extent be flushed " Miao Xie
@ 2012-10-25 11:53 ` Liu Bo
2012-10-26 1:56 ` Miao Xie
2012-11-01 7:43 ` Miao Xie
4 siblings, 1 reply; 10+ messages in thread
From: Liu Bo @ 2012-10-25 11:53 UTC (permalink / raw)
To: miaox; +Cc: Linux Btrfs
On 10/25/2012 05:20 PM, Miao Xie wrote:
> This patchset introduce multi-task delalloc flush, it can make the delalloc
> flush more faster. And besides that, it also can fix the problem that we join
> the same transaction handler more than 2 times.
>
> Implementation:
> - Create a new worker pool.
> - Queue the inode with pending delalloc into the work queue of the worker pool
> when we want to force them into the disk, and then we will wait till all the
> works we submit are done.
> - The ordered extents also can be queued into this work queue. The process is
> similar to the second one.
>
I can see the potential improvements brought by flushing inodes this way.
But I don't think it makes much sense to make the waiting process multi-task,
since even if we spread the waits for ordered extents across different cpus, they just
occupy the cpu, go on waiting and then get scheduled out; I mean, the bottleneck is
what we're waiting for.
Besides, considering that this patchset is about getting us better performance,
I'm expecting some performance numbers (I'm a little worried about context-switch
overhead).
btw, cool ideas indeed.
thanks,
liubo
> Miao Xie (3):
> Btrfs: make delalloc inodes be flushed by multi-task
> Btrfs: make ordered operations be handled by multi-task
> Btrfs: make ordered extent be flushed by multi-task
>
> fs/btrfs/ctree.h | 14 +++++++
> fs/btrfs/disk-io.c | 7 ++++
> fs/btrfs/inode.c | 78 ++++++++++++++++++++++++++++++++++++++---
> fs/btrfs/ordered-data.c | 87 ++++++++++++++++++++++++++++++++++-------------
> fs/btrfs/ordered-data.h | 7 +++-
> fs/btrfs/relocation.c | 6 +++-
> fs/btrfs/transaction.c | 24 ++++++++++---
> 7 files changed, 185 insertions(+), 38 deletions(-)
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 0/3] flush delalloc by multi-task
2012-10-25 11:53 ` [PATCH 0/3] flush delalloc " Liu Bo
@ 2012-10-26 1:56 ` Miao Xie
2012-10-26 2:05 ` Liu Bo
0 siblings, 1 reply; 10+ messages in thread
From: Miao Xie @ 2012-10-26 1:56 UTC (permalink / raw)
To: Liu Bo; +Cc: Linux Btrfs
On Thu, 25 Oct 2012 19:53:05 +0800, Liu Bo wrote:
> On 10/25/2012 05:20 PM, Miao Xie wrote:
>> This patchset introduce multi-task delalloc flush, it can make the delalloc
>> flush more faster. And besides that, it also can fix the problem that we join
>> the same transaction handler more than 2 times.
>>
>> Implementation:
>> - Create a new worker pool.
>> - Queue the inode with pending delalloc into the work queue of the worker pool
>> when we want to force them into the disk, and then we will wait till all the
>> works we submit are done.
>> - The ordered extents also can be queued into this work queue. The process is
>> similar to the second one.
>>
>
> I can see the potential improvements brought by flushing inodes this way.
>
> But I don't think it makes much sense by making waiting process multi-task,
> since even we spread wait order extents into different cpus, they just occpied
> the cpu and went on waiting and scheduled then, I mean, the bottleneck is on
> what we're waiting for.
Thanks for your comment. I think only btrfs_run_ordered_operations(root, 0) needn't
wait for the works; the others must wait.
The first reason is to avoid changing the semantics of those three functions. The second
reason is that we have to wait for the completion of all works; if not, the file data in
snapshots may be different from the source subvolumes because the flush may not end
before the snapshot creation.
> Besides, considering that this patchset is about to getting us better performance,
> I'm expecting any performance numbers (I'm a little worried about context switches
> overhead).
OK, I'll send it out later.
Thanks
Miao
> btw, cool ideas indeed.
>
> thanks,
> liubo
>
>> Miao Xie (3):
>> Btrfs: make delalloc inodes be flushed by multi-task
>> Btrfs: make ordered operations be handled by multi-task
>> Btrfs: make ordered extent be flushed by multi-task
>>
>> fs/btrfs/ctree.h | 14 +++++++
>> fs/btrfs/disk-io.c | 7 ++++
>> fs/btrfs/inode.c | 78 ++++++++++++++++++++++++++++++++++++++---
>> fs/btrfs/ordered-data.c | 87 ++++++++++++++++++++++++++++++++++-------------
>> fs/btrfs/ordered-data.h | 7 +++-
>> fs/btrfs/relocation.c | 6 +++-
>> fs/btrfs/transaction.c | 24 ++++++++++---
>> 7 files changed, 185 insertions(+), 38 deletions(-)
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>>
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 0/3] flush delalloc by multi-task
2012-10-26 1:56 ` Miao Xie
@ 2012-10-26 2:05 ` Liu Bo
2012-10-26 3:25 ` Miao Xie
0 siblings, 1 reply; 10+ messages in thread
From: Liu Bo @ 2012-10-26 2:05 UTC (permalink / raw)
To: miaox; +Cc: Linux Btrfs
On 10/26/2012 09:56 AM, Miao Xie wrote:
>> I can see the potential improvements brought by flushing inodes this way.
>> >
>> > But I don't think it makes much sense by making waiting process multi-task,
>> > since even we spread wait order extents into different cpus, they just occpied
>> > the cpu and went on waiting and scheduled then, I mean, the bottleneck is on
>> > what we're waiting for.
> Thanks for your comment, I think only btrfs_run_ordered_operations(root, 0) needn't
> wait for the works, the others must wait.
>
> The first reason is to avoid changing the semantic of those tree function. The second
> reason is we have to wait for the completion of all works, if not, the file data in
> snapshots may be different with the source suvolumes because the flush may not end
> before the snapshot creation.
>
Yes, it's right that they must wait for all workers to finish.
But I don't mean that (sorry for my confusing words).
IMO we don't need to let *btrfs_wait_ordered_extents()* run as multi-task.
thanks,
liubo
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 0/3] flush delalloc by multi-task
2012-10-26 2:05 ` Liu Bo
@ 2012-10-26 3:25 ` Miao Xie
2012-10-26 7:01 ` Liu Bo
0 siblings, 1 reply; 10+ messages in thread
From: Miao Xie @ 2012-10-26 3:25 UTC (permalink / raw)
To: Liu Bo; +Cc: Linux Btrfs
On Fri, 26 Oct 2012 10:05:55 +0800, Liu Bo wrote:
> On 10/26/2012 09:56 AM, Miao Xie wrote:
>>> I can see the potential improvements brought by flushing inodes this way.
>>>>
>>>> But I don't think it makes much sense by making waiting process multi-task,
>>>> since even we spread wait order extents into different cpus, they just occpied
>>>> the cpu and went on waiting and scheduled then, I mean, the bottleneck is on
>>>> what we're waiting for.
>> Thanks for your comment, I think only btrfs_run_ordered_operations(root, 0) needn't
>> wait for the works, the others must wait.
>>
>> The first reason is to avoid changing the semantic of those tree function. The second
>> reason is we have to wait for the completion of all works, if not, the file data in
>> snapshots may be different with the source suvolumes because the flush may not end
>> before the snapshot creation.
>>
>
> Yes, it's right that they must wait for all workers to finish.
>
> But I don't mean that(sorry for my confusing words).
>
> IMO we don't need to let *btrfs_wait_ordered_extents()* run as multi-task.
It also needs to be done by multi-task because btrfs_wait_ordered_extents() doesn't imply
that all the dirty pages in the ordered extent have been written to the disk; that is,
it also needs to do lots of things before waiting for the event - BTRFS_ORDERED_COMPLETE - so
the multi-task process is useful, I think.
Anyway, we need tests to validate it.
Thanks
Miao
>
> thanks,
> liubo
>
>
>
>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 0/3] flush delalloc by multi-task
2012-10-26 3:25 ` Miao Xie
@ 2012-10-26 7:01 ` Liu Bo
0 siblings, 0 replies; 10+ messages in thread
From: Liu Bo @ 2012-10-26 7:01 UTC (permalink / raw)
To: miaox; +Cc: Linux Btrfs
On 10/26/2012 11:25 AM, Miao Xie wrote:
> On Fri, 26 Oct 2012 10:05:55 +0800, Liu Bo wrote:
>> On 10/26/2012 09:56 AM, Miao Xie wrote:
>>>> I can see the potential improvements brought by flushing inodes this way.
>>>>>
>>>>> But I don't think it makes much sense by making waiting process multi-task,
>>>>> since even we spread wait order extents into different cpus, they just occpied
>>>>> the cpu and went on waiting and scheduled then, I mean, the bottleneck is on
>>>>> what we're waiting for.
>>> Thanks for your comment, I think only btrfs_run_ordered_operations(root, 0) needn't
>>> wait for the works, the others must wait.
>>>
>>> The first reason is to avoid changing the semantic of those tree function. The second
>>> reason is we have to wait for the completion of all works, if not, the file data in
>>> snapshots may be different with the source suvolumes because the flush may not end
>>> before the snapshot creation.
>>>
>>
>> Yes, it's right that they must wait for all workers to finish.
>>
>> But I don't mean that(sorry for my confusing words).
>>
>> IMO we don't need to let *btrfs_wait_ordered_extents()* run as multi-task.
>
> It also need to be done by multi-task because btrfs_wait_ordered_extents() doesn't imply
> that all the dirty pages in the ordered extent have been written into the disk, that is
> it also need do lots of things before waiting for the event - BTRFS_ORDERED_COMPLETE, so
> the multi-task process is useful, I think.
>
Well, I missed the flushing part.
> Anyway, we need test to validate it.
>
> Thanks
> Miao
>
>>
>> thanks,
>> liubo
>>
>>
>>
>>
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 0/3] flush delalloc by multi-task
2012-10-25 9:20 [PATCH 0/3] flush delalloc by multi-task Miao Xie
` (3 preceding siblings ...)
2012-10-25 11:53 ` [PATCH 0/3] flush delalloc " Liu Bo
@ 2012-11-01 7:43 ` Miao Xie
4 siblings, 0 replies; 10+ messages in thread
From: Miao Xie @ 2012-11-01 7:43 UTC (permalink / raw)
To: Josef Bacik; +Cc: Linux Btrfs
Hi, Josef
Please drop this patchset from your btrfs-next tree because it may cause a
performance regression in some cases. I'll improve it later.
Thanks
Miao
On Thu, 25 Oct 2012 17:20:29 +0800, Miao Xie wrote:
> This patchset introduce multi-task delalloc flush, it can make the delalloc
> flush more faster. And besides that, it also can fix the problem that we join
> the same transaction handler more than 2 times.
>
> Implementation:
> - Create a new worker pool.
> - Queue the inode with pending delalloc into the work queue of the worker pool
> when we want to force them into the disk, and then we will wait till all the
> works we submit are done.
> - The ordered extents also can be queued into this work queue. The process is
> similar to the second one.
>
> Miao Xie (3):
> Btrfs: make delalloc inodes be flushed by multi-task
> Btrfs: make ordered operations be handled by multi-task
> Btrfs: make ordered extent be flushed by multi-task
>
> fs/btrfs/ctree.h | 14 +++++++
> fs/btrfs/disk-io.c | 7 ++++
> fs/btrfs/inode.c | 78 ++++++++++++++++++++++++++++++++++++++---
> fs/btrfs/ordered-data.c | 87 ++++++++++++++++++++++++++++++++++-------------
> fs/btrfs/ordered-data.h | 7 +++-
> fs/btrfs/relocation.c | 6 +++-
> fs/btrfs/transaction.c | 24 ++++++++++---
> 7 files changed, 185 insertions(+), 38 deletions(-)
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2012-11-01 7:43 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-10-25 9:20 [PATCH 0/3] flush delalloc by multi-task Miao Xie
2012-10-25 9:28 ` [PATCH 1/3] Btrfs: make delalloc inodes be flushed " Miao Xie
2012-10-25 9:31 ` [PATCH 2/3] Btrfs: make ordered operations be handled " Miao Xie
2012-10-25 9:41 ` [PATCH 3/3] Btrfs: make ordered extent be flushed " Miao Xie
2012-10-25 11:53 ` [PATCH 0/3] flush delalloc " Liu Bo
2012-10-26 1:56 ` Miao Xie
2012-10-26 2:05 ` Liu Bo
2012-10-26 3:25 ` Miao Xie
2012-10-26 7:01 ` Liu Bo
2012-11-01 7:43 ` Miao Xie
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).