From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from cn.fujitsu.com ([59.151.112.132]:45816 "EHLO heian.cn.fujitsu.com" rhost-flags-OK-FAIL-OK-FAIL) by vger.kernel.org with ESMTP id S1751780AbcC3A04 (ORCPT ); Tue, 29 Mar 2016 20:26:56 -0400 Subject: Re: [PATCH v8 11/27] btrfs: dedupe: Introduce interfaces to resume and cleanup dedupe info To: Alex Lyakas References: <1458610552-9845-1-git-send-email-quwenruo@cn.fujitsu.com> <1458610552-9845-12-git-send-email-quwenruo@cn.fujitsu.com> CC: linux-btrfs , Wang Xiaoguang From: Qu Wenruo Message-ID: <56FB1D43.4080302@cn.fujitsu.com> Date: Wed, 30 Mar 2016 08:26:43 +0800 MIME-Version: 1.0 In-Reply-To: Content-Type: text/plain; charset="utf-8"; format=flowed Sender: linux-btrfs-owner@vger.kernel.org List-ID: Alex Lyakas wrote on 2016/03/29 19:31 +0200: > Hi Qu, Wang, > > On Tue, Mar 22, 2016 at 3:35 AM, Qu Wenruo wrote: >> Since we will introduce a new on-disk based dedupe method, introduce new >> interfaces to resume previous dedupe setup. >> >> And since we introduce a new tree for status, also add disable handler >> for it. >> >> Signed-off-by: Wang Xiaoguang >> Signed-off-by: Qu Wenruo >> --- >> fs/btrfs/dedupe.c | 269 +++++++++++++++++++++++++++++++++++++++++++++++++---- >> fs/btrfs/dedupe.h | 13 +++ >> fs/btrfs/disk-io.c | 21 ++++- >> fs/btrfs/disk-io.h | 1 + >> 4 files changed, 283 insertions(+), 21 deletions(-) >> >> diff --git a/fs/btrfs/dedupe.c b/fs/btrfs/dedupe.c >> index 7ef2c37..1112fec 100644 >> --- a/fs/btrfs/dedupe.c >> +++ b/fs/btrfs/dedupe.c >> @@ -21,6 +21,8 @@ >> #include "transaction.h" >> #include "delayed-ref.h" >> #include "qgroup.h" >> +#include "disk-io.h" >> +#include "locking.h" >> >> struct inmem_hash { >> struct rb_node hash_node; >> @@ -41,10 +43,103 @@ static inline struct inmem_hash *inmem_alloc_hash(u16 type) >> GFP_NOFS); >> } >> >> +static int init_dedupe_info(struct btrfs_dedupe_info **ret_info, u16 type, >> + u16 backend, u64 blocksize, u64 limit) >> +{ >> + struct btrfs_dedupe_info *dedupe_info; >> + >> + dedupe_info = kzalloc(sizeof(*dedupe_info), GFP_NOFS); >> + if (!dedupe_info) >> + return -ENOMEM; >> + >> + dedupe_info->hash_type = type; >> + dedupe_info->backend = backend; >> + dedupe_info->blocksize = blocksize; >> + dedupe_info->limit_nr = limit; >> + >> + /* only support SHA256 yet */ >> + dedupe_info->dedupe_driver = crypto_alloc_shash("sha256", 0, 0); >> + if (IS_ERR(dedupe_info->dedupe_driver)) { >> + int ret; >> + >> + ret = PTR_ERR(dedupe_info->dedupe_driver); >> + kfree(dedupe_info); >> + return ret; >> + } >> + >> + dedupe_info->hash_root = RB_ROOT; >> + dedupe_info->bytenr_root = RB_ROOT; >> + dedupe_info->current_nr = 0; >> + INIT_LIST_HEAD(&dedupe_info->lru_list); >> + mutex_init(&dedupe_info->lock); >> + >> + *ret_info = dedupe_info; >> + return 0; >> +} >> + >> +static int init_dedupe_tree(struct btrfs_fs_info *fs_info, >> + struct btrfs_dedupe_info *dedupe_info) >> +{ >> + struct btrfs_root *dedupe_root; >> + struct btrfs_key key; >> + struct btrfs_path *path; >> + struct btrfs_dedupe_status_item *status; >> + struct btrfs_trans_handle *trans; >> + int ret; >> + >> + path = btrfs_alloc_path(); >> + if (!path) >> + return -ENOMEM; >> + >> + trans = btrfs_start_transaction(fs_info->tree_root, 2); >> + if (IS_ERR(trans)) { >> + ret = PTR_ERR(trans); >> + goto out; >> + } >> + dedupe_root = btrfs_create_tree(trans, fs_info, >> + BTRFS_DEDUPE_TREE_OBJECTID); >> + if (IS_ERR(dedupe_root)) { >> + ret = PTR_ERR(dedupe_root); >> + btrfs_abort_transaction(trans, fs_info->tree_root, ret); >> + goto out; >> + } >> + dedupe_info->dedupe_root = dedupe_root; >> + >> + key.objectid = 0; >> + key.type = BTRFS_DEDUPE_STATUS_ITEM_KEY; >> + key.offset = 0; >> + >> + ret = btrfs_insert_empty_item(trans, dedupe_root, path, &key, >> + sizeof(*status)); >> + if (ret < 0) { >> + btrfs_abort_transaction(trans, fs_info->tree_root, ret); >> + goto out; >> + } >> + >> + status = btrfs_item_ptr(path->nodes[0], path->slots[0], >> + struct btrfs_dedupe_status_item); >> + btrfs_set_dedupe_status_blocksize(path->nodes[0], status, >> + dedupe_info->blocksize); >> + btrfs_set_dedupe_status_limit(path->nodes[0], status, >> + dedupe_info->limit_nr); >> + btrfs_set_dedupe_status_hash_type(path->nodes[0], status, >> + dedupe_info->hash_type); >> + btrfs_set_dedupe_status_backend(path->nodes[0], status, >> + dedupe_info->backend); >> + btrfs_mark_buffer_dirty(path->nodes[0]); >> +out: >> + btrfs_free_path(path); >> + if (ret == 0) >> + btrfs_commit_transaction(trans, fs_info->tree_root); >> + return ret; >> +} >> + >> int btrfs_dedupe_enable(struct btrfs_fs_info *fs_info, u16 type, u16 backend, >> u64 blocksize, u64 limit_nr) >> { >> struct btrfs_dedupe_info *dedupe_info; >> + int create_tree; >> + u64 compat_ro_flag = btrfs_super_compat_ro_flags(fs_info->super_copy); >> u64 limit = limit_nr; >> int ret = 0; >> >> @@ -63,6 +158,14 @@ int btrfs_dedupe_enable(struct btrfs_fs_info *fs_info, u16 type, u16 backend, >> limit = BTRFS_DEDUPE_LIMIT_NR_DEFAULT; >> if (backend == BTRFS_DEDUPE_BACKEND_ONDISK && limit_nr != 0) >> limit = 0; >> + /* Ondisk backend needs DEDUP RO compat feature */ >> + if (!(compat_ro_flag & BTRFS_FEATURE_COMPAT_RO_DEDUPE) && >> + backend == BTRFS_DEDUPE_BACKEND_ONDISK) >> + return -EOPNOTSUPP; >> + >> + /* Meaningless and unable to enable dedupe for RO fs */ >> + if (fs_info->sb->s_flags & MS_RDONLY) >> + return -EROFS; >> >> dedupe_info = fs_info->dedupe_info; >> if (dedupe_info) { >> @@ -81,29 +184,71 @@ int btrfs_dedupe_enable(struct btrfs_fs_info *fs_info, u16 type, u16 backend, >> return 0; >> } >> >> + dedupe_info = NULL; >> enable: >> - dedupe_info = kzalloc(sizeof(*dedupe_info), GFP_NOFS); >> - if (dedupe_info) >> + create_tree = compat_ro_flag & BTRFS_FEATURE_COMPAT_RO_DEDUPE; >> + >> + ret = init_dedupe_info(&dedupe_info, type, backend, blocksize, limit); >> + if (ret < 0) >> + return ret; >> + if (create_tree) { >> + ret = init_dedupe_tree(fs_info, dedupe_info); >> + if (ret < 0) >> + goto out; >> + } >> + >> + fs_info->dedupe_info = dedupe_info; > I think this leaks memory. If previously we had a valid > fs_info->dedupe_info, it will remain allocated. Please check previous patch, or the final dedupe.c. Just before enable tag: ------ dedupe_info = fs_info->dedupe_info; if (dedupe_info) { /* Check if we are re-enable for different dedupe config */ if (dedupe_info->blocksize != blocksize || dedupe_info->hash_type != type || dedupe_info->backend != backend) { btrfs_dedupe_disable(fs_info); goto enable; } /* On-fly limit change is OK */ mutex_lock(&dedupe_info->lock); fs_info->dedupe_info->limit_nr = limit; mutex_unlock(&dedupe_info->lock); return 0; } dedupe_info = NULL; ------ For any existing dedupe_info, btrfs_dedupe_enable() will either disable it or, just modify limit on-fly. So no leaking. Thanks for the review. Qu > > >> + /* We must ensure dedupe_enabled is modified after dedupe_info */ >> + smp_wmb(); >> + fs_info->dedupe_enabled = 1; >> +out: >> + if (ret < 0) { >> + crypto_free_shash(dedupe_info->dedupe_driver); >> + kfree(dedupe_info); >> + } >> + return ret; >> +} >> + >> +int btrfs_dedupe_resume(struct btrfs_fs_info *fs_info, >> + struct btrfs_root *dedupe_root) >> +{ >> + struct btrfs_dedupe_info *dedupe_info; >> + struct btrfs_dedupe_status_item *status; >> + struct btrfs_key key; >> + struct btrfs_path *path; >> + u64 blocksize; >> + u64 limit; >> + u16 type; >> + u16 backend; >> + int ret = 0; >> + >> + path = btrfs_alloc_path(); >> + if (!path) >> return -ENOMEM; >> >> - dedupe_info->hash_type = type; >> - dedupe_info->backend = backend; >> - dedupe_info->blocksize = blocksize; >> - dedupe_info->limit_nr = limit; >> + key.objectid = 0; >> + key.type = BTRFS_DEDUPE_STATUS_ITEM_KEY; >> + key.offset = 0; >> >> - /* Only support SHA256 yet */ >> - dedupe_info->dedupe_driver = crypto_alloc_shash("sha256", 0, 0); >> - if (IS_ERR(dedupe_info->dedupe_driver)) { >> - btrfs_err(fs_info, "failed to init sha256 driver"); >> - ret = PTR_ERR(dedupe_info->dedupe_driver); >> + ret = btrfs_search_slot(NULL, dedupe_root, &key, path, 0, 0); >> + if (ret > 0) { >> + ret = -ENOENT; >> + goto out; >> + } else if (ret < 0) { >> goto out; >> } >> >> - dedupe_info->hash_root = RB_ROOT; >> - dedupe_info->bytenr_root = RB_ROOT; >> - dedupe_info->current_nr = 0; >> - INIT_LIST_HEAD(&dedupe_info->lru_list); >> - mutex_init(&dedupe_info->lock); >> + status = btrfs_item_ptr(path->nodes[0], path->slots[0], >> + struct btrfs_dedupe_status_item); >> + blocksize = btrfs_dedupe_status_blocksize(path->nodes[0], status); >> + limit = btrfs_dedupe_status_limit(path->nodes[0], status); >> + type = btrfs_dedupe_status_hash_type(path->nodes[0], status); >> + backend = btrfs_dedupe_status_backend(path->nodes[0], status); >> + >> + ret = init_dedupe_info(&dedupe_info, type, backend, blocksize, limit); >> + if (ret < 0) >> + goto out; >> + dedupe_info->dedupe_root = dedupe_root; >> >> fs_info->dedupe_info = dedupe_info; >> /* We must ensure dedupe_enabled is modified after dedupe_info */ >> @@ -111,11 +256,36 @@ enable: >> fs_info->dedupe_enabled = 1; >> >> out: >> - if (ret < 0) >> - kfree(dedupe_info); >> + btrfs_free_path(path); >> return ret; >> } >> >> +static void inmem_destroy(struct btrfs_dedupe_info *dedupe_info); >> +int btrfs_dedupe_cleanup(struct btrfs_fs_info *fs_info) >> +{ >> + struct btrfs_dedupe_info *dedupe_info; >> + >> + fs_info->dedupe_enabled = 0; >> + >> + /* same as disable */ >> + smp_wmb(); >> + dedupe_info = fs_info->dedupe_info; >> + fs_info->dedupe_info = NULL; >> + >> + if (!dedupe_info) >> + return 0; >> + >> + if (dedupe_info->backend == BTRFS_DEDUPE_BACKEND_INMEMORY) >> + inmem_destroy(dedupe_info); >> + if (dedupe_info->dedupe_root) { >> + free_root_extent_buffers(dedupe_info->dedupe_root); >> + kfree(dedupe_info->dedupe_root); >> + } >> + crypto_free_shash(dedupe_info->dedupe_driver); >> + kfree(dedupe_info); >> + return 0; >> +} >> + >> static int inmem_insert_hash(struct rb_root *root, >> struct inmem_hash *hash, int hash_len) >> { >> @@ -325,6 +495,65 @@ static void inmem_destroy(struct btrfs_dedupe_info *dedupe_info) >> mutex_unlock(&dedupe_info->lock); >> } >> >> +static int remove_dedupe_tree(struct btrfs_root *dedupe_root) >> +{ >> + struct btrfs_trans_handle *trans; >> + struct btrfs_fs_info *fs_info = dedupe_root->fs_info; >> + struct btrfs_path *path; >> + struct btrfs_key key; >> + struct extent_buffer *node; >> + int ret; >> + int nr; >> + >> + path = btrfs_alloc_path(); >> + if (!path) >> + return -ENOMEM; >> + trans = btrfs_start_transaction(fs_info->tree_root, 2); >> + if (IS_ERR(trans)) { >> + ret = PTR_ERR(trans); >> + goto out; >> + } >> + >> + path->leave_spinning = 1; >> + key.objectid = 0; >> + key.offset = 0; >> + key.type = 0; >> + >> + while (1) { >> + ret = btrfs_search_slot(trans, dedupe_root, &key, path, -1, 1); >> + if (ret < 0) >> + goto out; >> + node = path->nodes[0]; >> + nr = btrfs_header_nritems(node); >> + if (nr == 0) { >> + btrfs_release_path(path); >> + break; >> + } >> + path->slots[0] = 0; >> + ret = btrfs_del_items(trans, dedupe_root, path, 0, nr); >> + if (ret) >> + goto out; >> + btrfs_release_path(path); >> + } >> + >> + ret = btrfs_del_root(trans, fs_info->tree_root, &dedupe_root->root_key); >> + if (ret) >> + goto out; >> + >> + list_del(&dedupe_root->dirty_list); >> + btrfs_tree_lock(dedupe_root->node); >> + clean_tree_block(trans, fs_info, dedupe_root->node); >> + btrfs_tree_unlock(dedupe_root->node); >> + btrfs_free_tree_block(trans, dedupe_root, dedupe_root->node, 0, 1); >> + free_extent_buffer(dedupe_root->node); >> + free_extent_buffer(dedupe_root->commit_root); >> + kfree(dedupe_root); >> + ret = btrfs_commit_transaction(trans, fs_info->tree_root); >> +out: >> + btrfs_free_path(path); >> + return ret; >> +} >> + >> int btrfs_dedupe_disable(struct btrfs_fs_info *fs_info) >> { >> struct btrfs_dedupe_info *dedupe_info; >> @@ -358,10 +587,12 @@ int btrfs_dedupe_disable(struct btrfs_fs_info *fs_info) >> /* now we are OK to clean up everything */ >> if (dedupe_info->backend == BTRFS_DEDUPE_BACKEND_INMEMORY) >> inmem_destroy(dedupe_info); >> + if (dedupe_info->dedupe_root) >> + ret = remove_dedupe_tree(dedupe_info->dedupe_root); >> >> crypto_free_shash(dedupe_info->dedupe_driver); >> kfree(dedupe_info); >> - return 0; >> + return ret; >> } >> >> /* >> diff --git a/fs/btrfs/dedupe.h b/fs/btrfs/dedupe.h >> index 537f0b8..120e630 100644 >> --- a/fs/btrfs/dedupe.h >> +++ b/fs/btrfs/dedupe.h >> @@ -112,6 +112,19 @@ int btrfs_dedupe_enable(struct btrfs_fs_info *fs_info, u16 type, u16 backend, >> */ >> int btrfs_dedupe_disable(struct btrfs_fs_info *fs_info); >> >> + /* >> + * Restore previous dedupe setup from disk >> + * Called at mount time >> + */ >> +int btrfs_dedupe_resume(struct btrfs_fs_info *fs_info, >> + struct btrfs_root *dedupe_root); >> + >> +/* >> + * Cleanup current btrfs_dedupe_info >> + * Called in umount time >> + */ >> +int btrfs_dedupe_cleanup(struct btrfs_fs_info *fs_info); >> + >> /* >> * Calculate hash for dedup. >> * Caller must ensure [start, start + dedupe_bs) has valid data. >> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c >> index 57ae928..44d098d 100644 >> --- a/fs/btrfs/disk-io.c >> +++ b/fs/btrfs/disk-io.c >> @@ -51,6 +51,7 @@ >> #include "sysfs.h" >> #include "qgroup.h" >> #include "compression.h" >> +#include "dedupe.h" >> >> #ifdef CONFIG_X86 >> #include >> @@ -2156,7 +2157,7 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) >> btrfs_destroy_workqueue(fs_info->extent_workers); >> } >> >> -static void free_root_extent_buffers(struct btrfs_root *root) >> +void free_root_extent_buffers(struct btrfs_root *root) >> { >> if (root) { >> free_extent_buffer(root->node); >> @@ -2490,7 +2491,21 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info, >> fs_info->free_space_root = root; >> } >> >> - return 0; >> + location.objectid = BTRFS_DEDUPE_TREE_OBJECTID; >> + root = btrfs_read_tree_root(tree_root, &location); >> + if (IS_ERR(root)) { >> + ret = PTR_ERR(root); >> + if (ret != -ENOENT) >> + return ret; >> + return 0; >> + } >> + set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); >> + ret = btrfs_dedupe_resume(fs_info, root); >> + if (ret < 0) { >> + free_root_extent_buffers(root); >> + kfree(root); >> + } >> + return ret; >> } >> >> int open_ctree(struct super_block *sb, >> @@ -3885,6 +3900,8 @@ void close_ctree(struct btrfs_root *root) >> >> btrfs_free_qgroup_config(fs_info); >> >> + btrfs_dedupe_cleanup(fs_info); >> + >> if (percpu_counter_sum(&fs_info->delalloc_bytes)) { >> btrfs_info(fs_info, "at unmount delalloc count %lld", >> percpu_counter_sum(&fs_info->delalloc_bytes)); >> diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h >> index 8e79d00..42c4ff2 100644 >> --- a/fs/btrfs/disk-io.h >> +++ b/fs/btrfs/disk-io.h >> @@ -70,6 +70,7 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root, >> int btrfs_init_fs_root(struct btrfs_root *root); >> int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info, >> struct btrfs_root *root); >> +void free_root_extent_buffers(struct btrfs_root *root); >> void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info); >> >> struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, >> -- >> 2.7.3 >> >> >> >> -- >> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in >> the body of a message to majordomo@vger.kernel.org >> More majordomo info at http://vger.kernel.org/majordomo-info.html > >