From mboxrd@z Thu Jan 1 00:00:00 1970 From: tristan Date: Mon, 08 Feb 2010 10:12:51 +0800 Subject: [Ocfs2-devel] [PATCH 1/2] Ocfs2: Optimize truncting codes for ocfs2 to use ocfs2_remove_btree_range instead. In-Reply-To: <1265366732-15895-1-git-send-email-tristan.ye@oracle.com> References: <1265366732-15895-1-git-send-email-tristan.ye@oracle.com> Message-ID: <4B6F7323.9090400@oracle.com> List-Id: MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit To: ocfs2-devel@oss.oracle.com Hi Joel, [PATCH 1/2] Ocfs2: Optimize truncting codes for ocfs2 to use ocfs2_remove_btree_range instead. [PATCH 2/2] Ocfs2: Fix punching hole codes to correctly do CoW during cluster zeroing. These 2 patches are the latest versions of the truncating optimization and punching-hole bugfix, they were sent on "02/05/2010 06:45 PM". They're tested comprehensively and also with Tao's SOB, I think they're ready to go now:) Regards, Tristan. Tristan Ye wrote: > As we know, truncate is just a special case of punching holes(from new i_size > to end), we therefore could take advantage of existing ocfs2_remove_btree_range() > codes to reduce the complexity and redundancy in alloc.c, the goal here is to make > truncate codes more generic and straightforward. > > Several former functions only used by ocfs2_commit_truncate() will be simply wiped off. > > ocfs2_remove_btree_range() was originally used by punching holes codes, which didn't > take refcount into account(definitely it's a BUG), we therefore need to change that > func a bit to handle refcount tree lock, calculate and reserve block for refcount > tree changes, also decrease refcount at the end, to move these logics in, we need > to replace the ocfs2_lock_allocators() by adding a new func ocfs2_reserve_blocks_for_rec_trunc() > which accepts some extra blocks to reserve.
such changes will not hurt any other codes > who're using ocfs2_remove_btree_range(such as dir truncate and punching holes), actually > punching holes codes do benefit from this. > > I merge the following steps into one patch since they may be logically doing one thing, > though I know it looks a little bit fat to review. > > 1). Remove redundant codes used by ocfs2_commit_truncate before, since we're moving to > ocfs2_remove_btree_range anyway. > > 2). Add a new func ocfs2_reserve_blocks_for_rec_trunc() for the purpose of accepting some > extra blocks to reserve. > > 3). Change ocfs2_prepare_refcount_change_for_del() a bit to fit our needs, it's safe to > do this since it's only being called by truncating codes. > > 4). Change ocfs2_remove_btree_range() a bit to take refcount case into account. > > 5). Finally, we change ocfs2_commit_truncate() to call ocfs2_remove_btree_range() in > a proper way. > > The patch has been tested normally for sanity check, stress tests with heavier workload > will be expected. > > Based on this patch, our fix for the punching-holes bug will be fairly easy. > > Signed-off-by: Tristan Ye > --- > fs/ocfs2/alloc.c | 691 +++++++++++------------------------------------ > fs/ocfs2/alloc.h | 6 +- > fs/ocfs2/dir.c | 2 +- > fs/ocfs2/file.c | 11 +- > fs/ocfs2/inode.c | 9 +- > fs/ocfs2/refcounttree.c | 29 +-- > fs/ocfs2/refcounttree.h | 4 +- > 7 files changed, 173 insertions(+), 579 deletions(-) > > diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c > index 38a42f5..e564999 100644 > --- a/fs/ocfs2/alloc.c > +++ b/fs/ocfs2/alloc.c > @@ -5670,19 +5670,97 @@ out: > return ret; > } > > +/* > + * ocfs2_reserve_blocks_for_rec_trunc() would look basically the > + * same as ocfs2_lock_allocators(), except for it accepts a blocks > + * number to reserve some extra blocks, and it only handles meta > + * data allocations. > + * > + * Currently, only ocfs2_remove_btree_range() uses it for truncating > + * and punching holes.
> + */ > +static int ocfs2_reserve_blocks_for_rec_trunc(struct inode *inode, > + struct ocfs2_extent_tree *et, > + u32 extents_to_split, > + struct ocfs2_alloc_context **ac, > + int extra_blocks) > +{ > + int ret = 0, num_free_extents, blocks = extra_blocks; > + unsigned int max_recs_needed = 2 * extents_to_split; > + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); > + > + *ac = NULL; > + > + num_free_extents = ocfs2_num_free_extents(osb, et); > + if (num_free_extents < 0) { > + ret = num_free_extents; > + mlog_errno(ret); > + goto out; > + } > + > + if (!num_free_extents || > + (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) > + blocks += ocfs2_extend_meta_needed(et->et_root_el); > + > + if (blocks) { > + ret = ocfs2_reserve_new_metadata_blocks(osb, blocks, ac); > + if (ret < 0) { > + if (ret != -ENOSPC) > + mlog_errno(ret); > + goto out; > + } > + } > + > +out: > + if (ret) { > + if (*ac) { > + ocfs2_free_alloc_context(*ac); > + *ac = NULL; > + } > + } > + > + return ret; > +} > + > int ocfs2_remove_btree_range(struct inode *inode, > struct ocfs2_extent_tree *et, > u32 cpos, u32 phys_cpos, u32 len, > - struct ocfs2_cached_dealloc_ctxt *dealloc) > + struct ocfs2_cached_dealloc_ctxt *dealloc, > + u64 refcount_loc, int flags) > { > - int ret; > + int ret, credits = 0, extra_blocks = 0; > u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); > struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); > struct inode *tl_inode = osb->osb_tl_inode; > handle_t *handle; > struct ocfs2_alloc_context *meta_ac = NULL; > + struct ocfs2_refcount_tree *ref_tree = NULL; > + > + if ((flags & OCFS2_EXT_REFCOUNTED) && len) { > + BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & > + OCFS2_HAS_REFCOUNT_FL)); > + > + ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1, > + &ref_tree, NULL); > + if (ret) { > + mlog_errno(ret); > + goto out; > + } > + > + ret = ocfs2_prepare_refcount_change_for_del(inode, > + refcount_loc, > + phys_blkno, > + len, > + &credits, > + 
&extra_blocks); > + if (ret < 0) { > + mlog_errno(ret); > + goto out; > + } > + } > > - ret = ocfs2_lock_allocators(inode, et, 0, 1, NULL, &meta_ac); > + ret = ocfs2_reserve_blocks_for_rec_trunc(inode, et, 1, &meta_ac, > + extra_blocks); > if (ret) { > mlog_errno(ret); > return ret; > @@ -5698,7 +5776,8 @@ int ocfs2_remove_btree_range(struct inode *inode, > } > } > > - handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb)); > + handle = ocfs2_start_trans(osb, > + ocfs2_remove_extent_credits(osb->sb) + credits); > if (IS_ERR(handle)) { > ret = PTR_ERR(handle); > mlog_errno(ret); > @@ -5729,9 +5808,20 @@ int ocfs2_remove_btree_range(struct inode *inode, > goto out_commit; > } > > - ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len); > - if (ret) > - mlog_errno(ret); > + if (phys_blkno) { > + if (flags & OCFS2_EXT_REFCOUNTED) > + ret = ocfs2_decrease_refcount(inode, handle, > + ocfs2_blocks_to_clusters(osb->sb, > + phys_blkno), > + len, meta_ac, > + dealloc, 1); > + else > + ret = ocfs2_truncate_log_append(osb, handle, > + phys_blkno, len); > + if (ret) > + mlog_errno(ret); > + > + } > > out_commit: > ocfs2_commit_trans(osb, handle); > @@ -5741,6 +5831,9 @@ out: > if (meta_ac) > ocfs2_free_alloc_context(meta_ac); > > + if (ref_tree) > + ocfs2_unlock_refcount_tree(osb, ref_tree, 1); > + > return ret; > } > > @@ -6576,429 +6669,6 @@ static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt, > le16_to_cpu(eb->h_suballoc_bit)); > } > > -/* This function will figure out whether the currently last extent > - * block will be deleted, and if it will, what the new last extent > - * block will be so we can update his h_next_leaf_blk field, as well > - * as the dinodes i_last_eb_blk */ > -static int ocfs2_find_new_last_ext_blk(struct inode *inode, > - unsigned int clusters_to_del, > - struct ocfs2_path *path, > - struct buffer_head **new_last_eb) > -{ > - int next_free, ret = 0; > - u32 cpos; > - struct ocfs2_extent_rec *rec; > - 
struct ocfs2_extent_block *eb; > - struct ocfs2_extent_list *el; > - struct buffer_head *bh = NULL; > - > - *new_last_eb = NULL; > - > - /* we have no tree, so of course, no last_eb. */ > - if (!path->p_tree_depth) > - goto out; > - > - /* trunc to zero special case - this makes tree_depth = 0 > - * regardless of what it is. */ > - if (OCFS2_I(inode)->ip_clusters == clusters_to_del) > - goto out; > - > - el = path_leaf_el(path); > - BUG_ON(!el->l_next_free_rec); > - > - /* > - * Make sure that this extent list will actually be empty > - * after we clear away the data. We can shortcut out if > - * there's more than one non-empty extent in the > - * list. Otherwise, a check of the remaining extent is > - * necessary. > - */ > - next_free = le16_to_cpu(el->l_next_free_rec); > - rec = NULL; > - if (ocfs2_is_empty_extent(&el->l_recs[0])) { > - if (next_free > 2) > - goto out; > - > - /* We may have a valid extent in index 1, check it. */ > - if (next_free == 2) > - rec = &el->l_recs[1]; > - > - /* > - * Fall through - no more nonempty extents, so we want > - * to delete this leaf. > - */ > - } else { > - if (next_free > 1) > - goto out; > - > - rec = &el->l_recs[0]; > - } > - > - if (rec) { > - /* > - * Check it we'll only be trimming off the end of this > - * cluster. > - */ > - if (le16_to_cpu(rec->e_leaf_clusters) > clusters_to_del) > - goto out; > - } > - > - ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, path, &cpos); > - if (ret) { > - mlog_errno(ret); > - goto out; > - } > - > - ret = ocfs2_find_leaf(INODE_CACHE(inode), path_root_el(path), cpos, &bh); > - if (ret) { > - mlog_errno(ret); > - goto out; > - } > - > - eb = (struct ocfs2_extent_block *) bh->b_data; > - el = &eb->h_list; > - > - /* ocfs2_find_leaf() gets the eb from ocfs2_read_extent_block(). > - * Any corruption is a code bug. 
*/ > - BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb)); > - > - *new_last_eb = bh; > - get_bh(*new_last_eb); > - mlog(0, "returning block %llu, (cpos: %u)\n", > - (unsigned long long)le64_to_cpu(eb->h_blkno), cpos); > -out: > - brelse(bh); > - > - return ret; > -} > - > -/* > - * Trim some clusters off the rightmost edge of a tree. Only called > - * during truncate. > - * > - * The caller needs to: > - * - start journaling of each path component. > - * - compute and fully set up any new last ext block > - */ > -static int ocfs2_trim_tree(struct inode *inode, struct ocfs2_path *path, > - handle_t *handle, struct ocfs2_truncate_context *tc, > - u32 clusters_to_del, u64 *delete_start, u8 *flags) > -{ > - int ret, i, index = path->p_tree_depth; > - u32 new_edge = 0; > - u64 deleted_eb = 0; > - struct buffer_head *bh; > - struct ocfs2_extent_list *el; > - struct ocfs2_extent_rec *rec; > - > - *delete_start = 0; > - *flags = 0; > - > - while (index >= 0) { > - bh = path->p_node[index].bh; > - el = path->p_node[index].el; > - > - mlog(0, "traveling tree (index = %d, block = %llu)\n", > - index, (unsigned long long)bh->b_blocknr); > - > - BUG_ON(le16_to_cpu(el->l_next_free_rec) == 0); > - > - if (index != > - (path->p_tree_depth - le16_to_cpu(el->l_tree_depth))) { > - ocfs2_error(inode->i_sb, > - "Inode %lu has invalid ext. 
block %llu", > - inode->i_ino, > - (unsigned long long)bh->b_blocknr); > - ret = -EROFS; > - goto out; > - } > - > -find_tail_record: > - i = le16_to_cpu(el->l_next_free_rec) - 1; > - rec = &el->l_recs[i]; > - > - mlog(0, "Extent list before: record %d: (%u, %u, %llu), " > - "next = %u\n", i, le32_to_cpu(rec->e_cpos), > - ocfs2_rec_clusters(el, rec), > - (unsigned long long)le64_to_cpu(rec->e_blkno), > - le16_to_cpu(el->l_next_free_rec)); > - > - BUG_ON(ocfs2_rec_clusters(el, rec) < clusters_to_del); > - > - if (le16_to_cpu(el->l_tree_depth) == 0) { > - /* > - * If the leaf block contains a single empty > - * extent and no records, we can just remove > - * the block. > - */ > - if (i == 0 && ocfs2_is_empty_extent(rec)) { > - memset(rec, 0, > - sizeof(struct ocfs2_extent_rec)); > - el->l_next_free_rec = cpu_to_le16(0); > - > - goto delete; > - } > - > - /* > - * Remove any empty extents by shifting things > - * left. That should make life much easier on > - * the code below. This condition is rare > - * enough that we shouldn't see a performance > - * hit. > - */ > - if (ocfs2_is_empty_extent(&el->l_recs[0])) { > - le16_add_cpu(&el->l_next_free_rec, -1); > - > - for(i = 0; > - i < le16_to_cpu(el->l_next_free_rec); i++) > - el->l_recs[i] = el->l_recs[i + 1]; > - > - memset(&el->l_recs[i], 0, > - sizeof(struct ocfs2_extent_rec)); > - > - /* > - * We've modified our extent list. The > - * simplest way to handle this change > - * is to being the search from the > - * start again. > - */ > - goto find_tail_record; > - } > - > - le16_add_cpu(&rec->e_leaf_clusters, -clusters_to_del); > - > - /* > - * We'll use "new_edge" on our way back up the > - * tree to know what our rightmost cpos is. > - */ > - new_edge = le16_to_cpu(rec->e_leaf_clusters); > - new_edge += le32_to_cpu(rec->e_cpos); > - > - /* > - * The caller will use this to delete data blocks. 
> - */ > - *delete_start = le64_to_cpu(rec->e_blkno) > - + ocfs2_clusters_to_blocks(inode->i_sb, > - le16_to_cpu(rec->e_leaf_clusters)); > - *flags = rec->e_flags; > - > - /* > - * If it's now empty, remove this record. > - */ > - if (le16_to_cpu(rec->e_leaf_clusters) == 0) { > - memset(rec, 0, > - sizeof(struct ocfs2_extent_rec)); > - le16_add_cpu(&el->l_next_free_rec, -1); > - } > - } else { > - if (le64_to_cpu(rec->e_blkno) == deleted_eb) { > - memset(rec, 0, > - sizeof(struct ocfs2_extent_rec)); > - le16_add_cpu(&el->l_next_free_rec, -1); > - > - goto delete; > - } > - > - /* Can this actually happen? */ > - if (le16_to_cpu(el->l_next_free_rec) == 0) > - goto delete; > - > - /* > - * We never actually deleted any clusters > - * because our leaf was empty. There's no > - * reason to adjust the rightmost edge then. > - */ > - if (new_edge == 0) > - goto delete; > - > - rec->e_int_clusters = cpu_to_le32(new_edge); > - le32_add_cpu(&rec->e_int_clusters, > - -le32_to_cpu(rec->e_cpos)); > - > - /* > - * A deleted child record should have been > - * caught above. > - */ > - BUG_ON(le32_to_cpu(rec->e_int_clusters) == 0); > - } > - > -delete: > - ret = ocfs2_journal_dirty(handle, bh); > - if (ret) { > - mlog_errno(ret); > - goto out; > - } > - > - mlog(0, "extent list container %llu, after: record %d: " > - "(%u, %u, %llu), next = %u.\n", > - (unsigned long long)bh->b_blocknr, i, > - le32_to_cpu(rec->e_cpos), ocfs2_rec_clusters(el, rec), > - (unsigned long long)le64_to_cpu(rec->e_blkno), > - le16_to_cpu(el->l_next_free_rec)); > - > - /* > - * We must be careful to only attempt delete of an > - * extent block (and not the root inode block). > - */ > - if (index > 0 && le16_to_cpu(el->l_next_free_rec) == 0) { > - struct ocfs2_extent_block *eb = > - (struct ocfs2_extent_block *)bh->b_data; > - > - /* > - * Save this for use when processing the > - * parent block. 
> - */ > - deleted_eb = le64_to_cpu(eb->h_blkno); > - > - mlog(0, "deleting this extent block.\n"); > - > - ocfs2_remove_from_cache(INODE_CACHE(inode), bh); > - > - BUG_ON(ocfs2_rec_clusters(el, &el->l_recs[0])); > - BUG_ON(le32_to_cpu(el->l_recs[0].e_cpos)); > - BUG_ON(le64_to_cpu(el->l_recs[0].e_blkno)); > - > - ret = ocfs2_cache_extent_block_free(&tc->tc_dealloc, eb); > - /* An error here is not fatal. */ > - if (ret < 0) > - mlog_errno(ret); > - } else { > - deleted_eb = 0; > - } > - > - index--; > - } > - > - ret = 0; > -out: > - return ret; > -} > - > -static int ocfs2_do_truncate(struct ocfs2_super *osb, > - unsigned int clusters_to_del, > - struct inode *inode, > - struct buffer_head *fe_bh, > - handle_t *handle, > - struct ocfs2_truncate_context *tc, > - struct ocfs2_path *path, > - struct ocfs2_alloc_context *meta_ac) > -{ > - int status; > - struct ocfs2_dinode *fe; > - struct ocfs2_extent_block *last_eb = NULL; > - struct ocfs2_extent_list *el; > - struct buffer_head *last_eb_bh = NULL; > - u64 delete_blk = 0; > - u8 rec_flags; > - > - fe = (struct ocfs2_dinode *) fe_bh->b_data; > - > - status = ocfs2_find_new_last_ext_blk(inode, clusters_to_del, > - path, &last_eb_bh); > - if (status < 0) { > - mlog_errno(status); > - goto bail; > - } > - > - /* > - * Each component will be touched, so we might as well journal > - * here to avoid having to handle errors later. > - */ > - status = ocfs2_journal_access_path(INODE_CACHE(inode), handle, path); > - if (status < 0) { > - mlog_errno(status); > - goto bail; > - } > - > - if (last_eb_bh) { > - status = ocfs2_journal_access_eb(handle, INODE_CACHE(inode), last_eb_bh, > - OCFS2_JOURNAL_ACCESS_WRITE); > - if (status < 0) { > - mlog_errno(status); > - goto bail; > - } > - > - last_eb = (struct ocfs2_extent_block *) last_eb_bh->b_data; > - } > - > - el = &(fe->id2.i_list); > - > - /* > - * Lower levels depend on this never happening, but it's best > - * to check it up here before changing the tree. 
> - */ > - if (el->l_tree_depth && el->l_recs[0].e_int_clusters == 0) { > - ocfs2_error(inode->i_sb, > - "Inode %lu has an empty extent record, depth %u\n", > - inode->i_ino, le16_to_cpu(el->l_tree_depth)); > - status = -EROFS; > - goto bail; > - } > - > - vfs_dq_free_space_nodirty(inode, > - ocfs2_clusters_to_bytes(osb->sb, clusters_to_del)); > - spin_lock(&OCFS2_I(inode)->ip_lock); > - OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters) - > - clusters_to_del; > - spin_unlock(&OCFS2_I(inode)->ip_lock); > - le32_add_cpu(&fe->i_clusters, -clusters_to_del); > - inode->i_blocks = ocfs2_inode_sector_count(inode); > - > - status = ocfs2_trim_tree(inode, path, handle, tc, > - clusters_to_del, &delete_blk, &rec_flags); > - if (status) { > - mlog_errno(status); > - goto bail; > - } > - > - if (le32_to_cpu(fe->i_clusters) == 0) { > - /* trunc to zero is a special case. */ > - el->l_tree_depth = 0; > - fe->i_last_eb_blk = 0; > - } else if (last_eb) > - fe->i_last_eb_blk = last_eb->h_blkno; > - > - status = ocfs2_journal_dirty(handle, fe_bh); > - if (status < 0) { > - mlog_errno(status); > - goto bail; > - } > - > - if (last_eb) { > - /* If there will be a new last extent block, then by > - * definition, there cannot be any leaves to the right of > - * him. 
*/ > - last_eb->h_next_leaf_blk = 0; > - status = ocfs2_journal_dirty(handle, last_eb_bh); > - if (status < 0) { > - mlog_errno(status); > - goto bail; > - } > - } > - > - if (delete_blk) { > - if (rec_flags & OCFS2_EXT_REFCOUNTED) > - status = ocfs2_decrease_refcount(inode, handle, > - ocfs2_blocks_to_clusters(osb->sb, > - delete_blk), > - clusters_to_del, meta_ac, > - &tc->tc_dealloc, 1); > - else > - status = ocfs2_truncate_log_append(osb, handle, > - delete_blk, > - clusters_to_del); > - if (status < 0) { > - mlog_errno(status); > - goto bail; > - } > - } > - status = 0; > -bail: > - brelse(last_eb_bh); > - mlog_exit(status); > - return status; > -} > - > static int ocfs2_zero_func(handle_t *handle, struct buffer_head *bh) > { > set_buffer_uptodate(bh); > @@ -7406,22 +7076,25 @@ out: > */ > int ocfs2_commit_truncate(struct ocfs2_super *osb, > struct inode *inode, > - struct buffer_head *fe_bh, > - struct ocfs2_truncate_context *tc) > + struct buffer_head *fe_bh) > { > - int status, i, credits, tl_sem = 0; > - u32 clusters_to_del, new_highest_cpos, range; > + int status = 0, i, flags = 0; > + u32 new_highest_cpos, range, trunc_cpos, trunc_len, phys_cpos, coff; > u64 blkno = 0; > struct ocfs2_extent_list *el; > - handle_t *handle = NULL; > - struct inode *tl_inode = osb->osb_tl_inode; > + struct ocfs2_extent_rec *rec; > struct ocfs2_path *path = NULL; > struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data; > - struct ocfs2_alloc_context *meta_ac = NULL; > - struct ocfs2_refcount_tree *ref_tree = NULL; > + struct ocfs2_extent_list *root_el = &(di->id2.i_list); > + u64 refcount_loc = le64_to_cpu(di->i_refcount_loc); > + struct ocfs2_extent_tree et; > + struct ocfs2_cached_dealloc_ctxt dealloc; > > mlog_entry_void(); > > + ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), fe_bh); > + ocfs2_init_dealloc_ctxt(&dealloc); > + > new_highest_cpos = ocfs2_clusters_for_bytes(osb->sb, > i_size_read(inode)); > > @@ -7444,8 +7117,6 @@ start: > goto bail; > } > > 
- credits = 0; > - > /* > * Truncate always works against the rightmost tree branch. > */ > @@ -7480,101 +7151,62 @@ start: > } > > i = le16_to_cpu(el->l_next_free_rec) - 1; > - range = le32_to_cpu(el->l_recs[i].e_cpos) + > - ocfs2_rec_clusters(el, &el->l_recs[i]); > - if (i == 0 && ocfs2_is_empty_extent(&el->l_recs[i])) { > - clusters_to_del = 0; > - } else if (le32_to_cpu(el->l_recs[i].e_cpos) >= new_highest_cpos) { > - clusters_to_del = ocfs2_rec_clusters(el, &el->l_recs[i]); > - blkno = le64_to_cpu(el->l_recs[i].e_blkno); > + rec = &el->l_recs[i]; > + flags = rec->e_flags; > + range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec); > + > + if (i == 0 && ocfs2_is_empty_extent(rec)) { > + /* > + * Lower levels depend on this never happening, but it's best > + * to check it up here before changing the tree. > + */ > + if (root_el->l_tree_depth && rec->e_int_clusters == 0) { > + ocfs2_error(inode->i_sb, "Inode %lu has an empty " > + "extent record, depth %u\n", inode->i_ino, > + le16_to_cpu(root_el->l_tree_depth)); > + status = -EROFS; > + goto bail; > + } > + trunc_cpos = le32_to_cpu(rec->e_cpos); > + trunc_len = 0; > + blkno = 0; > + } else if (le32_to_cpu(rec->e_cpos) >= new_highest_cpos) { > + /* > + * Truncate entire record. > + */ > + trunc_cpos = le32_to_cpu(rec->e_cpos); > + trunc_len = ocfs2_rec_clusters(el, rec); > + blkno = le64_to_cpu(rec->e_blkno); > } else if (range > new_highest_cpos) { > - clusters_to_del = (ocfs2_rec_clusters(el, &el->l_recs[i]) + > - le32_to_cpu(el->l_recs[i].e_cpos)) - > - new_highest_cpos; > - blkno = le64_to_cpu(el->l_recs[i].e_blkno) + > - ocfs2_clusters_to_blocks(inode->i_sb, > - ocfs2_rec_clusters(el, &el->l_recs[i]) - > - clusters_to_del); > + /* > + * Partial truncate. it also should be > + * the last truncate we're doing. 
> + */ > + trunc_cpos = new_highest_cpos; > + trunc_len = range - new_highest_cpos; > + coff = new_highest_cpos - le32_to_cpu(rec->e_cpos); > + blkno = le64_to_cpu(rec->e_blkno) + > + ocfs2_clusters_to_blocks(inode->i_sb, coff); > } else { > + /* > + * Truncate completed, leave happily. > + */ > status = 0; > goto bail; > } > > - mlog(0, "clusters_to_del = %u in this pass, tail blk=%llu\n", > - clusters_to_del, (unsigned long long)path_leaf_bh(path)->b_blocknr); > - > - if (el->l_recs[i].e_flags & OCFS2_EXT_REFCOUNTED && clusters_to_del) { > - BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & > - OCFS2_HAS_REFCOUNT_FL)); > - > - status = ocfs2_lock_refcount_tree(osb, > - le64_to_cpu(di->i_refcount_loc), > - 1, &ref_tree, NULL); > - if (status) { > - mlog_errno(status); > - goto bail; > - } > - > - status = ocfs2_prepare_refcount_change_for_del(inode, fe_bh, > - blkno, > - clusters_to_del, > - &credits, > - &meta_ac); > - if (status < 0) { > - mlog_errno(status); > - goto bail; > - } > - } > - > - mutex_lock(&tl_inode->i_mutex); > - tl_sem = 1; > - /* ocfs2_truncate_log_needs_flush guarantees us at least one > - * record is free for use. If there isn't any, we flush to get > - * an empty truncate log. 
*/ > - if (ocfs2_truncate_log_needs_flush(osb)) { > - status = __ocfs2_flush_truncate_log(osb); > - if (status < 0) { > - mlog_errno(status); > - goto bail; > - } > - } > - > - credits += ocfs2_calc_tree_trunc_credits(osb->sb, clusters_to_del, > - (struct ocfs2_dinode *)fe_bh->b_data, > - el); > - handle = ocfs2_start_trans(osb, credits); > - if (IS_ERR(handle)) { > - status = PTR_ERR(handle); > - handle = NULL; > - mlog_errno(status); > - goto bail; > - } > + phys_cpos = ocfs2_blocks_to_clusters(inode->i_sb, blkno); > > - status = ocfs2_do_truncate(osb, clusters_to_del, inode, fe_bh, handle, > - tc, path, meta_ac); > + status = ocfs2_remove_btree_range(inode, &et, trunc_cpos, > + phys_cpos, trunc_len, &dealloc, > + refcount_loc, flags); > if (status < 0) { > mlog_errno(status); > goto bail; > } > > - mutex_unlock(&tl_inode->i_mutex); > - tl_sem = 0; > - > - ocfs2_commit_trans(osb, handle); > - handle = NULL; > - > ocfs2_reinit_path(path, 1); > > - if (meta_ac) { > - ocfs2_free_alloc_context(meta_ac); > - meta_ac = NULL; > - } > - > - if (ref_tree) { > - ocfs2_unlock_refcount_tree(osb, ref_tree, 1); > - ref_tree = NULL; > - } > - > /* > * The check above will catch the case where we've truncated > * away all allocation. 
> @@ -7585,25 +7217,10 @@ bail: > > ocfs2_schedule_truncate_log_flush(osb, 1); > > - if (tl_sem) > - mutex_unlock(&tl_inode->i_mutex); > - > - if (handle) > - ocfs2_commit_trans(osb, handle); > - > - if (meta_ac) > - ocfs2_free_alloc_context(meta_ac); > - > - if (ref_tree) > - ocfs2_unlock_refcount_tree(osb, ref_tree, 1); > - > - ocfs2_run_deallocs(osb, &tc->tc_dealloc); > + ocfs2_run_deallocs(osb, &dealloc); > > ocfs2_free_path(path); > > - /* This will drop the ext_alloc cluster lock for us */ > - ocfs2_free_truncate_context(tc); > - > mlog_exit(status); > return status; > } > diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h > index 9c122d5..0cc7e7f 100644 > --- a/fs/ocfs2/alloc.h > +++ b/fs/ocfs2/alloc.h > @@ -141,7 +141,8 @@ int ocfs2_remove_extent(handle_t *handle, struct ocfs2_extent_tree *et, > int ocfs2_remove_btree_range(struct inode *inode, > struct ocfs2_extent_tree *et, > u32 cpos, u32 phys_cpos, u32 len, > - struct ocfs2_cached_dealloc_ctxt *dealloc); > + struct ocfs2_cached_dealloc_ctxt *dealloc, > + u64 refcount_loc, int flags); > > int ocfs2_num_free_extents(struct ocfs2_super *osb, > struct ocfs2_extent_tree *et); > @@ -233,8 +234,7 @@ int ocfs2_prepare_truncate(struct ocfs2_super *osb, > struct ocfs2_truncate_context **tc); > int ocfs2_commit_truncate(struct ocfs2_super *osb, > struct inode *inode, > - struct buffer_head *fe_bh, > - struct ocfs2_truncate_context *tc); > + struct buffer_head *fe_bh); > int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh, > unsigned int start, unsigned int end, int trunc); > > diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c > index 28c3ec2..c45533b 100644 > --- a/fs/ocfs2/dir.c > +++ b/fs/ocfs2/dir.c > @@ -4557,7 +4557,7 @@ int ocfs2_dx_dir_truncate(struct inode *dir, struct buffer_head *di_bh) > p_cpos = ocfs2_blocks_to_clusters(dir->i_sb, blkno); > > ret = ocfs2_remove_btree_range(dir, &et, cpos, p_cpos, clen, > - &dealloc); > + &dealloc, 0, 0); > if (ret) { > mlog_errno(ret); > goto out; > diff 
--git a/fs/ocfs2/file.c b/fs/ocfs2/file.c > index 89fc8ee..e0c9d1c 100644 > --- a/fs/ocfs2/file.c > +++ b/fs/ocfs2/file.c > @@ -446,7 +446,6 @@ static int ocfs2_truncate_file(struct inode *inode, > int status = 0; > struct ocfs2_dinode *fe = NULL; > struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); > - struct ocfs2_truncate_context *tc = NULL; > > mlog_entry("(inode = %llu, new_i_size = %llu\n", > (unsigned long long)OCFS2_I(inode)->ip_blkno, > @@ -514,13 +513,7 @@ static int ocfs2_truncate_file(struct inode *inode, > goto bail_unlock_sem; > } > > - status = ocfs2_prepare_truncate(osb, inode, di_bh, &tc); > - if (status < 0) { > - mlog_errno(status); > - goto bail_unlock_sem; > - } > - > - status = ocfs2_commit_truncate(osb, inode, di_bh, tc); > + status = ocfs2_commit_truncate(osb, inode, di_bh); > if (status < 0) { > mlog_errno(status); > goto bail_unlock_sem; > @@ -1499,7 +1492,7 @@ static int ocfs2_remove_inode_range(struct inode *inode, > if (phys_cpos != 0) { > ret = ocfs2_remove_btree_range(inode, &et, cpos, > phys_cpos, alloc_size, > - &dealloc); > + &dealloc, 0, 0); > if (ret) { > mlog_errno(ret); > goto out; > diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c > index 0297fb8..8ccea63 100644 > --- a/fs/ocfs2/inode.c > +++ b/fs/ocfs2/inode.c > @@ -540,7 +540,6 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb, > struct buffer_head *fe_bh) > { > int status = 0; > - struct ocfs2_truncate_context *tc = NULL; > struct ocfs2_dinode *fe; > handle_t *handle = NULL; > > @@ -582,13 +581,7 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb, > ocfs2_commit_trans(osb, handle); > handle = NULL; > > - status = ocfs2_prepare_truncate(osb, inode, fe_bh, &tc); > - if (status < 0) { > - mlog_errno(status); > - goto out; > - } > - > - status = ocfs2_commit_truncate(osb, inode, fe_bh, tc); > + status = ocfs2_commit_truncate(osb, inode, fe_bh); > if (status < 0) { > mlog_errno(status); > goto out; > diff --git a/fs/ocfs2/refcounttree.c 
b/fs/ocfs2/refcounttree.c > index 60287fc..90a6fa3 100644 > --- a/fs/ocfs2/refcounttree.c > +++ b/fs/ocfs2/refcounttree.c > @@ -2432,20 +2432,19 @@ out: > * > * Normally the refcount blocks store these refcount should be > * continguous also, so that we can get the number easily. > - * As for meta_ac, we will at most add split 2 refcount record and > - * 2 more refcount block, so just check it in a rough way. > + * We will at most add split 2 refcount records and 2 more > + * refcount blocks, so just check it in a rough way. > * > * Caller must hold refcount tree lock. > */ > int ocfs2_prepare_refcount_change_for_del(struct inode *inode, > - struct buffer_head *di_bh, > + u64 refcount_loc, > u64 phys_blkno, > u32 clusters, > int *credits, > - struct ocfs2_alloc_context **meta_ac) > + int *ref_blocks) > { > - int ret, ref_blocks = 0; > - struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; > + int ret; > struct ocfs2_inode_info *oi = OCFS2_I(inode); > struct buffer_head *ref_root_bh = NULL; > struct ocfs2_refcount_tree *tree; > @@ -2462,14 +2461,13 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode, > BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)); > > ret = ocfs2_get_refcount_tree(OCFS2_SB(inode->i_sb), > - le64_to_cpu(di->i_refcount_loc), &tree); > + refcount_loc, &tree); > if (ret) { > mlog_errno(ret); > goto out; > } > > - ret = ocfs2_read_refcount_block(&tree->rf_ci, > - le64_to_cpu(di->i_refcount_loc), > + ret = ocfs2_read_refcount_block(&tree->rf_ci, refcount_loc, > &ref_root_bh); > if (ret) { > mlog_errno(ret); > @@ -2480,21 +2478,14 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode, > &tree->rf_ci, > ref_root_bh, > start_cpos, clusters, > - &ref_blocks, credits); > + ref_blocks, credits); > if (ret) { > mlog_errno(ret); > goto out; > } > > - mlog(0, "reserve new metadata %d, credits = %d\n", > - ref_blocks, *credits); > - > - if (ref_blocks) { > - ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb), 
> - ref_blocks, meta_ac); > - if (ret) > - mlog_errno(ret); > - } > + mlog(0, "reserve new metadata %d blocks, credits = %d\n", > + *ref_blocks, *credits); > > out: > brelse(ref_root_bh); > diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h > index c1d19b1..9983ba1 100644 > --- a/fs/ocfs2/refcounttree.h > +++ b/fs/ocfs2/refcounttree.h > @@ -47,11 +47,11 @@ int ocfs2_decrease_refcount(struct inode *inode, > struct ocfs2_cached_dealloc_ctxt *dealloc, > int delete); > int ocfs2_prepare_refcount_change_for_del(struct inode *inode, > - struct buffer_head *di_bh, > + u64 refcount_loc, > u64 phys_blkno, > u32 clusters, > int *credits, > - struct ocfs2_alloc_context **meta_ac); > + int *ref_blocks); > int ocfs2_refcount_cow(struct inode *inode, struct buffer_head *di_bh, > u32 cpos, u32 write_len, u32 max_cpos); > >