* [Ocfs2-devel] [PATCH 0/2] Unwritten extent merge update,V1
@ 2008-01-04 0:33 Tao Ma
2008-01-04 0:45 ` [Ocfs2-devel] [PATCH 1/2] Add support for cross extent block merge.V1 Tao Ma
` (2 more replies)
0 siblings, 3 replies; 9+ messages in thread
From: Tao Ma @ 2008-01-04 0:33 UTC (permalink / raw)
To: ocfs2-devel
The old extent merging code down underneath "ocfs2_mark_extent_written()"
can't merge extents between leaf blocks. This patch resolve this.
So that a large unwritten extent which has been split up with a bunch of
writes can be merged together once all unwritten regions have been written to.
^ permalink raw reply [flat|nested] 9+ messages in thread* [Ocfs2-devel] [PATCH 1/2] Add support for cross extent block merge.V1 2008-01-04 0:33 [Ocfs2-devel] [PATCH 0/2] Unwritten extent merge update,V1 Tao Ma @ 2008-01-04 0:45 ` Tao Ma 2008-01-06 21:52 ` Mark Fasheh 2008-01-04 0:47 ` [Ocfs2-devel] [PATCH 2/2] Enable " Tao Ma 2008-01-06 13:44 ` [Ocfs2-devel] [PATCH 0/2] Unwritten extent merge update,V1 Mark Fasheh 2 siblings, 1 reply; 9+ messages in thread From: Tao Ma @ 2008-01-04 0:45 UTC (permalink / raw) To: ocfs2-devel In ocfs2_merge_rec_left, when we find the merge extent is "CONTIG_RIGHT" with the first extent record of the next extent block, we will merge it to the next extent block and change all the related extent blocks accordingly. In ocfs2_merge_rec_right, when we find the merge extent is "CONTIG_LEFT" with the last extent record of the previous extent block, we will merge it to the prevoius extent block and change all the related extent blocks accordingly. As for CONTIG_LEFTRIGHT, we will handle CONTIG_RIGHT first when the split_index is zero since it is more efficient and easier. Signed-off-by: Tao Ma <tao.ma@oracle.com> --- fs/ocfs2/alloc.c | 367 ++++++++++++++++++++++++++++++++++++++++++++++++----- 1 files changed, 332 insertions(+), 35 deletions(-) diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 23c8cda..c256750 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -1450,6 +1450,8 @@ static void ocfs2_adjust_root_records(struct ocfs2_extent_list *root_el, * - When our insert into the right path leaf is at the leftmost edge * and requires an update of the path immediately to it's left. This * can occur at the end of some types of rotation and appending inserts. + * - When we've adjusted the last extent record in the left path leaf and the + * 1st extent record in the right path leaf during cross extent block merge. */ static void ocfs2_complete_edge_insert(struct inode *inode, handle_t *handle, struct ocfs2_path *left_path, @@ -2712,24 +2714,119 @@ static void ocfs2_cleanup_merge(struct ocfs2_extent_list *el, } } +static int ocfs2_get_right_path(struct inode *inode, + struct ocfs2_path *left_path, + struct ocfs2_path **ret_right_path) +{ + int ret; + u32 right_cpos; + struct ocfs2_path *right_path = NULL; + struct ocfs2_extent_list *left_el; + + *ret_right_path = NULL; + + /* This function shouldn't be called for non-trees. */ + BUG_ON(left_path->p_tree_depth == 0); + + left_el = path_leaf_el(left_path); + BUG_ON(left_el->l_next_free_rec != left_el->l_count); + + ret = ocfs2_find_cpos_for_right_leaf(inode->i_sb, left_path, + &right_cpos); + if (ret) { + mlog_errno(ret); + goto out; + } + + /* This function shouldn't be called for the rightmost leaf. */ + BUG_ON(right_cpos == 0); + + right_path = ocfs2_new_path(path_root_bh(left_path), + path_root_el(left_path)); + if (!right_path) { + ret = -ENOMEM; + mlog_errno(ret); + goto out; + } + + ret = ocfs2_find_path(inode, right_path, right_cpos); + if (ret) { + mlog_errno(ret); + goto out; + } + + *ret_right_path = right_path; +out: + if (ret) + ocfs2_free_path(right_path); + return ret; +} + /* * Remove split_rec clusters from the record at index and merge them * onto the beginning of the record at index + 1. */ -static int ocfs2_merge_rec_right(struct inode *inode, struct buffer_head *bh, - handle_t *handle, - struct ocfs2_extent_rec *split_rec, - struct ocfs2_extent_list *el, int index) +static int ocfs2_merge_rec_right(struct inode *inode, + struct ocfs2_path *left_path, + handle_t *handle, + struct ocfs2_extent_rec *split_rec, + struct ocfs2_extent_list *el, int index) { - int ret; + int ret, next_free, i; unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters); struct ocfs2_extent_rec *left_rec; struct ocfs2_extent_rec *right_rec; + struct ocfs2_extent_list *right_el; + struct ocfs2_path *right_path = NULL; + int subtree_index = 0; + struct buffer_head *bh = path_leaf_bh(left_path); + struct buffer_head *root_bh = NULL; BUG_ON(index >= le16_to_cpu(el->l_next_free_rec)); - left_rec = &el->l_recs[index]; - right_rec = &el->l_recs[index + 1]; + + if (index == le16_to_cpu(el->l_next_free_rec - 1) && + le16_to_cpu(el->l_next_free_rec) == le16_to_cpu(el->l_count)) { + /* we meet with a cross extent block merge. */ + ret = ocfs2_get_right_path(inode, left_path, &right_path); + if (ret) { + mlog_errno(ret); + goto out; + } + + right_el = path_leaf_el(right_path); + next_free = le16_to_cpu(right_el->l_next_free_rec); + BUG_ON(next_free <= 0); + right_rec = &right_el->l_recs[0]; + if (ocfs2_is_empty_extent(right_rec)) { + BUG_ON(le16_to_cpu(next_free) <= 1); + right_rec = &right_el->l_recs[1]; + } + + BUG_ON(le32_to_cpu(left_rec->e_cpos) + + le16_to_cpu(left_rec->e_leaf_clusters) != + le32_to_cpu(right_rec->e_cpos)); + + subtree_index = ocfs2_find_subtree_root(inode, + left_path, right_path); + + ret = ocfs2_extend_rotate_transaction(handle, subtree_index, + handle->h_buffer_credits, + right_path); + if (ret) { + mlog_errno(ret); + goto out; + } + + ret = ocfs2_journal_access(handle, inode, + path_leaf_bh(right_path), + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out; + } + } else + right_rec = &el->l_recs[index + 1]; ret = ocfs2_journal_access(handle, inode, bh, OCFS2_JOURNAL_ACCESS_WRITE); @@ -2751,7 +2848,90 @@ static int ocfs2_merge_rec_right(struct inode *inode, struct buffer_head *bh, if (ret) mlog_errno(ret); + if (right_path) { + ret = ocfs2_journal_dirty(handle, path_leaf_bh(right_path)); + if (ret) + mlog_errno(ret); + + root_bh = left_path->p_node[subtree_index].bh; + BUG_ON(root_bh != right_path->p_node[subtree_index].bh); + + ret = ocfs2_journal_access(handle, inode, root_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out; + } + + for (i = subtree_index + 1; + i < path_num_items(right_path); i++) { + ret = ocfs2_journal_access(handle, inode, + right_path->p_node[i].bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out; + } + + ret = ocfs2_journal_access(handle, inode, + left_path->p_node[i].bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out; + } + } + + ocfs2_complete_edge_insert(inode, handle, left_path, + right_path, subtree_index); + } out: + if (right_path) + ocfs2_free_path(right_path); + return ret; +} + +static int ocfs2_get_left_path(struct inode *inode, + struct ocfs2_path *right_path, + struct ocfs2_path **ret_left_path) +{ + int ret; + u32 left_cpos; + struct ocfs2_path *left_path = NULL; + + *ret_left_path = NULL; + + /* This function shouldn't be called for non-trees. */ + BUG_ON(right_path->p_tree_depth == 0); + + ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, + right_path, &left_cpos); + if (ret) { + mlog_errno(ret); + goto out; + } + + /* This function shouldn't be called for the leftmost leave. */ + BUG_ON(left_cpos == 0); + + left_path = ocfs2_new_path(path_root_bh(right_path), + path_root_el(right_path)); + if (!left_path) { + ret = -ENOMEM; + mlog_errno(ret); + goto out; + } + + ret = ocfs2_find_path(inode, left_path, left_cpos); + if (ret) { + mlog_errno(ret); + goto out; + } + + *ret_left_path = left_path; +out: + if (ret) + ocfs2_free_path(left_path); return ret; } @@ -2759,22 +2939,64 @@ out: * Remove split_rec clusters from the record at index and merge them * onto the tail of the record at index - 1. */ -static int ocfs2_merge_rec_left(struct inode *inode, struct buffer_head *bh, +static int ocfs2_merge_rec_left(struct inode *inode, + struct ocfs2_path *right_path, handle_t *handle, struct ocfs2_extent_rec *split_rec, struct ocfs2_extent_list *el, int index) { - int ret, has_empty_extent = 0; + int ret, i, subtree_index = 0, has_empty_extent = 0; unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters); struct ocfs2_extent_rec *left_rec; struct ocfs2_extent_rec *right_rec; + struct buffer_head *bh = path_leaf_bh(right_path); + struct buffer_head *root_bh = NULL; + struct ocfs2_path *left_path = NULL; + struct ocfs2_extent_list *left_el; - BUG_ON(index <= 0); + BUG_ON(index < 0); - left_rec = &el->l_recs[index - 1]; right_rec = &el->l_recs[index]; - if (ocfs2_is_empty_extent(&el->l_recs[0])) - has_empty_extent = 1; + if (index == 0) { + /* we meet with a cross extent block merge. */ + ret = ocfs2_get_left_path(inode, right_path, &left_path); + if (ret) { + mlog_errno(ret); + goto out; + } + + left_el = path_leaf_el(left_path); + BUG_ON(le16_to_cpu(left_el->l_next_free_rec) != + le16_to_cpu(left_el->l_count)); + + left_rec = &left_el->l_recs[le16_to_cpu(left_el->l_count) - 1]; + BUG_ON(le32_to_cpu(left_rec->e_cpos) + + le16_to_cpu(left_rec->e_leaf_clusters) != + le32_to_cpu(split_rec->e_cpos)); + + subtree_index = ocfs2_find_subtree_root(inode, + left_path, right_path); + + ret = ocfs2_extend_rotate_transaction(handle, subtree_index, + handle->h_buffer_credits, + left_path); + if (ret) { + mlog_errno(ret); + goto out; + } + + ret = ocfs2_journal_access(handle, inode, + path_leaf_bh(left_path), + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out; + } + } else { + left_rec = &el->l_recs[index - 1]; + if (ocfs2_is_empty_extent(&el->l_recs[0])) + has_empty_extent = 1; + } ret = ocfs2_journal_access(handle, inode, bh, OCFS2_JOURNAL_ACCESS_WRITE); @@ -2802,24 +3024,77 @@ static int ocfs2_merge_rec_left(struct inode *inode, struct buffer_head *bh, ocfs2_cleanup_merge(el, index); ret = ocfs2_journal_dirty(handle, bh); - if (ret) + if (ret) { mlog_errno(ret); + goto out; + } + if (left_path) { + ret = ocfs2_journal_dirty(handle, path_leaf_bh(left_path)); + if (ret) { + mlog_errno(ret); + goto out; + } + + /* + * In the situation that the right_rec is empty and the extent + * block is empty also, ocfs2_complete_edge_insert can't handle + * it, while ocfs2_rotate_tree_left can be called and the extent + * block will be deleted since c_split_covers_rec is set. + */ + if (le16_to_cpu(right_rec->e_leaf_clusters) == 0 && + le16_to_cpu(el->l_next_free_rec) == 1) + goto out; + + root_bh = left_path->p_node[subtree_index].bh; + BUG_ON(root_bh != right_path->p_node[subtree_index].bh); + + ret = ocfs2_journal_access(handle, inode, root_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out; + } + + for (i = subtree_index + 1; + i < path_num_items(right_path); i++) { + ret = ocfs2_journal_access(handle, inode, + right_path->p_node[i].bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out; + } + + ret = ocfs2_journal_access(handle, inode, + left_path->p_node[i].bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out; + } + } + + ocfs2_complete_edge_insert(inode, handle, left_path, + right_path, subtree_index); + } out: + if (left_path) + ocfs2_free_path(left_path); return ret; } static int ocfs2_try_to_merge_extent(struct inode *inode, handle_t *handle, - struct ocfs2_path *left_path, + struct ocfs2_path *path, int split_index, struct ocfs2_extent_rec *split_rec, struct ocfs2_cached_dealloc_ctxt *dealloc, struct ocfs2_merge_ctxt *ctxt) { - int ret = 0; - struct ocfs2_extent_list *el = path_leaf_el(left_path); + int ret = 0, left_first = 1; + struct ocfs2_extent_list *el = path_leaf_el(path); struct ocfs2_extent_rec *rec = &el->l_recs[split_index]; BUG_ON(ctxt->c_contig_type == CONTIG_NONE); @@ -2832,7 +3107,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, * extents - having more than one in a leaf is * illegal. */ - ret = ocfs2_rotate_tree_left(inode, handle, left_path, + ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc); if (ret) { mlog_errno(ret); @@ -2847,7 +3122,6 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, * Left-right contig implies this. */ BUG_ON(!ctxt->c_split_covers_rec); - BUG_ON(split_index == 0); /* * Since the leftright insert always covers the entire @@ -2858,9 +3132,21 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, * Since the adding of an empty extent shifts * everything back to the right, there's no need to * update split_index here. - */ - ret = ocfs2_merge_rec_left(inode, path_leaf_bh(left_path), - handle, split_rec, el, split_index); + * + * When the split_index is zero, we need to merge it to the + * prevoius extent block. It is more efficient and easier + * if we do merge_right first and merge_left later. + */ + if (!split_index) + left_first = 0; + if (left_first) + ret = ocfs2_merge_rec_left(inode, path, + handle, split_rec, + el, split_index); + else + ret = ocfs2_merge_rec_right(inode, path, + handle, split_rec, + el, split_index); if (ret) { mlog_errno(ret); goto out; @@ -2871,24 +3157,35 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, */ BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0])); - /* - * The left merge left us with an empty extent, remove - * it. - */ - ret = ocfs2_rotate_tree_left(inode, handle, left_path, dealloc); + /* The merge left us with an empty extent, remove it. */ + ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc); if (ret) { mlog_errno(ret); goto out; } - split_index--; + + /* + * For split_index = 0, we have done merge_right first and + * want to merge it to the previous extent block now. + * So no need to decrease it. + */ + if (left_first) + split_index--; rec = &el->l_recs[split_index]; /* * Note that we don't pass split_rec here on purpose - - * we've merged it into the left side. + * we've merged it into the rec already. */ - ret = ocfs2_merge_rec_right(inode, path_leaf_bh(left_path), - handle, rec, el, split_index); + if (left_first) + ret = ocfs2_merge_rec_right(inode, path, + handle, rec, + el, split_index); + else + ret = ocfs2_merge_rec_left(inode, path, + handle, rec, + el, split_index); + if (ret) { mlog_errno(ret); goto out; @@ -2896,7 +3193,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0])); - ret = ocfs2_rotate_tree_left(inode, handle, left_path, + ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc); /* * Error from this last rotate is not critical, so @@ -2915,7 +3212,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, */ if (ctxt->c_contig_type == CONTIG_RIGHT) { ret = ocfs2_merge_rec_left(inode, - path_leaf_bh(left_path), + path, handle, split_rec, el, split_index); if (ret) { @@ -2924,7 +3221,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, } } else { ret = ocfs2_merge_rec_right(inode, - path_leaf_bh(left_path), + path, handle, split_rec, el, split_index); if (ret) { @@ -2938,7 +3235,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, * The merge may have left an empty extent in * our leaf. Try to rotate it away. */ - ret = ocfs2_rotate_tree_left(inode, handle, left_path, + ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc); if (ret) mlog_errno(ret); -- 1.5.3.GIT ^ permalink raw reply related [flat|nested] 9+ messages in thread
* [Ocfs2-devel] [PATCH 1/2] Add support for cross extent block merge.V1 2008-01-04 0:45 ` [Ocfs2-devel] [PATCH 1/2] Add support for cross extent block merge.V1 Tao Ma @ 2008-01-06 21:52 ` Mark Fasheh 2008-01-07 1:06 ` tao.ma 0 siblings, 1 reply; 9+ messages in thread From: Mark Fasheh @ 2008-01-06 21:52 UTC (permalink / raw) To: ocfs2-devel On Fri, Jan 04, 2008 at 04:45:38PM +0800, tao.ma wrote: > In ocfs2_merge_rec_left, when we find the merge extent is "CONTIG_RIGHT" > with the first extent record of the next extent block, we will merge it to > the next extent block and change all the related extent blocks accordingly. > > In ocfs2_merge_rec_right, when we find the merge extent is "CONTIG_LEFT" > with the last extent record of the previous extent block, we will merge > it to the prevoius extent block and change all the related extent blocks > accordingly. > > As for CONTIG_LEFTRIGHT, we will handle CONTIG_RIGHT first when the split_index > is zero since it is more efficient and easier. > > Signed-off-by: Tao Ma <tao.ma@oracle.com> > --- > fs/ocfs2/alloc.c | 367 ++++++++++++++++++++++++++++++++++++++++++++++++----- > 1 files changed, 332 insertions(+), 35 deletions(-) > > diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c > index 23c8cda..c256750 100644 > --- a/fs/ocfs2/alloc.c > +++ b/fs/ocfs2/alloc.c > @@ -1450,6 +1450,8 @@ static void ocfs2_adjust_root_records(struct ocfs2_extent_list *root_el, > * - When our insert into the right path leaf is at the leftmost edge > * and requires an update of the path immediately to it's left. This > * can occur at the end of some types of rotation and appending inserts. > + * - When we've adjusted the last extent record in the left path leaf and the > + * 1st extent record in the right path leaf during cross extent block merge. > */ > static void ocfs2_complete_edge_insert(struct inode *inode, handle_t *handle, > struct ocfs2_path *left_path, Ok, great. I like that we're keeping these functions documented... It's hard enough to follow as-is! > /* > * Remove split_rec clusters from the record at index and merge them > * onto the beginning of the record at index + 1. > */ Should update this comment though... > -static int ocfs2_merge_rec_right(struct inode *inode, struct buffer_head *bh, > - handle_t *handle, > - struct ocfs2_extent_rec *split_rec, > - struct ocfs2_extent_list *el, int index) > +static int ocfs2_merge_rec_right(struct inode *inode, > + struct ocfs2_path *left_path, > + handle_t *handle, > + struct ocfs2_extent_rec *split_rec, > + struct ocfs2_extent_list *el, int index) > { > @@ -2751,7 +2848,90 @@ static int ocfs2_merge_rec_right(struct inode *inode, struct buffer_head *bh, > if (ret) > mlog_errno(ret); > > + if (right_path) { > + ret = ocfs2_journal_dirty(handle, path_leaf_bh(right_path)); > + if (ret) > + mlog_errno(ret); > + > + root_bh = left_path->p_node[subtree_index].bh; > + BUG_ON(root_bh != right_path->p_node[subtree_index].bh); > + > + ret = ocfs2_journal_access(handle, inode, root_bh, > + OCFS2_JOURNAL_ACCESS_WRITE); > + if (ret) { > + mlog_errno(ret); > + goto out; > + } > + > + for (i = subtree_index + 1; > + i < path_num_items(right_path); i++) { > + ret = ocfs2_journal_access(handle, inode, > + right_path->p_node[i].bh, > + OCFS2_JOURNAL_ACCESS_WRITE); > + if (ret) { > + mlog_errno(ret); > + goto out; > + } > + > + ret = ocfs2_journal_access(handle, inode, > + left_path->p_node[i].bh, > + OCFS2_JOURNAL_ACCESS_WRITE); > + if (ret) { > + mlog_errno(ret); > + goto out; > + } > + } I'd move the journal_access() calls here up before we actually change the records. That way we can catch journal errors before the tree gets into an transient state. > @@ -2759,22 +2939,64 @@ out: > * Remove split_rec clusters from the record at index and merge them > * onto the tail of the record at index - 1. > */ Should update this comment. > -static int ocfs2_merge_rec_left(struct inode *inode, struct buffer_head *bh, > +static int ocfs2_merge_rec_left(struct inode *inode, > + struct ocfs2_path *right_path, > handle_t *handle, > struct ocfs2_extent_rec *split_rec, > struct ocfs2_extent_list *el, int index) > { > - int ret, has_empty_extent = 0; > + int ret, i, subtree_index = 0, has_empty_extent = 0; > unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters); > struct ocfs2_extent_rec *left_rec; > struct ocfs2_extent_rec *right_rec; > + struct buffer_head *bh = path_leaf_bh(right_path); > + struct buffer_head *root_bh = NULL; > + struct ocfs2_path *left_path = NULL; > + struct ocfs2_extent_list *left_el; > > - BUG_ON(index <= 0); > + BUG_ON(index < 0); > > - left_rec = &el->l_recs[index - 1]; > right_rec = &el->l_recs[index]; > - if (ocfs2_is_empty_extent(&el->l_recs[0])) > - has_empty_extent = 1; > + if (index == 0) { > + /* we meet with a cross extent block merge. */ > + ret = ocfs2_get_left_path(inode, right_path, &left_path); > + if (ret) { > + mlog_errno(ret); > + goto out; > + } > + > + left_el = path_leaf_el(left_path); > + BUG_ON(le16_to_cpu(left_el->l_next_free_rec) != > + le16_to_cpu(left_el->l_count)); > + > + left_rec = &left_el->l_recs[le16_to_cpu(left_el->l_count) - 1]; Using left_el->l_count to index the array is correct here, but would you mind changing this to use l_next_free_rec so it's more consistent with what we usually do? It actually reads a bit weird to my eyes because I'm used to seeing l_next_free_rec inside of array derefs! > @@ -2802,24 +3024,77 @@ static int ocfs2_merge_rec_left(struct inode *inode, struct buffer_head *bh, > ocfs2_cleanup_merge(el, index); > > ret = ocfs2_journal_dirty(handle, bh); > - if (ret) > + if (ret) { > mlog_errno(ret); > + goto out; > + } > > + if (left_path) { > + ret = ocfs2_journal_dirty(handle, path_leaf_bh(left_path)); > + if (ret) { > + mlog_errno(ret); > + goto out; > + } > + > + /* > + * In the situation that the right_rec is empty and the extent > + * block is empty also, ocfs2_complete_edge_insert can't handle > + * it, while ocfs2_rotate_tree_left can be called and the extent > + * block will be deleted since c_split_covers_rec is set. > + */ > + if (le16_to_cpu(right_rec->e_leaf_clusters) == 0 && > + le16_to_cpu(el->l_next_free_rec) == 1) > + goto out; > Hmm, I'm worried that we're exiting this function with a path whose upper cpos values are inconsistent. An error before we get the chance to actually do the rotate / removal could leave us with an inconsistent tree. I'm not sure that we can just slap some code to handle it here though - from what I can tell, the callers are expecting that we'd leave an empty extent for them to handle. I see that ocfs2_rotate_tree_left _almost_ handles this situation correctly, except the transaction extend could fail before we get to remove the blocks. That might actually be a bug, though I have to think about it some more. Generally if ocfs2_rotate_tree_left handled it 100% correctly, we _could_ just comment the heck out of this function that callers *must* call the rotate function later. Since calling ocfs2_rotate_tree_left() is implicitely decided by the callers, I'd also want to audit the code to make sure that it always gets called in the new merge situations we've introduced. > + root_bh = left_path->p_node[subtree_index].bh; > + BUG_ON(root_bh != right_path->p_node[subtree_index].bh); > + > + ret = ocfs2_journal_access(handle, inode, root_bh, > + OCFS2_JOURNAL_ACCESS_WRITE); > + if (ret) { > + mlog_errno(ret); > + goto out; > + } > + > + for (i = subtree_index + 1; > + i < path_num_items(right_path); i++) { > + ret = ocfs2_journal_access(handle, inode, > + right_path->p_node[i].bh, > + OCFS2_JOURNAL_ACCESS_WRITE); > + if (ret) { > + mlog_errno(ret); > + goto out; > + } > + > + ret = ocfs2_journal_access(handle, inode, > + left_path->p_node[i].bh, > + OCFS2_JOURNAL_ACCESS_WRITE); > + if (ret) { > + mlog_errno(ret); > + goto out; > + } > + } Same comment as before regarding the journal_access calls. > @@ -2858,9 +3132,21 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, > * Since the adding of an empty extent shifts > * everything back to the right, there's no need to > * update split_index here. > - */ > - ret = ocfs2_merge_rec_left(inode, path_leaf_bh(left_path), > - handle, split_rec, el, split_index); > + * > + * When the split_index is zero, we need to merge it to the > + * prevoius extent block. It is more efficient and easier > + * if we do merge_right first and merge_left later. > + */ So why don't we just always do the left merge 1st, instead of switching on them like this? --Mark -- Mark Fasheh Principal Software Developer, Oracle mark.fasheh@oracle.com ^ permalink raw reply [flat|nested] 9+ messages in thread
* [Ocfs2-devel] [PATCH 1/2] Add support for cross extent block merge.V1 2008-01-06 21:52 ` Mark Fasheh @ 2008-01-07 1:06 ` tao.ma 2008-01-10 17:48 ` Mark Fasheh 0 siblings, 1 reply; 9+ messages in thread From: tao.ma @ 2008-01-07 1:06 UTC (permalink / raw) To: ocfs2-devel Thanks for your review. Mark Fasheh wrote: >> @@ -2802,24 +3024,77 @@ static int ocfs2_merge_rec_left(struct inode *inode, struct buffer_head *bh, >> ocfs2_cleanup_merge(el, index); >> >> ret = ocfs2_journal_dirty(handle, bh); >> - if (ret) >> + if (ret) { >> mlog_errno(ret); >> + goto out; >> + } >> >> + if (left_path) { >> + ret = ocfs2_journal_dirty(handle, path_leaf_bh(left_path)); >> + if (ret) { >> + mlog_errno(ret); >> + goto out; >> + } >> + >> + /* >> + * In the situation that the right_rec is empty and the extent >> + * block is empty also, ocfs2_complete_edge_insert can't handle >> + * it, while ocfs2_rotate_tree_left can be called and the extent >> + * block will be deleted since c_split_covers_rec is set. >> + */ >> + if (le16_to_cpu(right_rec->e_leaf_clusters) == 0 && >> + le16_to_cpu(el->l_next_free_rec) == 1) >> + goto out; >> >> > > Hmm, I'm worried that we're exiting this function with a path whose upper > cpos values are inconsistent. An error before we get the chance to actually > do the rotate / removal could leave us with an inconsistent tree. > > I'm not sure that we can just slap some code to handle it here though - from > what I can tell, the callers are expecting that we'd leave an empty extent > for them to handle. > Yes. So the situation here is really hard to handle and lead me to make the decision that we should let ocfs2_roatate_tree_left go on the issue. > I see that ocfs2_rotate_tree_left _almost_ handles this situation > correctly, except the transaction extend could fail before we get to remove > the blocks. That might actually be a bug, though I have to think about it > some more. Generally if ocfs2_rotate_tree_left handled it 100% correctly, we > _could_ just comment the heck out of this function that callers *must* call > the rotate function later. Since calling ocfs2_rotate_tree_left() is > implicitely decided by the callers, I'd also want to audit the code to make > sure that it always gets called in the new merge situations we've > introduced. > So any suggestion is welcomed. By now, I haven't found a good way to handle it except my current ugly codes. ;) > > >> @@ -2858,9 +3132,21 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, >> * Since the adding of an empty extent shifts >> * everything back to the right, there's no need to >> * update split_index here. >> - */ >> - ret = ocfs2_merge_rec_left(inode, path_leaf_bh(left_path), >> - handle, split_rec, el, split_index); >> + * >> + * When the split_index is zero, we need to merge it to the >> + * prevoius extent block. It is more efficient and easier >> + * if we do merge_right first and merge_left later. >> + */ >> > > So why don't we just always do the left merge 1st, instead of switching on > them like this? > If split_index==0, then if we do merge_right first, it will not touch the previous extent block(so no cross extent block merge is needed) and only the extent block after the record will be affected. The second merge_left will be cross extent block merge. If split_index==l_count-1, then if we do merge_left first, the first merge will move the recs[0] of the next extent block into this block. So when merge_right is called, no cross extent block merge is needed. So if you don't like this switch, I would prefer merge_right first and merge_left second to the original merge_left first. ;) btw, I have patched your code of ocfs2_check_tree to my current git tree, it works well under my test script. The test script should be ready for review soon. ^ permalink raw reply [flat|nested] 9+ messages in thread
* [Ocfs2-devel] [PATCH 1/2] Add support for cross extent block merge.V1 2008-01-07 1:06 ` tao.ma @ 2008-01-10 17:48 ` Mark Fasheh 2008-01-10 18:21 ` tao.ma 0 siblings, 1 reply; 9+ messages in thread From: Mark Fasheh @ 2008-01-10 17:48 UTC (permalink / raw) To: ocfs2-devel On Mon, Jan 07, 2008 at 05:05:23PM +0800, tao.ma wrote: >> Hmm, I'm worried that we're exiting this function with a path whose upper >> cpos values are inconsistent. An error before we get the chance to >> actually >> do the rotate / removal could leave us with an inconsistent tree. >> >> I'm not sure that we can just slap some code to handle it here though - >> from >> what I can tell, the callers are expecting that we'd leave an empty extent >> for them to handle. >> > Yes. So the situation here is really hard to handle and lead me to make the > decision that we should let ocfs2_roatate_tree_left go on the issue. >> I see that ocfs2_rotate_tree_left _almost_ handles this situation >> correctly, except the transaction extend could fail before we get to >> remove >> the blocks. That might actually be a bug, though I have to think about it >> some more. Generally if ocfs2_rotate_tree_left handled it 100% correctly, >> we >> _could_ just comment the heck out of this function that callers *must* >> call >> the rotate function later. Since calling ocfs2_rotate_tree_left() is >> implicitely decided by the callers, I'd also want to audit the code to >> make >> sure that it always gets called in the new merge situations we've >> introduced. >> > So any suggestion is welcomed. By now, I haven't found a good way to handle > it except my current ugly codes. ;) Ok, I've had some time to think about this. We should just handle this corner case in ocfs2_merge_rec_left(). ocfs2_rotate_tree_left() cleanly exits if the empty extent has already been removed, so it's safe to unconditionally call it as we do today. >> So why don't we just always do the left merge 1st, instead of switching on >> them like this? >> > If split_index==0, then if we do merge_right first, it will not touch the > previous extent block(so no cross extent block merge is needed) and only > the extent block after the record will be affected. The second merge_left > will be cross extent block merge. > If split_index==l_count-1, then if we do merge_left first, the first merge > will move the recs[0] of the next extent block into this block. So when > merge_right is called, no cross extent block merge is needed. > So if you don't like this switch, I would prefer merge_right first and > merge_left second to the original merge_left first. ;) Yeah, I think what I was looking for was whether you had a reason to not just re-order the merging in all cases. Do we ever get into the case that a leftright merge is at index == (l_count - 1)? I think the search should prevent that (and instead give us index == 0 for the extent block to the right). If so, I think that just always doing the left merge 1st makes sense. I don't see any problem with that breaking the code to merge within a block either. > btw, I have patched your code of ocfs2_check_tree to my current git tree, > it works well under my test script. The test script should be ready for > review soon. Great, post it ;) Have you looked at running the burn-in script? --Mark -- Mark Fasheh Principal Software Developer, Oracle mark.fasheh@oracle.com ^ permalink raw reply [flat|nested] 9+ messages in thread
* [Ocfs2-devel] [PATCH 1/2] Add support for cross extent block merge.V1 2008-01-10 17:48 ` Mark Fasheh @ 2008-01-10 18:21 ` tao.ma 0 siblings, 0 replies; 9+ messages in thread From: tao.ma @ 2008-01-10 18:21 UTC (permalink / raw) To: ocfs2-devel Mark Fasheh wrote: > On Mon, Jan 07, 2008 at 05:05:23PM +0800, tao.ma wrote: > >>> Hmm, I'm worried that we're exiting this function with a path whose upper >>> cpos values are inconsistent. An error before we get the chance to >>> actually >>> do the rotate / removal could leave us with an inconsistent tree. >>> >>> I'm not sure that we can just slap some code to handle it here though - >>> from >>> what I can tell, the callers are expecting that we'd leave an empty extent >>> for them to handle. >>> >>> >> Yes. So the situation here is really hard to handle and lead me to make the >> decision that we should let ocfs2_roatate_tree_left go on the issue. >> >>> I see that ocfs2_rotate_tree_left _almost_ handles this situation >>> correctly, except the transaction extend could fail before we get to >>> remove >>> the blocks. That might actually be a bug, though I have to think about it >>> some more. Generally if ocfs2_rotate_tree_left handled it 100% correctly, >>> we >>> _could_ just comment the heck out of this function that callers *must* >>> call >>> the rotate function later. Since calling ocfs2_rotate_tree_left() is >>> implicitely decided by the callers, I'd also want to audit the code to >>> make >>> sure that it always gets called in the new merge situations we've >>> introduced. >>> >>> >> So any suggestion is welcomed. By now, I haven't found a good way to handle >> it except my current ugly codes. ;) >> > > Ok, I've had some time to think about this. We should just handle this > corner case in ocfs2_merge_rec_left(). ocfs2_rotate_tree_left() cleanly > exits if the empty extent has already been removed, so it's safe to > unconditionally call it as we do today. > hmm, ok. Maybe a helper function is needed since now ocfs2_merge_rec_left have to handle so much issue. > > >>> So why don't we just always do the left merge 1st, instead of switching on >>> them like this? >>> >>> >> If split_index==0, then if we do merge_right first, it will not touch the >> previous extent block(so no cross extent block merge is needed) and only >> the extent block after the record will be affected. The second merge_left >> will be cross extent block merge. >> If split_index==l_count-1, then if we do merge_left first, the first merge >> will move the recs[0] of the next extent block into this block. So when >> merge_right is called, no cross extent block merge is needed. >> So if you don't like this switch, I would prefer merge_right first and >> merge_left second to the original merge_left first. ;) >> > > Yeah, I think what I was looking for was whether you had a reason to not > just re-order the merging in all cases. > > Do we ever get into the case that a leftright merge is at index == (l_count > - 1)? I think the search should prevent that (and instead give us index == 0 > for the extent block to the right). > Yes, I think we can get into this issue when the unwritten extent exists in l_count-1. > If so, I think that just always doing the left merge 1st makes sense. I > don't see any problem with that breaking the code to merge within a block > either. > Do you mean right merge first? As I have mentioned above, if index==0, the first right merge first will not touch cross extent block. Only the second one has to. And now I think some of my previous comment is wrong. ;) Even if index == l_count-1, it is ok for right merge first. Since the first right merge is a cross extent block merge, but the second left merge don't touch cross extent block. So there is totally one cross extent block merge. > > >> btw, I have patched your code of ocfs2_check_tree to my current git tree, >> it works well under my test script. The test script should be ready for >> review soon. >> > > Great, post it ;) Have you looked at running the burn-in script OK, I will post it soon. Actually it is generated when I create sparse file support in ocfs2-tools. I always want to modify it to be a test script for both kernel and the tools. Now it really goes. ;) Actually I make the ocfs2 file system read-only one time when I run one random_rw_test in it. :) But I have to sparse some time to investigate the real reason. So please wait. ;) ^ permalink raw reply [flat|nested] 9+ messages in thread
* [Ocfs2-devel] [PATCH 2/2] Enable cross extent block merge.V1 2008-01-04 0:33 [Ocfs2-devel] [PATCH 0/2] Unwritten extent merge update,V1 Tao Ma 2008-01-04 0:45 ` [Ocfs2-devel] [PATCH 1/2] Add support for cross extent block merge.V1 Tao Ma @ 2008-01-04 0:47 ` Tao Ma 2008-01-06 22:00 ` Mark Fasheh 2008-01-06 13:44 ` [Ocfs2-devel] [PATCH 0/2] Unwritten extent merge update,V1 Mark Fasheh 2 siblings, 1 reply; 9+ messages in thread From: Tao Ma @ 2008-01-04 0:47 UTC (permalink / raw) To: ocfs2-devel In ocfs2_figure_merge_contig_type, we judge whether there exists a cross extent block merge and enable it by setting CONTIG_LEFT and CONTIG_RIGHT accordingly. Signed-off-by: Tao Ma <tao.ma@oracle.com> --- fs/ocfs2/alloc.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++----- 1 files changed, 68 insertions(+), 8 deletions(-) diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index c256750..e3b17ac 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -3795,20 +3795,48 @@ out: } static enum ocfs2_contig_type -ocfs2_figure_merge_contig_type(struct inode *inode, +ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path, struct ocfs2_extent_list *el, int index, struct ocfs2_extent_rec *split_rec) { - struct ocfs2_extent_rec *rec; + int status; + struct ocfs2_extent_rec *rec = NULL; enum ocfs2_contig_type ret = CONTIG_NONE; + u32 left_cpos, right_cpos; + struct ocfs2_extent_list *new_el; + struct ocfs2_path *left_path = NULL, *right_path = NULL; + + if (index > 0) { + rec = &el->l_recs[index - 1]; + } else if (path->p_tree_depth > 0) { + status = ocfs2_find_cpos_for_left_leaf(inode->i_sb, + path, &left_cpos); + if (status) + goto out; + + if (left_cpos != 0) { + left_path = ocfs2_new_path(path_root_bh(path), + path_root_el(path)); + if (!left_path) + goto out; + + status = ocfs2_find_path(inode, left_path, left_cpos); + if (status) + goto out; + + new_el = path_leaf_el(left_path); + + BUG_ON(le16_to_cpu(new_el->l_next_free_rec) != + le16_to_cpu(new_el->l_count)); + rec = &new_el->l_recs[le16_to_cpu(new_el->l_count) - 1]; + } + } /* * We're careful to check for an empty extent record here - * the merge code will know what to do if it sees one. */ - - if (index > 0) { - rec = &el->l_recs[index - 1]; + if (rec) { if (index == 1 && ocfs2_is_empty_extent(rec)) { if (split_rec->e_cpos == el->l_recs[index].e_cpos) ret = CONTIG_RIGHT; @@ -3817,10 +3845,36 @@ ocfs2_figure_merge_contig_type(struct inode *inode, } } - if (index < (le16_to_cpu(el->l_next_free_rec) - 1)) { + rec = NULL; + if (index < (le16_to_cpu(el->l_next_free_rec) - 1)) + rec = &el->l_recs[index + 1]; + else if (le16_to_cpu(el->l_next_free_rec) == le16_to_cpu(el->l_count) && + path->p_tree_depth > 0) { + status = ocfs2_find_cpos_for_right_leaf(inode->i_sb, + path, &right_cpos); + if (status) + goto out; + + if (right_cpos != 0) { + right_path = ocfs2_new_path(path_root_bh(path), + path_root_el(path)); + if (!right_path) + goto out; + + status = ocfs2_find_path(inode, right_path, right_cpos); + if (status) + goto out; + + new_el = path_leaf_el(right_path); + rec = &new_el->l_recs[0]; + if (ocfs2_is_empty_extent(rec)) + goto out; + } + } + + if (rec) { enum ocfs2_contig_type contig_type; - rec = &el->l_recs[index + 1]; contig_type = ocfs2_extent_contig(inode, rec, split_rec); if (contig_type == CONTIG_LEFT && ret == CONTIG_RIGHT) @@ -3829,6 +3883,12 @@ ocfs2_figure_merge_contig_type(struct inode *inode, ret = contig_type; } +out: + if (left_path) + ocfs2_free_path(left_path); + if (right_path) + ocfs2_free_path(right_path); + return ret; } @@ -4291,7 +4351,7 @@ static int __ocfs2_mark_extent_written(struct inode *inode, goto out; } - ctxt.c_contig_type = ocfs2_figure_merge_contig_type(inode, el, + ctxt.c_contig_type = ocfs2_figure_merge_contig_type(inode, path, el, split_index, split_rec); -- 1.5.3.GIT ^ permalink raw reply related [flat|nested] 9+ messages in thread
* [Ocfs2-devel] [PATCH 2/2] Enable cross extent block merge.V1 2008-01-04 0:47 ` [Ocfs2-devel] [PATCH 2/2] Enable " Tao Ma @ 2008-01-06 22:00 ` Mark Fasheh 0 siblings, 0 replies; 9+ messages in thread From: Mark Fasheh @ 2008-01-06 22:00 UTC (permalink / raw) To: ocfs2-devel On Fri, Jan 04, 2008 at 04:46:19PM +0800, tao.ma wrote: > static enum ocfs2_contig_type > -ocfs2_figure_merge_contig_type(struct inode *inode, > +ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path, > struct ocfs2_extent_list *el, int index, > struct ocfs2_extent_rec *split_rec) > { > - struct ocfs2_extent_rec *rec; > + int status; > + struct ocfs2_extent_rec *rec = NULL; > enum ocfs2_contig_type ret = CONTIG_NONE; > + u32 left_cpos, right_cpos; > + struct ocfs2_extent_list *new_el; > + struct ocfs2_path *left_path = NULL, *right_path = NULL; > + > + if (index > 0) { > + rec = &el->l_recs[index - 1]; > + } else if (path->p_tree_depth > 0) { > + status = ocfs2_find_cpos_for_left_leaf(inode->i_sb, > + path, &left_cpos); > + if (status) > + goto out; > + > + if (left_cpos != 0) { > + left_path = ocfs2_new_path(path_root_bh(path), > + path_root_el(path)); > + if (!left_path) > + goto out; > + > + status = ocfs2_find_path(inode, left_path, left_cpos); > + if (status) > + goto out; > + > + new_el = path_leaf_el(left_path); > + > + BUG_ON(le16_to_cpu(new_el->l_next_free_rec) != > + le16_to_cpu(new_el->l_count)); Since we haven't seen this extent block before, this error condition should make the file system go read-only, not BUG() > + rec = &new_el->l_recs[le16_to_cpu(new_el->l_count) - 1]; Use l_next_free_rec in the array here please. --Mark -- Mark Fasheh Principal Software Developer, Oracle mark.fasheh@oracle.com ^ permalink raw reply [flat|nested] 9+ messages in thread
* [Ocfs2-devel] [PATCH 0/2] Unwritten extent merge update,V1 2008-01-04 0:33 [Ocfs2-devel] [PATCH 0/2] Unwritten extent merge update,V1 Tao Ma 2008-01-04 0:45 ` [Ocfs2-devel] [PATCH 1/2] Add support for cross extent block merge.V1 Tao Ma 2008-01-04 0:47 ` [Ocfs2-devel] [PATCH 2/2] Enable " Tao Ma @ 2008-01-06 13:44 ` Mark Fasheh 2 siblings, 0 replies; 9+ messages in thread From: Mark Fasheh @ 2008-01-06 13:44 UTC (permalink / raw) To: ocfs2-devel On Fri, Jan 04, 2008 at 04:32:14PM +0800, tao.ma wrote: > The old extent merging code down underneath "ocfs2_mark_extent_written()" > can't merge extents between leaf blocks. This patch resolve this. > So that a large unwritten extent which has been split up with a bunch of > writes can be merged together once all unwritten regions have been written to. So, just to get things started I'll attach a patch which I think you might find useful. It adds a function 'ocfs2_check_tree()' which you can call after marking an extent unwritten. It'll walk the entire btree and look for certain inconsistencies. If one is found, the journal is synced and then the box is paniced. The syncing gives us the chance to inspect the btree after rebooting the box. Last time I was hacking on the tree code, I modified ocfs2-test/programs/fill_verify_holes/old_burn-in.sh to call fill_verify_holes with '-u' (for unwritten extents) and ran the script against a 512 blocksize/4k clustersize file system. The kernel I was running had the tree check patch applied. This setup helped me catch the vast majority of bugs which I had introduced by my changes. These days, I'd also disable the extent map cache (which wasn't around then) as it would also hide some problems. By the way - the page cache can also hide problems by allowing the "verify" program to read cached data which it wouldn't actually be able to find if the btree was actually searched. I think we can might be able to minimize this by inserting: echo 1 > /proc/sys/vm/drop_caches right before the verify step in old_burn-in.sh. Ultimately of course, you can do whatever you want for testing, but I'd definitely like to get a detailed description of what you did or what you're planning to do in that area. I'm paranoid about btree changes, even though I'm really happy that you took this task on ;) --Mark -- Mark Fasheh Principal Software Developer, Oracle mark.fasheh@oracle.com diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index cf30761..0bebced 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -49,6 +49,8 @@ #include "uptodate.h" #include "buffer_head_io.h" +static void ocfs2_check_tree(struct inode *inode); + static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc); static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt, struct ocfs2_extent_block *eb); @@ -3782,6 +3784,9 @@ int ocfs2_insert_extent(struct ocfs2_sup else ocfs2_extent_map_insert_rec(inode, &rec); + if (status == 0) + ocfs2_check_tree(inode); + bail: if (bh) brelse(bh); @@ -4134,6 +4139,9 @@ int ocfs2_mark_extent_written(struct ino if (ret) mlog_errno(ret); + if (ret == 0) + ocfs2_check_tree(inode); + out: ocfs2_free_path(left_path); return ret; @@ -4501,6 +4509,9 @@ int ocfs2_remove_extent(struct inode *in } } + if (ret == 0) + ocfs2_check_tree(inode); + out: ocfs2_free_path(path); return ret; @@ -6122,3 +6133,177 @@ static void ocfs2_free_truncate_context( kfree(tc); } + +#define INDEX_DEPTH (OCFS2_MAX_PATH_DEPTH - 1) + +static void ocfs2_path_error(void) +{ + handle_t *handle = journal_current_handle(); + + handle->h_sync = 1; + journal_stop(handle); + + BUG(); +} + +static void ocfs2_check_path(struct inode *inode, + struct ocfs2_path *path, + int *check_index) +{ + int i, next_free, node; + u32 parent_range, child_range; + struct ocfs2_extent_list *el; + struct ocfs2_extent_rec *rec; + struct ocfs2_extent_rec *rec1; + struct ocfs2_extent_rec *parent_rec; + + for(node = 0; node < path_num_items(path); node++) { + el = path->p_node[node].el; + + next_free = le16_to_cpu(el->l_next_free_rec); + + /* Check for duplicate records */ + /* Check for records in order */ + for(i = 0; i < (next_free - 1); i++) { + rec = &el->l_recs[i]; + rec1 = &el->l_recs[i+1]; + + if (!ocfs2_is_empty_extent(rec) && + le32_to_cpu(rec1->e_cpos) <= + le32_to_cpu(rec->e_cpos)) + goto bad_records; + } + } + + /* Check ranges of records */ + el = path_root_el(path); + parent_rec = &el->l_recs[check_index[0]]; + i = 1; + while (i <= path->p_tree_depth) { + struct ocfs2_extent_rec *child_rec; + + el = path->p_node[i].el; + + parent_range = le32_to_cpu(parent_rec->e_cpos) + + le32_to_cpu(parent_rec->e_int_clusters); + child_range = ocfs2_sum_rightmost_rec(el); + if (child_range > parent_range) + goto bad_range; + + child_rec = &el->l_recs[0]; + + /* + * The leftmost branch is allowed to have a starting + * cpos of zero but children who actually start after + * that. + */ + if (parent_rec->e_cpos && + (le32_to_cpu(parent_rec->e_cpos) < + le32_to_cpu(child_rec->e_cpos))) + goto bad_range; + + parent_rec = &el->l_recs[check_index[i]]; + + i++; + } + + return; + +bad_records: + mlog(ML_ERROR, "Inode %lu: Bad Records\n", + (unsigned long)inode->i_ino); + ocfs2_path_error(); + return; + +bad_range: + mlog(ML_ERROR, "Inode %lu: Bad Range\n", + (unsigned long)inode->i_ino); + mlog(ML_ERROR, "Child block: %llu, Parent rec: (%u, %u, %llu)\n", + (unsigned long long)path->p_node[i].bh->b_blocknr, + le32_to_cpu(parent_rec->e_cpos), + le32_to_cpu(parent_rec->e_int_clusters), + (unsigned long long)le64_to_cpu(parent_rec->e_blkno)); + ocfs2_path_error(); +} + +static void ocfs2_check_tree(struct inode *inode) +{ + int ret, i; + int check_index[INDEX_DEPTH] = {0, }; + u64 blkno; + struct ocfs2_dinode *di; + struct buffer_head *di_bh = NULL; + struct buffer_head *bh; + struct buffer_head *first_tail_bh = NULL; + struct ocfs2_path *path = NULL; + struct ocfs2_extent_block *eb; + struct ocfs2_extent_list *el; + + ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), OCFS2_I(inode)->ip_blkno, + &di_bh, OCFS2_BH_CACHED, inode); + if (ret) { + mlog_errno(ret); + goto out; + } + + di = (struct ocfs2_dinode *)di_bh->b_data; + + path = ocfs2_new_inode_path(di_bh); + if (!path) { + ret = -ENOMEM; + mlog_errno(ret); + goto out; + } + +restart: + i = 0; + while (i < path->p_tree_depth) { + el = path->p_node[i].el; + + BUG_ON(check_index[i] >= le16_to_cpu(el->l_next_free_rec)); + + blkno = le64_to_cpu(el->l_recs[check_index[i]].e_blkno); + bh = NULL; + ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), blkno, + &bh, OCFS2_BH_CACHED, inode); + if (ret) { + mlog_errno(ret); + goto out; + } + + ocfs2_path_insert_eb(path, i + 1, bh); + + eb = (struct ocfs2_extent_block *) bh->b_data; + if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) + BUG(); + + i++; + } + + ocfs2_check_path(inode, path, check_index); + + if (le64_to_cpu(di->i_last_eb_blk) != path_leaf_bh(path)->b_blocknr + && path->p_tree_depth) { + int max; + + for(i = (path->p_tree_depth - 1); i >= 0; i--) { + el = path->p_node[i].el; + + max = le16_to_cpu(el->l_next_free_rec); + + check_index[i]++; + if (check_index[i] >= max) + check_index[i] = 0; + else + break; + } + + ocfs2_reinit_path(path, 1); + goto restart; + } + +out: + ocfs2_free_path(path); + brelse(di_bh); + brelse(first_tail_bh); +} ^ permalink raw reply related [flat|nested] 9+ messages in thread
end of thread, other threads:[~2008-01-10 18:21 UTC | newest] Thread overview: 9+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2008-01-04 0:33 [Ocfs2-devel] [PATCH 0/2] Unwritten extent merge update,V1 Tao Ma 2008-01-04 0:45 ` [Ocfs2-devel] [PATCH 1/2] Add support for cross extent block merge.V1 Tao Ma 2008-01-06 21:52 ` Mark Fasheh 2008-01-07 1:06 ` tao.ma 2008-01-10 17:48 ` Mark Fasheh 2008-01-10 18:21 ` tao.ma 2008-01-04 0:47 ` [Ocfs2-devel] [PATCH 2/2] Enable " Tao Ma 2008-01-06 22:00 ` Mark Fasheh 2008-01-06 13:44 ` [Ocfs2-devel] [PATCH 0/2] Unwritten extent merge update,V1 Mark Fasheh
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.