All of lore.kernel.org
 help / color / mirror / Atom feed
* [Ocfs2-devel] [PATCH 0/2] Unwritten extent merge update,V1
@ 2008-01-04  0:33 Tao Ma
  2008-01-04  0:45 ` [Ocfs2-devel] [PATCH 1/2] Add support for cross extent block merge.V1 Tao Ma
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: Tao Ma @ 2008-01-04  0:33 UTC (permalink / raw)
  To: ocfs2-devel

The old extent merging code down underneath "ocfs2_mark_extent_written()"
can't merge extents between leaf blocks. This patch resolve this.
So that a large unwritten extent which has been split up with a bunch of
writes can be merged together once all unwritten regions have been written to.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Ocfs2-devel] [PATCH 1/2] Add support for cross extent block merge.V1
  2008-01-04  0:33 [Ocfs2-devel] [PATCH 0/2] Unwritten extent merge update,V1 Tao Ma
@ 2008-01-04  0:45 ` Tao Ma
  2008-01-06 21:52   ` Mark Fasheh
  2008-01-04  0:47 ` [Ocfs2-devel] [PATCH 2/2] Enable " Tao Ma
  2008-01-06 13:44 ` [Ocfs2-devel] [PATCH 0/2] Unwritten extent merge update,V1 Mark Fasheh
  2 siblings, 1 reply; 9+ messages in thread
From: Tao Ma @ 2008-01-04  0:45 UTC (permalink / raw)
  To: ocfs2-devel

In ocfs2_merge_rec_left, when we find the merge extent is "CONTIG_RIGHT"
with the first extent record of the next extent block, we will merge it to
the next extent block and change all the related extent blocks accordingly.

In ocfs2_merge_rec_right, when we find the merge extent is "CONTIG_LEFT"
with the last extent record of the previous extent block, we will merge
it to the prevoius extent block and change all the related extent blocks
accordingly.

As for CONTIG_LEFTRIGHT, we will handle CONTIG_RIGHT first when the split_index
is zero since it is more efficient and easier.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/alloc.c |  367 ++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 files changed, 332 insertions(+), 35 deletions(-)

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 23c8cda..c256750 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1450,6 +1450,8 @@ static void ocfs2_adjust_root_records(struct ocfs2_extent_list *root_el,
  *   - When our insert into the right path leaf is at the leftmost edge
  *     and requires an update of the path immediately to it's left. This
  *     can occur at the end of some types of rotation and appending inserts.
+ *   - When we've adjusted the last extent record in the left path leaf and the
+ *     1st extent record in the right path leaf during cross extent block merge.
  */
 static void ocfs2_complete_edge_insert(struct inode *inode, handle_t *handle,
 				       struct ocfs2_path *left_path,
@@ -2712,24 +2714,119 @@ static void ocfs2_cleanup_merge(struct ocfs2_extent_list *el,
 	}
 }
 
+static int ocfs2_get_right_path(struct inode *inode,
+				struct ocfs2_path *left_path,
+				struct ocfs2_path **ret_right_path)
+{
+	int ret;
+	u32 right_cpos;
+	struct ocfs2_path *right_path = NULL;
+	struct ocfs2_extent_list *left_el;
+
+	*ret_right_path = NULL;
+
+	/* This function shouldn't be called for non-trees. */
+	BUG_ON(left_path->p_tree_depth == 0);
+
+	left_el = path_leaf_el(left_path);
+	BUG_ON(left_el->l_next_free_rec != left_el->l_count);
+
+	ret = ocfs2_find_cpos_for_right_leaf(inode->i_sb, left_path,
+					     &right_cpos);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	/* This function shouldn't be called for the rightmost leaf. */
+	BUG_ON(right_cpos == 0);
+
+	right_path = ocfs2_new_path(path_root_bh(left_path),
+				    path_root_el(left_path));
+	if (!right_path) {
+		ret = -ENOMEM;
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_find_path(inode, right_path, right_cpos);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	*ret_right_path = right_path;
+out:
+	if (ret)
+		ocfs2_free_path(right_path);
+	return ret;
+}
+
 /*
  * Remove split_rec clusters from the record at index and merge them
  * onto the beginning of the record at index + 1.
  */
-static int ocfs2_merge_rec_right(struct inode *inode, struct buffer_head *bh,
-				handle_t *handle,
-				struct ocfs2_extent_rec *split_rec,
-				struct ocfs2_extent_list *el, int index)
+static int ocfs2_merge_rec_right(struct inode *inode,
+				 struct ocfs2_path *left_path,
+				 handle_t *handle,
+				 struct ocfs2_extent_rec *split_rec,
+				 struct ocfs2_extent_list *el, int index)
 {
-	int ret;
+	int ret, next_free, i;
 	unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters);
 	struct ocfs2_extent_rec *left_rec;
 	struct ocfs2_extent_rec *right_rec;
+	struct ocfs2_extent_list *right_el;
+	struct ocfs2_path *right_path = NULL;
+	int subtree_index = 0;
+	struct buffer_head *bh = path_leaf_bh(left_path);
+	struct buffer_head *root_bh = NULL;
 
 	BUG_ON(index >= le16_to_cpu(el->l_next_free_rec));
-
 	left_rec = &el->l_recs[index];
-	right_rec = &el->l_recs[index + 1];
+
+	if (index == le16_to_cpu(el->l_next_free_rec - 1) &&
+	    le16_to_cpu(el->l_next_free_rec) == le16_to_cpu(el->l_count)) {
+		/* we meet with a cross extent block merge. */
+		ret = ocfs2_get_right_path(inode, left_path, &right_path);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+
+		right_el = path_leaf_el(right_path);
+		next_free = le16_to_cpu(right_el->l_next_free_rec);
+		BUG_ON(next_free <= 0);
+		right_rec = &right_el->l_recs[0];
+		if (ocfs2_is_empty_extent(right_rec)) {
+			BUG_ON(le16_to_cpu(next_free) <= 1);
+			right_rec = &right_el->l_recs[1];
+		}
+
+		BUG_ON(le32_to_cpu(left_rec->e_cpos) +
+		       le16_to_cpu(left_rec->e_leaf_clusters) !=
+		       le32_to_cpu(right_rec->e_cpos));
+
+		subtree_index = ocfs2_find_subtree_root(inode,
+							left_path, right_path);
+
+		ret = ocfs2_extend_rotate_transaction(handle, subtree_index,
+						      handle->h_buffer_credits,
+						      right_path);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+
+		ret = ocfs2_journal_access(handle, inode,
+					   path_leaf_bh(right_path),
+					   OCFS2_JOURNAL_ACCESS_WRITE);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+	} else
+		right_rec = &el->l_recs[index + 1];
 
 	ret = ocfs2_journal_access(handle, inode, bh,
 				   OCFS2_JOURNAL_ACCESS_WRITE);
@@ -2751,7 +2848,90 @@ static int ocfs2_merge_rec_right(struct inode *inode, struct buffer_head *bh,
 	if (ret)
 		mlog_errno(ret);
 
+	if (right_path) {
+		ret = ocfs2_journal_dirty(handle, path_leaf_bh(right_path));
+		if (ret)
+			mlog_errno(ret);
+
+		root_bh = left_path->p_node[subtree_index].bh;
+		BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
+
+		ret = ocfs2_journal_access(handle, inode, root_bh,
+					   OCFS2_JOURNAL_ACCESS_WRITE);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+
+		for (i = subtree_index + 1;
+		     i < path_num_items(right_path); i++) {
+			ret = ocfs2_journal_access(handle, inode,
+						   right_path->p_node[i].bh,
+						   OCFS2_JOURNAL_ACCESS_WRITE);
+			if (ret) {
+				mlog_errno(ret);
+				goto out;
+			}
+
+			ret = ocfs2_journal_access(handle, inode,
+						   left_path->p_node[i].bh,
+						   OCFS2_JOURNAL_ACCESS_WRITE);
+			if (ret) {
+				mlog_errno(ret);
+				goto out;
+			}
+		}
+
+		ocfs2_complete_edge_insert(inode, handle, left_path,
+					   right_path, subtree_index);
+	}
 out:
+	if (right_path)
+		ocfs2_free_path(right_path);
+	return ret;
+}
+
+static int ocfs2_get_left_path(struct inode *inode,
+			       struct ocfs2_path *right_path,
+			       struct ocfs2_path **ret_left_path)
+{
+	int ret;
+	u32 left_cpos;
+	struct ocfs2_path *left_path = NULL;
+
+	*ret_left_path = NULL;
+
+	/* This function shouldn't be called for non-trees. */
+	BUG_ON(right_path->p_tree_depth == 0);
+
+	ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb,
+					    right_path, &left_cpos);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	/* This function shouldn't be called for the leftmost leave. */
+	BUG_ON(left_cpos == 0);
+
+	left_path = ocfs2_new_path(path_root_bh(right_path),
+				   path_root_el(right_path));
+	if (!left_path) {
+		ret = -ENOMEM;
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_find_path(inode, left_path, left_cpos);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	*ret_left_path = left_path;
+out:
+	if (ret)
+		ocfs2_free_path(left_path);
 	return ret;
 }
 
@@ -2759,22 +2939,64 @@ out:
  * Remove split_rec clusters from the record at index and merge them
  * onto the tail of the record at index - 1.
  */
-static int ocfs2_merge_rec_left(struct inode *inode, struct buffer_head *bh,
+static int ocfs2_merge_rec_left(struct inode *inode,
+				struct ocfs2_path *right_path,
 				handle_t *handle,
 				struct ocfs2_extent_rec *split_rec,
 				struct ocfs2_extent_list *el, int index)
 {
-	int ret, has_empty_extent = 0;
+	int ret, i, subtree_index = 0, has_empty_extent = 0;
 	unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters);
 	struct ocfs2_extent_rec *left_rec;
 	struct ocfs2_extent_rec *right_rec;
+	struct buffer_head *bh = path_leaf_bh(right_path);
+	struct buffer_head *root_bh = NULL;
+	struct ocfs2_path *left_path = NULL;
+	struct ocfs2_extent_list *left_el;
 
-	BUG_ON(index <= 0);
+	BUG_ON(index < 0);
 
-	left_rec = &el->l_recs[index - 1];
 	right_rec = &el->l_recs[index];
-	if (ocfs2_is_empty_extent(&el->l_recs[0]))
-		has_empty_extent = 1;
+	if (index == 0) {
+		/* we meet with a cross extent block merge. */
+		ret = ocfs2_get_left_path(inode, right_path, &left_path);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+
+		left_el = path_leaf_el(left_path);
+		BUG_ON(le16_to_cpu(left_el->l_next_free_rec) !=
+		       le16_to_cpu(left_el->l_count));
+
+		left_rec = &left_el->l_recs[le16_to_cpu(left_el->l_count) - 1];
+		BUG_ON(le32_to_cpu(left_rec->e_cpos) +
+		       le16_to_cpu(left_rec->e_leaf_clusters) !=
+		       le32_to_cpu(split_rec->e_cpos));
+
+		subtree_index = ocfs2_find_subtree_root(inode,
+							left_path, right_path);
+
+		ret = ocfs2_extend_rotate_transaction(handle, subtree_index,
+						      handle->h_buffer_credits,
+						      left_path);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+
+		ret = ocfs2_journal_access(handle, inode,
+					   path_leaf_bh(left_path),
+					   OCFS2_JOURNAL_ACCESS_WRITE);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+	} else {
+		left_rec = &el->l_recs[index - 1];
+		if (ocfs2_is_empty_extent(&el->l_recs[0]))
+			has_empty_extent = 1;
+	}
 
 	ret = ocfs2_journal_access(handle, inode, bh,
 				   OCFS2_JOURNAL_ACCESS_WRITE);
@@ -2802,24 +3024,77 @@ static int ocfs2_merge_rec_left(struct inode *inode, struct buffer_head *bh,
 	ocfs2_cleanup_merge(el, index);
 
 	ret = ocfs2_journal_dirty(handle, bh);
-	if (ret)
+	if (ret) {
 		mlog_errno(ret);
+		goto out;
+	}
 
+	if (left_path) {
+		ret = ocfs2_journal_dirty(handle, path_leaf_bh(left_path));
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+
+		/*
+		 * In the situation that the right_rec is empty and the extent
+		 * block is empty also, ocfs2_complete_edge_insert can't handle
+		 * it, while ocfs2_rotate_tree_left can be called and the extent
+		 * block will be deleted since c_split_covers_rec is set.
+		 */
+		if (le16_to_cpu(right_rec->e_leaf_clusters) == 0 &&
+		    le16_to_cpu(el->l_next_free_rec) == 1)
+			goto out;
+
+		root_bh = left_path->p_node[subtree_index].bh;
+		BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
+
+		ret = ocfs2_journal_access(handle, inode, root_bh,
+					   OCFS2_JOURNAL_ACCESS_WRITE);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+
+		for (i = subtree_index + 1;
+		     i < path_num_items(right_path); i++) {
+			ret = ocfs2_journal_access(handle, inode,
+						   right_path->p_node[i].bh,
+						   OCFS2_JOURNAL_ACCESS_WRITE);
+			if (ret) {
+				mlog_errno(ret);
+				goto out;
+			}
+
+			ret = ocfs2_journal_access(handle, inode,
+						   left_path->p_node[i].bh,
+						   OCFS2_JOURNAL_ACCESS_WRITE);
+			if (ret) {
+				mlog_errno(ret);
+				goto out;
+			}
+		}
+
+		ocfs2_complete_edge_insert(inode, handle, left_path,
+					   right_path, subtree_index);
+	}
 out:
+	if (left_path)
+		ocfs2_free_path(left_path);
 	return ret;
 }
 
 static int ocfs2_try_to_merge_extent(struct inode *inode,
 				     handle_t *handle,
-				     struct ocfs2_path *left_path,
+				     struct ocfs2_path *path,
 				     int split_index,
 				     struct ocfs2_extent_rec *split_rec,
 				     struct ocfs2_cached_dealloc_ctxt *dealloc,
 				     struct ocfs2_merge_ctxt *ctxt)
 
 {
-	int ret = 0;
-	struct ocfs2_extent_list *el = path_leaf_el(left_path);
+	int ret = 0, left_first = 1;
+	struct ocfs2_extent_list *el = path_leaf_el(path);
 	struct ocfs2_extent_rec *rec = &el->l_recs[split_index];
 
 	BUG_ON(ctxt->c_contig_type == CONTIG_NONE);
@@ -2832,7 +3107,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
 		 * extents - having more than one in a leaf is
 		 * illegal.
 		 */
-		ret = ocfs2_rotate_tree_left(inode, handle, left_path,
+		ret = ocfs2_rotate_tree_left(inode, handle, path,
 					     dealloc);
 		if (ret) {
 			mlog_errno(ret);
@@ -2847,7 +3122,6 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
 		 * Left-right contig implies this.
 		 */
 		BUG_ON(!ctxt->c_split_covers_rec);
-		BUG_ON(split_index == 0);
 
 		/*
 		 * Since the leftright insert always covers the entire
@@ -2858,9 +3132,21 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
 		 * Since the adding of an empty extent shifts
 		 * everything back to the right, there's no need to
 		 * update split_index here.
-		 */
-		ret = ocfs2_merge_rec_left(inode, path_leaf_bh(left_path),
-					   handle, split_rec, el, split_index);
+		 *
+ 		 * When the split_index is zero, we need to merge it to the
+ 		 * prevoius extent block. It is more efficient and easier
+ 		 * if we do merge_right first and merge_left later.
+ 		 */
+		if (!split_index)
+			left_first = 0;
+		if (left_first)
+			ret = ocfs2_merge_rec_left(inode, path,
+						   handle, split_rec,
+						   el, split_index);
+		else
+			ret = ocfs2_merge_rec_right(inode, path,
+						    handle, split_rec,
+						    el, split_index);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -2871,24 +3157,35 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
 		 */
 		BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0]));
 
-		/*
-		 * The left merge left us with an empty extent, remove
-		 * it.
-		 */
-		ret = ocfs2_rotate_tree_left(inode, handle, left_path, dealloc);
+		/* The merge left us with an empty extent, remove it. */
+		ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
 		}
-		split_index--;
+
+		/*
+ 		 * For split_index = 0, we have done merge_right first and
+ 		 * want to merge it to the previous extent block now.
+ 		 * So no need to decrease it.
+ 		 */
+		if (left_first)
+			split_index--;
 		rec = &el->l_recs[split_index];
 
 		/*
 		 * Note that we don't pass split_rec here on purpose -
-		 * we've merged it into the left side.
+		 * we've merged it into the rec already.
 		 */
-		ret = ocfs2_merge_rec_right(inode, path_leaf_bh(left_path),
-					    handle, rec, el, split_index);
+		if (left_first)
+			ret = ocfs2_merge_rec_right(inode, path,
+						    handle, rec,
+						    el, split_index);
+		else
+			ret = ocfs2_merge_rec_left(inode, path,
+						   handle, rec,
+						   el, split_index);
+
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -2896,7 +3193,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
 
 		BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0]));
 
-		ret = ocfs2_rotate_tree_left(inode, handle, left_path,
+		ret = ocfs2_rotate_tree_left(inode, handle, path,
 					     dealloc);
 		/*
 		 * Error from this last rotate is not critical, so
@@ -2915,7 +3212,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
 		 */
 		if (ctxt->c_contig_type == CONTIG_RIGHT) {
 			ret = ocfs2_merge_rec_left(inode,
-						   path_leaf_bh(left_path),
+						   path,
 						   handle, split_rec, el,
 						   split_index);
 			if (ret) {
@@ -2924,7 +3221,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
 			}
 		} else {
 			ret = ocfs2_merge_rec_right(inode,
-						    path_leaf_bh(left_path),
+						    path,
 						    handle, split_rec, el,
 						    split_index);
 			if (ret) {
@@ -2938,7 +3235,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
 			 * The merge may have left an empty extent in
 			 * our leaf. Try to rotate it away.
 			 */
-			ret = ocfs2_rotate_tree_left(inode, handle, left_path,
+			ret = ocfs2_rotate_tree_left(inode, handle, path,
 						     dealloc);
 			if (ret)
 				mlog_errno(ret);
-- 
1.5.3.GIT

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [Ocfs2-devel] [PATCH 2/2] Enable cross extent block merge.V1
  2008-01-04  0:33 [Ocfs2-devel] [PATCH 0/2] Unwritten extent merge update,V1 Tao Ma
  2008-01-04  0:45 ` [Ocfs2-devel] [PATCH 1/2] Add support for cross extent block merge.V1 Tao Ma
@ 2008-01-04  0:47 ` Tao Ma
  2008-01-06 22:00   ` Mark Fasheh
  2008-01-06 13:44 ` [Ocfs2-devel] [PATCH 0/2] Unwritten extent merge update,V1 Mark Fasheh
  2 siblings, 1 reply; 9+ messages in thread
From: Tao Ma @ 2008-01-04  0:47 UTC (permalink / raw)
  To: ocfs2-devel

In ocfs2_figure_merge_contig_type, we judge whether there exists
a cross extent block merge and enable it by setting CONTIG_LEFT
and CONTIG_RIGHT accordingly.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/alloc.c |   76 ++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 files changed, 68 insertions(+), 8 deletions(-)

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index c256750..e3b17ac 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -3795,20 +3795,48 @@ out:
 }
 
 static enum ocfs2_contig_type
-ocfs2_figure_merge_contig_type(struct inode *inode,
+ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
 			       struct ocfs2_extent_list *el, int index,
 			       struct ocfs2_extent_rec *split_rec)
 {
-	struct ocfs2_extent_rec *rec;
+	int status;
+	struct ocfs2_extent_rec *rec = NULL;
 	enum ocfs2_contig_type ret = CONTIG_NONE;
+	u32 left_cpos, right_cpos;
+	struct ocfs2_extent_list *new_el;
+	struct ocfs2_path *left_path = NULL, *right_path = NULL;
+
+	if (index > 0) {
+		rec = &el->l_recs[index - 1];
+	} else if (path->p_tree_depth > 0) {
+		status = ocfs2_find_cpos_for_left_leaf(inode->i_sb,
+						       path, &left_cpos);
+		if (status)
+			goto out;
+
+		if (left_cpos != 0) {
+			left_path = ocfs2_new_path(path_root_bh(path),
+						   path_root_el(path));
+			if (!left_path)
+				goto out;
+
+			status = ocfs2_find_path(inode, left_path, left_cpos);
+			if (status)
+				goto out;
+
+			new_el = path_leaf_el(left_path);
+
+			BUG_ON(le16_to_cpu(new_el->l_next_free_rec) !=
+			       le16_to_cpu(new_el->l_count));
+			rec = &new_el->l_recs[le16_to_cpu(new_el->l_count) - 1];
+		}
+	}
 
 	/*
 	 * We're careful to check for an empty extent record here -
 	 * the merge code will know what to do if it sees one.
 	 */
-
-	if (index > 0) {
-		rec = &el->l_recs[index - 1];
+	if (rec) {
 		if (index == 1 && ocfs2_is_empty_extent(rec)) {
 			if (split_rec->e_cpos == el->l_recs[index].e_cpos)
 				ret = CONTIG_RIGHT;
@@ -3817,10 +3845,36 @@ ocfs2_figure_merge_contig_type(struct inode *inode,
 		}
 	}
 
-	if (index < (le16_to_cpu(el->l_next_free_rec) - 1)) {
+	rec = NULL;
+	if (index < (le16_to_cpu(el->l_next_free_rec) - 1))
+		rec = &el->l_recs[index + 1];
+	else if (le16_to_cpu(el->l_next_free_rec) == le16_to_cpu(el->l_count) &&
+		 path->p_tree_depth > 0) {
+		status = ocfs2_find_cpos_for_right_leaf(inode->i_sb,
+							path, &right_cpos);
+		if (status)
+			goto out;
+
+		if (right_cpos != 0) {
+			right_path = ocfs2_new_path(path_root_bh(path),
+						   path_root_el(path));
+			if (!right_path)
+				goto out;
+
+			status = ocfs2_find_path(inode, right_path, right_cpos);
+			if (status)
+				goto out;
+
+			new_el = path_leaf_el(right_path);
+			rec = &new_el->l_recs[0];
+			if (ocfs2_is_empty_extent(rec))
+				goto out;
+		}
+	}
+
+	if (rec) {
 		enum ocfs2_contig_type contig_type;
 
-		rec = &el->l_recs[index + 1];
 		contig_type = ocfs2_extent_contig(inode, rec, split_rec);
 
 		if (contig_type == CONTIG_LEFT && ret == CONTIG_RIGHT)
@@ -3829,6 +3883,12 @@ ocfs2_figure_merge_contig_type(struct inode *inode,
 			ret = contig_type;
 	}
 
+out:
+	if (left_path)
+		ocfs2_free_path(left_path);
+	if (right_path)
+		ocfs2_free_path(right_path);
+
 	return ret;
 }
 
@@ -4291,7 +4351,7 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
 		goto out;
 	}
 
-	ctxt.c_contig_type = ocfs2_figure_merge_contig_type(inode, el,
+	ctxt.c_contig_type = ocfs2_figure_merge_contig_type(inode, path, el,
 							    split_index,
 							    split_rec);
 
-- 
1.5.3.GIT

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [Ocfs2-devel] [PATCH 0/2] Unwritten extent merge update,V1
  2008-01-04  0:33 [Ocfs2-devel] [PATCH 0/2] Unwritten extent merge update,V1 Tao Ma
  2008-01-04  0:45 ` [Ocfs2-devel] [PATCH 1/2] Add support for cross extent block merge.V1 Tao Ma
  2008-01-04  0:47 ` [Ocfs2-devel] [PATCH 2/2] Enable " Tao Ma
@ 2008-01-06 13:44 ` Mark Fasheh
  2 siblings, 0 replies; 9+ messages in thread
From: Mark Fasheh @ 2008-01-06 13:44 UTC (permalink / raw)
  To: ocfs2-devel

On Fri, Jan 04, 2008 at 04:32:14PM +0800, tao.ma wrote:
> The old extent merging code down underneath "ocfs2_mark_extent_written()"
> can't merge extents between leaf blocks. This patch resolve this.
> So that a large unwritten extent which has been split up with a bunch of
> writes can be merged together once all unwritten regions have been written to.

So, just to get things started I'll attach a patch which I think you might
find useful. It adds a function 'ocfs2_check_tree()' which you can call
after marking an extent unwritten. It'll walk the entire btree and look for
certain inconsistencies. If one is found, the journal is synced and then the
box is paniced. The syncing gives us the chance to inspect the btree after
rebooting the box.

Last time I was hacking on the tree code, I modified
ocfs2-test/programs/fill_verify_holes/old_burn-in.sh to call
fill_verify_holes with '-u' (for unwritten extents) and ran the script
against a 512 blocksize/4k clustersize file system. The kernel I was running
had the tree check patch applied. This setup helped me catch the vast
majority of bugs which I had introduced by my changes. These days, I'd also
disable the extent map cache (which wasn't around then) as it would also
hide some problems.


By the way - the page cache can also hide problems by allowing the "verify"
program to read cached data which it wouldn't actually be able to find if
the btree was actually searched. I think we can might be able to minimize
this by inserting:

echo 1 > /proc/sys/vm/drop_caches

right before the verify step in old_burn-in.sh.


Ultimately of course, you can do whatever you want for testing, but I'd
definitely like to get a detailed description of what you did or what you're
planning to do in that area. I'm paranoid about btree changes, even though
I'm really happy that you took this task on ;)
	--Mark

--
Mark Fasheh
Principal Software Developer, Oracle
mark.fasheh@oracle.com


diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index cf30761..0bebced 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -49,6 +49,8 @@ #include "uptodate.h"
 
 #include "buffer_head_io.h"
 
+static void ocfs2_check_tree(struct inode *inode);
+
 static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc);
 static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt,
 					 struct ocfs2_extent_block *eb);
@@ -3782,6 +3784,9 @@ int ocfs2_insert_extent(struct ocfs2_sup
 	else
 		ocfs2_extent_map_insert_rec(inode, &rec);
 
+	if (status == 0)
+		ocfs2_check_tree(inode);
+
 bail:
 	if (bh)
 		brelse(bh);
@@ -4134,6 +4139,9 @@ int ocfs2_mark_extent_written(struct ino
 	if (ret)
 		mlog_errno(ret);
 
+	if (ret == 0)
+		ocfs2_check_tree(inode);
+
 out:
 	ocfs2_free_path(left_path);
 	return ret;
@@ -4501,6 +4509,9 @@ int ocfs2_remove_extent(struct inode *in
 		}
 	}
 
+	if (ret == 0)
+		ocfs2_check_tree(inode);
+
 out:
 	ocfs2_free_path(path);
 	return ret;
@@ -6122,3 +6133,177 @@ static void ocfs2_free_truncate_context(
 
 	kfree(tc);
 }
+
+#define INDEX_DEPTH (OCFS2_MAX_PATH_DEPTH - 1)
+
+static void ocfs2_path_error(void)
+{
+	handle_t *handle = journal_current_handle();
+
+	handle->h_sync = 1;
+	journal_stop(handle);
+
+	BUG();
+}
+
+static void ocfs2_check_path(struct inode *inode,
+			     struct ocfs2_path *path,
+			     int *check_index)
+{
+	int i, next_free, node;
+	u32 parent_range, child_range;
+	struct ocfs2_extent_list *el;
+	struct ocfs2_extent_rec *rec;
+	struct ocfs2_extent_rec *rec1;
+	struct ocfs2_extent_rec *parent_rec;
+
+	for(node = 0; node < path_num_items(path); node++) {
+		el = path->p_node[node].el;
+
+		next_free = le16_to_cpu(el->l_next_free_rec);
+
+		/* Check for duplicate records */
+		/* Check for records in order */
+		for(i = 0; i < (next_free - 1); i++) {
+			rec = &el->l_recs[i];
+			rec1 = &el->l_recs[i+1];
+
+			if (!ocfs2_is_empty_extent(rec) &&
+			    le32_to_cpu(rec1->e_cpos) <=
+			    le32_to_cpu(rec->e_cpos))
+				goto bad_records;
+		}
+	}
+
+	/* Check ranges of records */
+	el = path_root_el(path);
+	parent_rec = &el->l_recs[check_index[0]];
+	i = 1;
+	while (i <= path->p_tree_depth) {
+		struct ocfs2_extent_rec *child_rec;
+
+		el = path->p_node[i].el;
+
+		parent_range = le32_to_cpu(parent_rec->e_cpos) +
+			le32_to_cpu(parent_rec->e_int_clusters);
+		child_range = ocfs2_sum_rightmost_rec(el);
+		if (child_range > parent_range)
+			goto bad_range;
+
+		child_rec = &el->l_recs[0];
+
+		/*
+		 * The leftmost branch is allowed to have a starting
+		 * cpos of zero but children who actually start after
+		 * that.
+		 */
+		if (parent_rec->e_cpos &&
+		    (le32_to_cpu(parent_rec->e_cpos) <
+		     le32_to_cpu(child_rec->e_cpos)))
+			goto bad_range;
+
+		parent_rec = &el->l_recs[check_index[i]];
+
+		i++;
+	}
+
+	return;
+
+bad_records:
+	mlog(ML_ERROR, "Inode %lu: Bad Records\n",
+	     (unsigned long)inode->i_ino);
+	ocfs2_path_error();
+	return;
+
+bad_range:
+	mlog(ML_ERROR, "Inode %lu: Bad Range\n",
+	     (unsigned long)inode->i_ino);
+	mlog(ML_ERROR, "Child block: %llu, Parent rec: (%u, %u, %llu)\n",
+	     (unsigned long long)path->p_node[i].bh->b_blocknr,
+	     le32_to_cpu(parent_rec->e_cpos),
+	     le32_to_cpu(parent_rec->e_int_clusters),
+	     (unsigned long long)le64_to_cpu(parent_rec->e_blkno));
+	ocfs2_path_error();
+}
+
+static void ocfs2_check_tree(struct inode *inode)
+{
+	int ret, i;
+	int check_index[INDEX_DEPTH] = {0, };
+	u64 blkno;
+	struct ocfs2_dinode *di;
+	struct buffer_head *di_bh = NULL;
+	struct buffer_head *bh;
+	struct buffer_head *first_tail_bh = NULL;
+	struct ocfs2_path *path = NULL;
+	struct ocfs2_extent_block *eb;
+	struct ocfs2_extent_list *el;
+
+	ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), OCFS2_I(inode)->ip_blkno,
+			       &di_bh, OCFS2_BH_CACHED, inode);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	di = (struct ocfs2_dinode *)di_bh->b_data;
+
+	path = ocfs2_new_inode_path(di_bh);
+	if (!path) {
+		ret = -ENOMEM;
+		mlog_errno(ret);
+		goto out;
+	}
+
+restart:
+	i = 0;
+	while (i < path->p_tree_depth) {
+		el = path->p_node[i].el;
+
+		BUG_ON(check_index[i] >= le16_to_cpu(el->l_next_free_rec));
+
+		blkno = le64_to_cpu(el->l_recs[check_index[i]].e_blkno);
+		bh = NULL;
+		ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), blkno,
+				       &bh, OCFS2_BH_CACHED, inode);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+
+		ocfs2_path_insert_eb(path, i + 1, bh);
+
+		eb = (struct ocfs2_extent_block *) bh->b_data;
+		if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb))
+			BUG();
+
+		i++;
+	}
+
+	ocfs2_check_path(inode, path, check_index);
+
+	if (le64_to_cpu(di->i_last_eb_blk) != path_leaf_bh(path)->b_blocknr
+	    && path->p_tree_depth) {
+		int max;
+
+		for(i = (path->p_tree_depth - 1); i >= 0; i--) {
+			el = path->p_node[i].el;
+
+			max = le16_to_cpu(el->l_next_free_rec);
+
+			check_index[i]++;
+			if (check_index[i] >= max)
+				check_index[i] = 0;
+			else
+				break;
+		}
+
+		ocfs2_reinit_path(path, 1);
+		goto restart;
+	}
+
+out:
+	ocfs2_free_path(path);
+	brelse(di_bh);
+	brelse(first_tail_bh);
+}

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [Ocfs2-devel] [PATCH 1/2] Add support for cross extent block merge.V1
  2008-01-04  0:45 ` [Ocfs2-devel] [PATCH 1/2] Add support for cross extent block merge.V1 Tao Ma
@ 2008-01-06 21:52   ` Mark Fasheh
  2008-01-07  1:06     ` tao.ma
  0 siblings, 1 reply; 9+ messages in thread
From: Mark Fasheh @ 2008-01-06 21:52 UTC (permalink / raw)
  To: ocfs2-devel

On Fri, Jan 04, 2008 at 04:45:38PM +0800, tao.ma wrote:
> In ocfs2_merge_rec_left, when we find the merge extent is "CONTIG_RIGHT"
> with the first extent record of the next extent block, we will merge it to
> the next extent block and change all the related extent blocks accordingly.
> 
> In ocfs2_merge_rec_right, when we find the merge extent is "CONTIG_LEFT"
> with the last extent record of the previous extent block, we will merge
> it to the prevoius extent block and change all the related extent blocks
> accordingly.
> 
> As for CONTIG_LEFTRIGHT, we will handle CONTIG_RIGHT first when the split_index
> is zero since it is more efficient and easier.
> 
> Signed-off-by: Tao Ma <tao.ma@oracle.com>
> ---
>  fs/ocfs2/alloc.c |  367 ++++++++++++++++++++++++++++++++++++++++++++++++-----
>  1 files changed, 332 insertions(+), 35 deletions(-)
> 
> diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
> index 23c8cda..c256750 100644
> --- a/fs/ocfs2/alloc.c
> +++ b/fs/ocfs2/alloc.c
> @@ -1450,6 +1450,8 @@ static void ocfs2_adjust_root_records(struct ocfs2_extent_list *root_el,
>   *   - When our insert into the right path leaf is at the leftmost edge
>   *     and requires an update of the path immediately to it's left. This
>   *     can occur at the end of some types of rotation and appending inserts.
> + *   - When we've adjusted the last extent record in the left path leaf and the
> + *     1st extent record in the right path leaf during cross extent block merge.
>   */
>  static void ocfs2_complete_edge_insert(struct inode *inode, handle_t *handle,
>  				       struct ocfs2_path *left_path,

Ok, great. I like that we're keeping these functions documented... It's hard
enough to follow as-is!



>  /*
>   * Remove split_rec clusters from the record at index and merge them
>   * onto the beginning of the record at index + 1.
>   */

Should update this comment though...

> -static int ocfs2_merge_rec_right(struct inode *inode, struct buffer_head *bh,
> -				handle_t *handle,
> -				struct ocfs2_extent_rec *split_rec,
> -				struct ocfs2_extent_list *el, int index)
> +static int ocfs2_merge_rec_right(struct inode *inode,
> +				 struct ocfs2_path *left_path,
> +				 handle_t *handle,
> +				 struct ocfs2_extent_rec *split_rec,
> +				 struct ocfs2_extent_list *el, int index)
>  {


> @@ -2751,7 +2848,90 @@ static int ocfs2_merge_rec_right(struct inode *inode, struct buffer_head *bh,
>  	if (ret)
>  		mlog_errno(ret);
>  
> +	if (right_path) {
> +		ret = ocfs2_journal_dirty(handle, path_leaf_bh(right_path));
> +		if (ret)
> +			mlog_errno(ret);
> +
> +		root_bh = left_path->p_node[subtree_index].bh;
> +		BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
> +
> +		ret = ocfs2_journal_access(handle, inode, root_bh,
> +					   OCFS2_JOURNAL_ACCESS_WRITE);
> +		if (ret) {
> +			mlog_errno(ret);
> +			goto out;
> +		}
> +
> +		for (i = subtree_index + 1;
> +		     i < path_num_items(right_path); i++) {
> +			ret = ocfs2_journal_access(handle, inode,
> +						   right_path->p_node[i].bh,
> +						   OCFS2_JOURNAL_ACCESS_WRITE);
> +			if (ret) {
> +				mlog_errno(ret);
> +				goto out;
> +			}
> +
> +			ret = ocfs2_journal_access(handle, inode,
> +						   left_path->p_node[i].bh,
> +						   OCFS2_JOURNAL_ACCESS_WRITE);
> +			if (ret) {
> +				mlog_errno(ret);
> +				goto out;
> +			}
> +		}

I'd move the journal_access() calls here up before we actually change the
records. That way we can catch journal errors before the tree gets into an
transient state.


> @@ -2759,22 +2939,64 @@ out:
>   * Remove split_rec clusters from the record at index and merge them
>   * onto the tail of the record at index - 1.
>   */

Should update this comment.


> -static int ocfs2_merge_rec_left(struct inode *inode, struct buffer_head *bh,
> +static int ocfs2_merge_rec_left(struct inode *inode,
> +				struct ocfs2_path *right_path,
>  				handle_t *handle,
>  				struct ocfs2_extent_rec *split_rec,
>  				struct ocfs2_extent_list *el, int index)
>  {
> -	int ret, has_empty_extent = 0;
> +	int ret, i, subtree_index = 0, has_empty_extent = 0;
>  	unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters);
>  	struct ocfs2_extent_rec *left_rec;
>  	struct ocfs2_extent_rec *right_rec;
> +	struct buffer_head *bh = path_leaf_bh(right_path);
> +	struct buffer_head *root_bh = NULL;
> +	struct ocfs2_path *left_path = NULL;
> +	struct ocfs2_extent_list *left_el;
>  
> -	BUG_ON(index <= 0);
> +	BUG_ON(index < 0);
>  
> -	left_rec = &el->l_recs[index - 1];
>  	right_rec = &el->l_recs[index];
> -	if (ocfs2_is_empty_extent(&el->l_recs[0]))
> -		has_empty_extent = 1;
> +	if (index == 0) {
> +		/* we meet with a cross extent block merge. */
> +		ret = ocfs2_get_left_path(inode, right_path, &left_path);
> +		if (ret) {
> +			mlog_errno(ret);
> +			goto out;
> +		}
> +
> +		left_el = path_leaf_el(left_path);
> +		BUG_ON(le16_to_cpu(left_el->l_next_free_rec) !=
> +		       le16_to_cpu(left_el->l_count));
> +
> +		left_rec = &left_el->l_recs[le16_to_cpu(left_el->l_count) - 1];

Using left_el->l_count to index the array is correct here, but would you
mind changing this to use l_next_free_rec so it's more consistent with what
we usually do? It actually reads a bit weird to my eyes because I'm used to
seeing l_next_free_rec inside of array derefs!


> @@ -2802,24 +3024,77 @@ static int ocfs2_merge_rec_left(struct inode *inode, struct buffer_head *bh,
>  	ocfs2_cleanup_merge(el, index);
>  
>  	ret = ocfs2_journal_dirty(handle, bh);
> -	if (ret)
> +	if (ret) {
>  		mlog_errno(ret);
> +		goto out;
> +	}
>  
> +	if (left_path) {
> +		ret = ocfs2_journal_dirty(handle, path_leaf_bh(left_path));
> +		if (ret) {
> +			mlog_errno(ret);
> +			goto out;
> +		}
> +
> +		/*
> +		 * In the situation that the right_rec is empty and the extent
> +		 * block is empty also, ocfs2_complete_edge_insert can't handle
> +		 * it, while ocfs2_rotate_tree_left can be called and the extent
> +		 * block will be deleted since c_split_covers_rec is set.
> +		 */
> +		if (le16_to_cpu(right_rec->e_leaf_clusters) == 0 &&
> +		    le16_to_cpu(el->l_next_free_rec) == 1)
> +			goto out;
>

Hmm, I'm worried that we're exiting this function with a path whose upper
cpos values are inconsistent. An error before we get the chance to actually
do the rotate / removal could leave us with an inconsistent tree.

I'm not sure that we can just slap some code to handle it here though - from
what I can tell, the callers are expecting that we'd leave an empty extent
for them to handle.

I see that ocfs2_rotate_tree_left _almost_ handles this situation
correctly, except the transaction extend could fail before we get to remove
the blocks. That might actually be a bug, though I have to think about it
some more. Generally if ocfs2_rotate_tree_left handled it 100% correctly, we
_could_ just comment the heck out of this function that callers *must* call
the rotate function later. Since calling ocfs2_rotate_tree_left() is
implicitely decided by the callers, I'd also want to audit the code to make
sure that it always gets called in the new merge situations we've
introduced.


> +		root_bh = left_path->p_node[subtree_index].bh;
> +		BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
> +
> +		ret = ocfs2_journal_access(handle, inode, root_bh,
> +					   OCFS2_JOURNAL_ACCESS_WRITE);
> +		if (ret) {
> +			mlog_errno(ret);
> +			goto out;
> +		}
> +
> +		for (i = subtree_index + 1;
> +		     i < path_num_items(right_path); i++) {
> +			ret = ocfs2_journal_access(handle, inode,
> +						   right_path->p_node[i].bh,
> +						   OCFS2_JOURNAL_ACCESS_WRITE);
> +			if (ret) {
> +				mlog_errno(ret);
> +				goto out;
> +			}
> +
> +			ret = ocfs2_journal_access(handle, inode,
> +						   left_path->p_node[i].bh,
> +						   OCFS2_JOURNAL_ACCESS_WRITE);
> +			if (ret) {
> +				mlog_errno(ret);
> +				goto out;
> +			}
> +		}

Same comment as before regarding the journal_access calls.


> @@ -2858,9 +3132,21 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
>  		 * Since the adding of an empty extent shifts
>  		 * everything back to the right, there's no need to
>  		 * update split_index here.
> -		 */
> -		ret = ocfs2_merge_rec_left(inode, path_leaf_bh(left_path),
> -					   handle, split_rec, el, split_index);
> +		 *
> + 		 * When the split_index is zero, we need to merge it to the
> + 		 * prevoius extent block. It is more efficient and easier
> + 		 * if we do merge_right first and merge_left later.
> + 		 */

So why don't we just always do the left merge 1st, instead of switching on
them like this?
	--Mark

--
Mark Fasheh
Principal Software Developer, Oracle
mark.fasheh@oracle.com

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Ocfs2-devel] [PATCH 2/2] Enable cross extent block merge.V1
  2008-01-04  0:47 ` [Ocfs2-devel] [PATCH 2/2] Enable " Tao Ma
@ 2008-01-06 22:00   ` Mark Fasheh
  0 siblings, 0 replies; 9+ messages in thread
From: Mark Fasheh @ 2008-01-06 22:00 UTC (permalink / raw)
  To: ocfs2-devel

On Fri, Jan 04, 2008 at 04:46:19PM +0800, tao.ma wrote:
>  static enum ocfs2_contig_type
> -ocfs2_figure_merge_contig_type(struct inode *inode,
> +ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
>  			       struct ocfs2_extent_list *el, int index,
>  			       struct ocfs2_extent_rec *split_rec)
>  {
> -	struct ocfs2_extent_rec *rec;
> +	int status;
> +	struct ocfs2_extent_rec *rec = NULL;
>  	enum ocfs2_contig_type ret = CONTIG_NONE;
> +	u32 left_cpos, right_cpos;
> +	struct ocfs2_extent_list *new_el;
> +	struct ocfs2_path *left_path = NULL, *right_path = NULL;
> +
> +	if (index > 0) {
> +		rec = &el->l_recs[index - 1];
> +	} else if (path->p_tree_depth > 0) {
> +		status = ocfs2_find_cpos_for_left_leaf(inode->i_sb,
> +						       path, &left_cpos);
> +		if (status)
> +			goto out;
> +
> +		if (left_cpos != 0) {
> +			left_path = ocfs2_new_path(path_root_bh(path),
> +						   path_root_el(path));
> +			if (!left_path)
> +				goto out;
> +
> +			status = ocfs2_find_path(inode, left_path, left_cpos);
> +			if (status)
> +				goto out;
> +
> +			new_el = path_leaf_el(left_path);
> +
> +			BUG_ON(le16_to_cpu(new_el->l_next_free_rec) !=
> +			       le16_to_cpu(new_el->l_count));

Since we haven't seen this extent block before, this error condition should
make the file system go read-only, not BUG()


> +			rec = &new_el->l_recs[le16_to_cpu(new_el->l_count) - 1];

Use l_next_free_rec in the array here please.
	--Mark

--
Mark Fasheh
Principal Software Developer, Oracle
mark.fasheh@oracle.com

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Ocfs2-devel] [PATCH 1/2] Add support for cross extent block merge.V1
  2008-01-06 21:52   ` Mark Fasheh
@ 2008-01-07  1:06     ` tao.ma
  2008-01-10 17:48       ` Mark Fasheh
  0 siblings, 1 reply; 9+ messages in thread
From: tao.ma @ 2008-01-07  1:06 UTC (permalink / raw)
  To: ocfs2-devel

Thanks for your review.
Mark Fasheh wrote:
>> @@ -2802,24 +3024,77 @@ static int ocfs2_merge_rec_left(struct inode *inode, struct buffer_head *bh,
>>  	ocfs2_cleanup_merge(el, index);
>>  
>>  	ret = ocfs2_journal_dirty(handle, bh);
>> -	if (ret)
>> +	if (ret) {
>>  		mlog_errno(ret);
>> +		goto out;
>> +	}
>>  
>> +	if (left_path) {
>> +		ret = ocfs2_journal_dirty(handle, path_leaf_bh(left_path));
>> +		if (ret) {
>> +			mlog_errno(ret);
>> +			goto out;
>> +		}
>> +
>> +		/*
>> +		 * In the situation that the right_rec is empty and the extent
>> +		 * block is empty also, ocfs2_complete_edge_insert can't handle
>> +		 * it, while ocfs2_rotate_tree_left can be called and the extent
>> +		 * block will be deleted since c_split_covers_rec is set.
>> +		 */
>> +		if (le16_to_cpu(right_rec->e_leaf_clusters) == 0 &&
>> +		    le16_to_cpu(el->l_next_free_rec) == 1)
>> +			goto out;
>>
>>     
>
> Hmm, I'm worried that we're exiting this function with a path whose upper
> cpos values are inconsistent. An error before we get the chance to actually
> do the rotate / removal could leave us with an inconsistent tree.
>
> I'm not sure that we can just slap some code to handle it here though - from
> what I can tell, the callers are expecting that we'd leave an empty extent
> for them to handle.
>   
Yes. So the situation here is really hard to handle and lead me to make 
the decision that we should let ocfs2_roatate_tree_left go on the issue.
> I see that ocfs2_rotate_tree_left _almost_ handles this situation
> correctly, except the transaction extend could fail before we get to remove
> the blocks. That might actually be a bug, though I have to think about it
> some more. Generally if ocfs2_rotate_tree_left handled it 100% correctly, we
> _could_ just comment the heck out of this function that callers *must* call
> the rotate function later. Since calling ocfs2_rotate_tree_left() is
> implicitely decided by the callers, I'd also want to audit the code to make
> sure that it always gets called in the new merge situations we've
> introduced.
>   
So any suggestion is welcomed. By now, I haven't found a good way to 
handle it except my current ugly codes. ;)
>
>   
>> @@ -2858,9 +3132,21 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
>>  		 * Since the adding of an empty extent shifts
>>  		 * everything back to the right, there's no need to
>>  		 * update split_index here.
>> -		 */
>> -		ret = ocfs2_merge_rec_left(inode, path_leaf_bh(left_path),
>> -					   handle, split_rec, el, split_index);
>> +		 *
>> + 		 * When the split_index is zero, we need to merge it to the
>> + 		 * prevoius extent block. It is more efficient and easier
>> + 		 * if we do merge_right first and merge_left later.
>> + 		 */
>>     
>
> So why don't we just always do the left merge 1st, instead of switching on
> them like this?
>   
If split_index==0, then if we do merge_right first, it will not touch 
the previous extent block(so no cross extent block merge is needed) and 
only the extent block after the record will be affected. The second 
merge_left will be cross extent block merge.
If split_index==l_count-1, then if we do merge_left first, the first 
merge will move the recs[0] of the next extent block into this block. So 
when merge_right is called, no cross extent block merge is needed.
So if you don't like this switch, I would prefer merge_right first and 
merge_left second to the original merge_left first. ;)

btw, I have patched your code of ocfs2_check_tree to my current git 
tree, it works well under my test script. The test script should be 
ready for review soon.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Ocfs2-devel] [PATCH 1/2] Add support for cross extent block merge.V1
  2008-01-07  1:06     ` tao.ma
@ 2008-01-10 17:48       ` Mark Fasheh
  2008-01-10 18:21         ` tao.ma
  0 siblings, 1 reply; 9+ messages in thread
From: Mark Fasheh @ 2008-01-10 17:48 UTC (permalink / raw)
  To: ocfs2-devel

On Mon, Jan 07, 2008 at 05:05:23PM +0800, tao.ma wrote:
>> Hmm, I'm worried that we're exiting this function with a path whose upper
>> cpos values are inconsistent. An error before we get the chance to 
>> actually
>> do the rotate / removal could leave us with an inconsistent tree.
>>
>> I'm not sure that we can just slap some code to handle it here though - 
>> from
>> what I can tell, the callers are expecting that we'd leave an empty extent
>> for them to handle.
>>   
> Yes. So the situation here is really hard to handle and lead me to make the 
> decision that we should let ocfs2_roatate_tree_left go on the issue.
>> I see that ocfs2_rotate_tree_left _almost_ handles this situation
>> correctly, except the transaction extend could fail before we get to 
>> remove
>> the blocks. That might actually be a bug, though I have to think about it
>> some more. Generally if ocfs2_rotate_tree_left handled it 100% correctly, 
>> we
>> _could_ just comment the heck out of this function that callers *must* 
>> call
>> the rotate function later. Since calling ocfs2_rotate_tree_left() is
>> implicitely decided by the callers, I'd also want to audit the code to 
>> make
>> sure that it always gets called in the new merge situations we've
>> introduced.
>>   
> So any suggestion is welcomed. By now, I haven't found a good way to handle 
> it except my current ugly codes. ;)

Ok, I've had some time to think about this. We should just handle this
corner case in ocfs2_merge_rec_left(). ocfs2_rotate_tree_left() cleanly
exits if the empty extent has already been removed, so it's safe to
unconditionally call it as we do today.


>> So why don't we just always do the left merge 1st, instead of switching on
>> them like this?
>>   
> If split_index==0, then if we do merge_right first, it will not touch the 
> previous extent block(so no cross extent block merge is needed) and only 
> the extent block after the record will be affected. The second merge_left 
> will be cross extent block merge.
> If split_index==l_count-1, then if we do merge_left first, the first merge 
> will move the recs[0] of the next extent block into this block. So when 
> merge_right is called, no cross extent block merge is needed.
> So if you don't like this switch, I would prefer merge_right first and 
> merge_left second to the original merge_left first. ;)

Yeah, I think what I was looking for was whether you had a reason to not
just re-order the merging in all cases.

Do we ever get into the case that a leftright merge is at index == (l_count
- 1)? I think the search should prevent that (and instead give us index == 0
for the extent block to the right).

If so, I think that just always doing the left merge 1st makes sense. I
don't see any problem with that breaking the code to merge within a block
either.


> btw, I have patched your code of ocfs2_check_tree to my current git tree, 
> it works well under my test script. The test script should be ready for 
> review soon.

Great, post it ;) Have you looked at running the burn-in script?
	--Mark

--
Mark Fasheh
Principal Software Developer, Oracle
mark.fasheh@oracle.com

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Ocfs2-devel] [PATCH 1/2] Add support for cross extent block merge.V1
  2008-01-10 17:48       ` Mark Fasheh
@ 2008-01-10 18:21         ` tao.ma
  0 siblings, 0 replies; 9+ messages in thread
From: tao.ma @ 2008-01-10 18:21 UTC (permalink / raw)
  To: ocfs2-devel

Mark Fasheh wrote:
> On Mon, Jan 07, 2008 at 05:05:23PM +0800, tao.ma wrote:
>   
>>> Hmm, I'm worried that we're exiting this function with a path whose upper
>>> cpos values are inconsistent. An error before we get the chance to 
>>> actually
>>> do the rotate / removal could leave us with an inconsistent tree.
>>>
>>> I'm not sure that we can just slap some code to handle it here though - 
>>> from
>>> what I can tell, the callers are expecting that we'd leave an empty extent
>>> for them to handle.
>>>   
>>>       
>> Yes. So the situation here is really hard to handle and lead me to make the 
>> decision that we should let ocfs2_roatate_tree_left go on the issue.
>>     
>>> I see that ocfs2_rotate_tree_left _almost_ handles this situation
>>> correctly, except the transaction extend could fail before we get to 
>>> remove
>>> the blocks. That might actually be a bug, though I have to think about it
>>> some more. Generally if ocfs2_rotate_tree_left handled it 100% correctly, 
>>> we
>>> _could_ just comment the heck out of this function that callers *must* 
>>> call
>>> the rotate function later. Since calling ocfs2_rotate_tree_left() is
>>> implicitely decided by the callers, I'd also want to audit the code to 
>>> make
>>> sure that it always gets called in the new merge situations we've
>>> introduced.
>>>   
>>>       
>> So any suggestion is welcomed. By now, I haven't found a good way to handle 
>> it except my current ugly codes. ;)
>>     
>
> Ok, I've had some time to think about this. We should just handle this
> corner case in ocfs2_merge_rec_left(). ocfs2_rotate_tree_left() cleanly
> exits if the empty extent has already been removed, so it's safe to
> unconditionally call it as we do today.
>   
hmm, ok. Maybe a helper function is needed since now 
ocfs2_merge_rec_left have to handle so much issue.
>
>   
>>> So why don't we just always do the left merge 1st, instead of switching on
>>> them like this?
>>>   
>>>       
>> If split_index==0, then if we do merge_right first, it will not touch the 
>> previous extent block(so no cross extent block merge is needed) and only 
>> the extent block after the record will be affected. The second merge_left 
>> will be cross extent block merge.
>> If split_index==l_count-1, then if we do merge_left first, the first merge 
>> will move the recs[0] of the next extent block into this block. So when 
>> merge_right is called, no cross extent block merge is needed.
>> So if you don't like this switch, I would prefer merge_right first and 
>> merge_left second to the original merge_left first. ;)
>>     
>
> Yeah, I think what I was looking for was whether you had a reason to not
> just re-order the merging in all cases.
>
> Do we ever get into the case that a leftright merge is at index == (l_count
> - 1)? I think the search should prevent that (and instead give us index == 0
> for the extent block to the right).
>   
Yes, I think we can get into this issue when the unwritten extent exists 
in l_count-1.
> If so, I think that just always doing the left merge 1st makes sense. I
> don't see any problem with that breaking the code to merge within a block
> either.
>   
Do you mean right merge first? As I have mentioned above, if index==0, 
the first right merge first will not touch cross extent block. Only the 
second one has to.
And now I think some of my previous comment is wrong. ;)
Even if index == l_count-1, it is ok for right merge first. Since the 
first right merge is a cross extent block merge, but the second left 
merge don't touch cross extent block. So there is totally one cross 
extent block merge.
>
>   
>> btw, I have patched your code of ocfs2_check_tree to my current git tree, 
>> it works well under my test script. The test script should be ready for 
>> review soon.
>>     
>
> Great, post it ;) Have you looked at running the burn-in script
OK, I will post it soon. Actually it is generated when I create sparse 
file support in ocfs2-tools. I always want to modify it to be a test 
script for both kernel and the tools. Now it really goes. ;)
Actually I make the ocfs2 file system read-only one time when I run one 
random_rw_test in it. :)
But I have to sparse some time to investigate the real reason. So please 
wait. ;)

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2008-01-10 18:21 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-01-04  0:33 [Ocfs2-devel] [PATCH 0/2] Unwritten extent merge update,V1 Tao Ma
2008-01-04  0:45 ` [Ocfs2-devel] [PATCH 1/2] Add support for cross extent block merge.V1 Tao Ma
2008-01-06 21:52   ` Mark Fasheh
2008-01-07  1:06     ` tao.ma
2008-01-10 17:48       ` Mark Fasheh
2008-01-10 18:21         ` tao.ma
2008-01-04  0:47 ` [Ocfs2-devel] [PATCH 2/2] Enable " Tao Ma
2008-01-06 22:00   ` Mark Fasheh
2008-01-06 13:44 ` [Ocfs2-devel] [PATCH 0/2] Unwritten extent merge update,V1 Mark Fasheh

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.