* [PATCH 1/2 v3] Btrfs: use flag EXTENT_DEFRAG for snapshot-aware defrag
@ 2012-09-17 9:58 Liu Bo
2012-09-17 9:58 ` [PATCH 2/2 v3] Btrfs: " Liu Bo
0 siblings, 1 reply; 14+ messages in thread
From: Liu Bo @ 2012-09-17 9:58 UTC (permalink / raw)
To: linux-btrfs; +Cc: dave
We're going to use the flag EXTENT_DEFRAG to indicate which ranges
belong to a defragment operation so that we can implement snapshot-aware
defrag: we set the EXTENT_DEFRAG flag when dirtying the extents that
need to be defragmented, so that later on the writeback thread can
differentiate between normal writeback and writeback started by
defragmentation. This patch provides the marking for the latter case.
Original patch by Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
---
fs/btrfs/extent_io.c | 8 ++++++++
fs/btrfs/extent_io.h | 2 ++
fs/btrfs/file.c | 4 ++--
fs/btrfs/inode.c | 20 ++++++++++++--------
fs/btrfs/ioctl.c | 8 ++++----
5 files changed, 28 insertions(+), 14 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 4c87847..604e404 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1144,6 +1144,14 @@ int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
NULL, cached_state, mask);
}
+int set_extent_defrag(struct extent_io_tree *tree, u64 start, u64 end,
+ struct extent_state **cached_state, gfp_t mask)
+{
+ return set_extent_bit(tree, start, end,
+ EXTENT_DELALLOC | EXTENT_UPTODATE | EXTENT_DEFRAG,
+ NULL, cached_state, mask);
+}
+
int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask)
{
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 25900af..512f8da 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -235,6 +235,8 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
int bits, int clear_bits, gfp_t mask);
int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached_state, gfp_t mask);
+int set_extent_defrag(struct extent_io_tree *tree, u64 start, u64 end,
+ struct extent_state **cached_state, gfp_t mask);
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
u64 *start_ret, u64 *end_ret, int bits);
struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 5caf285..226690a 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1173,8 +1173,8 @@ again:
clear_extent_bit(&BTRFS_I(inode)->io_tree, start_pos,
last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC |
- EXTENT_DO_ACCOUNTING, 0, 0, &cached_state,
- GFP_NOFS);
+ EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
+ 0, 0, &cached_state, GFP_NOFS);
unlock_extent_cached(&BTRFS_I(inode)->io_tree,
start_pos, last_pos - 1, &cached_state,
GFP_NOFS);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index b2c3514..55857eb 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3531,7 +3531,8 @@ again:
}
clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
- EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING,
+ EXTENT_DIRTY | EXTENT_DELALLOC |
+ EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
0, 0, &cached_state, GFP_NOFS);
ret = btrfs_set_extent_delalloc(inode, page_start, page_end,
@@ -5998,7 +5999,8 @@ unlock:
if (lockstart < lockend) {
if (create && len < lockend - lockstart) {
clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
- lockstart + len - 1, unlock_bits, 1, 0,
+ lockstart + len - 1,
+ unlock_bits | EXTENT_DEFRAG, 1, 0,
&cached_state, GFP_NOFS);
/*
* Beside unlock, we also need to cleanup reserved space
@@ -6006,8 +6008,8 @@ unlock:
*/
clear_extent_bit(&BTRFS_I(inode)->io_tree,
lockstart + len, lockend,
- unlock_bits | EXTENT_DO_ACCOUNTING,
- 1, 0, NULL, GFP_NOFS);
+ unlock_bits | EXTENT_DO_ACCOUNTING |
+ EXTENT_DEFRAG, 1, 0, NULL, GFP_NOFS);
} else {
clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
lockend, unlock_bits, 1, 0,
@@ -6572,8 +6574,8 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
*/
clear_extent_bit(tree, page_start, page_end,
EXTENT_DIRTY | EXTENT_DELALLOC |
- EXTENT_LOCKED | EXTENT_DO_ACCOUNTING, 1, 0,
- &cached_state, GFP_NOFS);
+ EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
+ EXTENT_DEFRAG, 1, 0, &cached_state, GFP_NOFS);
/*
* whoever cleared the private bit is responsible
* for the finish_ordered_io
@@ -6589,7 +6591,8 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
}
clear_extent_bit(tree, page_start, page_end,
EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
- EXTENT_DO_ACCOUNTING, 1, 1, &cached_state, GFP_NOFS);
+ EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 1,
+ &cached_state, GFP_NOFS);
__btrfs_releasepage(page, GFP_NOFS);
ClearPageChecked(page);
@@ -6686,7 +6689,8 @@ again:
* prepare_pages in the normal write path.
*/
clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
- EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING,
+ EXTENT_DIRTY | EXTENT_DELALLOC |
+ EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
0, 0, &cached_state, GFP_NOFS);
ret = btrfs_set_extent_delalloc(inode, page_start, page_end,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 00ddf22..db91f77 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1027,8 +1027,8 @@ again:
page_start, page_end - 1, 0, &cached_state);
clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start,
page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC |
- EXTENT_DO_ACCOUNTING, 0, 0, &cached_state,
- GFP_NOFS);
+ EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0,
+ &cached_state, GFP_NOFS);
if (i_done != page_cnt) {
spin_lock(&BTRFS_I(inode)->lock);
@@ -1039,8 +1039,8 @@ again:
}
- btrfs_set_extent_delalloc(inode, page_start, page_end - 1,
- &cached_state);
+ set_extent_defrag(&BTRFS_I(inode)->io_tree, page_start, page_end - 1,
+ &cached_state, GFP_NOFS);
unlock_extent_cached(&BTRFS_I(inode)->io_tree,
page_start, page_end - 1, &cached_state,
--
1.7.7.6
* [PATCH 2/2 v3] Btrfs: snapshot-aware defrag
2012-09-17 9:58 [PATCH 1/2 v3] Btrfs: use flag EXTENT_DEFRAG for snapshot-aware defrag Liu Bo
@ 2012-09-17 9:58 ` Liu Bo
2012-09-17 10:04 ` Liu Bo
` (2 more replies)
0 siblings, 3 replies; 14+ messages in thread
From: Liu Bo @ 2012-09-17 9:58 UTC (permalink / raw)
To: linux-btrfs; +Cc: dave
This comes from one of btrfs's project ideas: as we defragment files,
we break any sharing with other snapshots.
The balancing code already preserves such sharing, and defrag needs to
grow this ability as well.
Now we're able to fill that gap with this patch, which makes full use of
the backref walking infrastructure.
Here is the basic idea:
o set the writeback ranges started by defragment with the flag EXTENT_DEFRAG
o at endio, after we finish updating the fs tree, use backref walking to find
all parents of the ranges and re-link them with the new COWed file layout by
adding corresponding backrefs.
Original patch by Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
---
Changes since v2:
- adopt better names for local structures.
- add proper reschedule phrase
- better error handling
- minor cleanups
(Thanks, David)
fs/btrfs/inode.c | 617 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 617 insertions(+), 0 deletions(-)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 55857eb..8278aa2 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -54,6 +54,7 @@
#include "locking.h"
#include "free-space-cache.h"
#include "inode-map.h"
+#include "backref.h"
struct btrfs_iget_args {
u64 ino;
@@ -1846,6 +1847,608 @@ out:
return ret;
}
+/* snapshot-aware defrag */
+struct sa_defrag_extent_backref {
+ struct rb_node node;
+ struct old_sa_defrag_extent *old;
+ u64 root_id;
+ u64 inum;
+ u64 file_pos;
+ u64 extent_offset;
+ u64 num_bytes;
+ u64 generation;
+};
+
+struct old_sa_defrag_extent {
+ struct list_head list;
+ struct new_sa_defrag_extent *new;
+
+ u64 extent_offset;
+ u64 bytenr;
+ u64 offset;
+ u64 len;
+ int count;
+};
+
+struct new_sa_defrag_extent {
+ struct rb_root root;
+ struct list_head head;
+ struct btrfs_path *path;
+ struct inode *inode;
+ u64 file_pos;
+ u64 len;
+ u64 bytenr;
+ u64 disk_len;
+ u8 compress_type;
+};
+
+static int backref_comp(struct sa_defrag_extent_backref *b1,
+ struct sa_defrag_extent_backref *b2)
+{
+ if (b1->root_id < b2->root_id)
+ return -1;
+ else if (b1->root_id > b2->root_id)
+ return 1;
+
+ if (b1->inum < b2->inum)
+ return -1;
+ else if (b1->inum > b2->inum)
+ return 1;
+
+ if (b1->file_pos < b2->file_pos)
+ return -1;
+ else if (b1->file_pos > b2->file_pos)
+ return 1;
+
+ WARN_ON(1);
+ return 0;
+}
+
+static void backref_insert(struct rb_root *root,
+ struct sa_defrag_extent_backref *backref)
+{
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
+ struct sa_defrag_extent_backref *entry;
+ int ret;
+
+ while (*p) {
+ parent = *p;
+ entry = rb_entry(parent, struct sa_defrag_extent_backref, node);
+
+ ret = backref_comp(backref, entry);
+ if (ret < 0)
+ p = &(*p)->rb_left;
+ else if (ret > 0)
+ p = &(*p)->rb_right;
+ else
+ BUG_ON(1);
+ }
+
+ rb_link_node(&backref->node, parent, p);
+ rb_insert_color(&backref->node, root);
+}
+
+/*
+ * Note the backref might have changed, and in that case we just return 0.
+ */
+static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id,
+ void *ctx)
+{
+ struct btrfs_file_extent_item *extent;
+ struct btrfs_fs_info *fs_info;
+ struct old_sa_defrag_extent *old = ctx;
+ struct new_sa_defrag_extent *new = old->new;
+ struct btrfs_path *path = new->path;
+ struct btrfs_key key;
+ struct btrfs_root *root;
+ struct sa_defrag_extent_backref *backref;
+ struct extent_buffer *leaf;
+ struct inode *inode = new->inode;
+ int slot;
+ int ret;
+ u64 extent_offset;
+ u64 num_bytes;
+
+ if (BTRFS_I(inode)->root->root_key.objectid == root_id &&
+ inum == btrfs_ino(inode))
+ return 0;
+
+ key.objectid = root_id;
+ key.type = BTRFS_ROOT_ITEM_KEY;
+ key.offset = (u64)-1;
+
+ fs_info = BTRFS_I(inode)->root->fs_info;
+ root = btrfs_read_fs_root_no_name(fs_info, &key);
+ if (IS_ERR(root)) {
+ if (PTR_ERR(root) == -ENOENT)
+ return 0;
+ WARN_ON(1);
+ pr_debug("inum=%llu, offset=%llu, root_id=%llu\n",
+ inum, offset, root_id);
+ return PTR_ERR(root);
+ }
+
+ key.objectid = inum;
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ if (offset > (u64)-1 << 32)
+ key.offset = 0;
+ else
+ key.offset = offset;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0) {
+ WARN_ON(1);
+ return ret;
+ }
+
+ while (1) {
+ cond_resched();
+
+ leaf = path->nodes[0];
+ slot = path->slots[0];
+
+ if (slot >= btrfs_header_nritems(leaf)) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret < 0) {
+ goto out;
+ } else if (ret > 0) {
+ ret = 0;
+ goto out;
+ }
+ continue;
+ }
+
+ path->slots[0]++;
+
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+
+ if (key.objectid != inum || key.type != BTRFS_EXTENT_DATA_KEY)
+ continue;
+
+ extent = btrfs_item_ptr(leaf, slot,
+ struct btrfs_file_extent_item);
+
+ if (btrfs_file_extent_disk_bytenr(leaf, extent) != old->bytenr)
+ continue;
+
+ if (key.offset - btrfs_file_extent_offset(leaf, extent) !=
+ offset)
+ continue;
+
+ break;
+ }
+
+ extent_offset = btrfs_file_extent_offset(leaf, extent);
+ num_bytes = btrfs_file_extent_num_bytes(leaf, extent);
+
+ if (extent_offset >= old->extent_offset + old->offset + old->len ||
+ extent_offset + num_bytes < old->extent_offset + old->offset)
+ goto out;
+
+ backref = kmalloc(sizeof(*backref), GFP_NOFS);
+ if (!backref) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ backref->root_id = root_id;
+ backref->inum = inum;
+ backref->file_pos = offset + extent_offset;
+ backref->num_bytes = num_bytes;
+ backref->extent_offset = extent_offset;
+ backref->generation = btrfs_file_extent_generation(leaf, extent);
+ backref->old = old;
+ backref_insert(&new->root, backref);
+ old->count++;
+out:
+ btrfs_release_path(path);
+ WARN_ON(ret);
+ return ret;
+}
+
+static noinline bool record_extent_backrefs(struct btrfs_path *path,
+ struct new_sa_defrag_extent *new)
+{
+ struct btrfs_fs_info *fs_info = BTRFS_I(new->inode)->root->fs_info;
+ struct old_sa_defrag_extent *old, *tmp;
+ int ret;
+
+ new->path = path;
+
+ list_for_each_entry_safe(old, tmp, &new->head, list) {
+ ret = iterate_inodes_from_logical(old->bytenr, fs_info,
+ path, record_one_backref,
+ old);
+ WARN_ON(ret < 0);
+
+ /* no backref to be processed for this extent */
+ if (!old->count) {
+ list_del(&old->list);
+ kfree(old);
+ }
+ }
+
+ if (list_empty(&new->head))
+ return false;
+
+ return true;
+}
+
+/*
+ * Note the backref might have changed, and in that case we just return 0.
+ */
+static noinline int relink_extent_backref(struct btrfs_path *path,
+ struct sa_defrag_extent_backref *prev,
+ struct sa_defrag_extent_backref *backref)
+{
+ struct btrfs_file_extent_item *extent;
+ struct btrfs_file_extent_item *item;
+ struct btrfs_ordered_extent *ordered;
+ struct btrfs_trans_handle *trans;
+ struct btrfs_fs_info *fs_info;
+ struct btrfs_root *root;
+ struct btrfs_key key;
+ struct extent_buffer *leaf;
+ struct old_sa_defrag_extent *old = backref->old;
+ struct new_sa_defrag_extent *new = old->new;
+ struct inode *src_inode = new->inode;
+ struct inode *inode;
+ struct extent_state *cached = NULL;
+ int ret = 0;
+ u64 hint_byte;
+ u64 start;
+ u64 len;
+ bool merge = false;
+
+ if (prev && prev->root_id == backref->root_id &&
+ prev->inum == backref->inum &&
+ prev->extent_offset == backref->extent_offset &&
+ prev->file_pos + prev->num_bytes == backref->file_pos)
+ merge = true;
+
+ key.objectid = backref->root_id;
+ key.type = BTRFS_ROOT_ITEM_KEY;
+ key.offset = (u64)-1;
+
+ fs_info = BTRFS_I(src_inode)->root->fs_info;
+ root = btrfs_read_fs_root_no_name(fs_info, &key);
+ if (IS_ERR(root)) {
+ if (PTR_ERR(root) == -ENOENT)
+ return 0;
+ return PTR_ERR(root);
+ }
+
+ key.objectid = backref->inum;
+ key.type = BTRFS_INODE_ITEM_KEY;
+ key.offset = 0;
+
+ inode = btrfs_iget(fs_info->sb, &key, root, NULL);
+ if (IS_ERR_OR_NULL(inode) || is_bad_inode(inode)) {
+ if (inode && !IS_ERR(inode))
+ iput(inode);
+ return 0;
+ }
+
+ lock_extent_bits(&BTRFS_I(inode)->io_tree, backref->file_pos,
+ backref->file_pos + backref->num_bytes, 0, &cached);
+
+ ordered = btrfs_lookup_first_ordered_extent(inode,
+ backref->file_pos +
+ backref->num_bytes);
+ if (ordered) {
+ btrfs_put_ordered_extent(ordered);
+ goto out_unlock;
+ }
+
+ /*
+ * 1 for drop_extents
+ * 1 for merge clause's search_slot
+ * 1 for insert items
+ */
+ trans = btrfs_start_transaction(root, 3);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ goto out_unlock;
+ }
+
+ key.objectid = backref->inum;
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = backref->file_pos;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0) {
+ goto out_free_path;
+ } else if (ret > 0) {
+ ret = 0;
+ goto out_free_path;
+ }
+
+ extent = btrfs_item_ptr(path->nodes[0], path->slots[0],
+ struct btrfs_file_extent_item);
+
+ if (btrfs_file_extent_generation(path->nodes[0], extent) !=
+ backref->generation)
+ goto out_free_path;
+
+ btrfs_release_path(path);
+
+ start = backref->file_pos;
+ if (backref->extent_offset < old->extent_offset + old->offset)
+ start += old->extent_offset + old->offset -
+ backref->extent_offset;
+
+ len = min(backref->extent_offset + backref->num_bytes,
+ old->extent_offset + old->offset + old->len);
+ len -= max(backref->extent_offset, old->extent_offset + old->offset);
+
+ ret = btrfs_drop_extents(trans, inode, start,
+ start + len, &hint_byte, 1);
+ if (ret)
+ goto out_free_path;
+again:
+ key.objectid = btrfs_ino(inode);
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = start;
+
+ if (merge) {
+ struct btrfs_file_extent_item *fi;
+ u64 extent_len;
+ struct btrfs_key found_key;
+
+ ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
+ if (ret < 0)
+ goto out_free_path;
+
+ path->slots[0]--;
+ leaf = path->nodes[0];
+ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+
+ fi = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_file_extent_item);
+ extent_len = btrfs_file_extent_num_bytes(leaf, fi);
+
+ if (btrfs_file_extent_disk_bytenr(leaf, fi) == new->bytenr &&
+ btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_REG &&
+ !btrfs_file_extent_compression(leaf, fi) &&
+ !btrfs_file_extent_encryption(leaf, fi) &&
+ !btrfs_file_extent_other_encoding(leaf, fi) &&
+ extent_len + found_key.offset == start) {
+ btrfs_set_file_extent_num_bytes(leaf, fi,
+ extent_len + len);
+ btrfs_mark_buffer_dirty(leaf);
+ inode_add_bytes(inode, len);
+
+ ret = 1;
+ goto out_free_path;
+ } else {
+ merge = false;
+ btrfs_release_path(path);
+ goto again;
+ }
+ }
+
+ ret = btrfs_insert_empty_item(trans, root, path, &key,
+ sizeof(*extent));
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto out_free_path;
+ }
+
+ leaf = path->nodes[0];
+ item = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_file_extent_item);
+ btrfs_set_file_extent_disk_bytenr(leaf, item, new->bytenr);
+ btrfs_set_file_extent_disk_num_bytes(leaf, item, new->disk_len);
+ btrfs_set_file_extent_offset(leaf, item, start - new->file_pos);
+ btrfs_set_file_extent_num_bytes(leaf, item, len);
+ btrfs_set_file_extent_ram_bytes(leaf, item, new->len);
+ btrfs_set_file_extent_generation(leaf, item, trans->transid);
+ btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG);
+ btrfs_set_file_extent_compression(leaf, item, new->compress_type);
+ btrfs_set_file_extent_encryption(leaf, item, 0);
+ btrfs_set_file_extent_other_encoding(leaf, item, 0);
+
+ btrfs_mark_buffer_dirty(leaf);
+ inode_add_bytes(inode, len);
+
+ ret = btrfs_inc_extent_ref(trans, root, new->bytenr,
+ new->disk_len, 0,
+ backref->root_id, backref->inum,
+ start, 0);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto out_free_path;
+ }
+
+ ret = 1;
+out_free_path:
+ btrfs_release_path(path);
+ btrfs_end_transaction(trans, root);
+out_unlock:
+ unlock_extent_cached(&BTRFS_I(inode)->io_tree, backref->file_pos,
+ backref->file_pos + backref->num_bytes,
+ &cached, GFP_NOFS);
+ iput(inode);
+ return ret;
+}
+
+static void relink_file_extents(struct new_sa_defrag_extent *new)
+{
+ struct btrfs_path *path;
+ struct old_sa_defrag_extent *old, *tmp;
+ struct sa_defrag_extent_backref *backref;
+ struct sa_defrag_extent_backref *prev = NULL;
+ struct inode *inode;
+ struct btrfs_root *root;
+ struct rb_node *node;
+ struct extent_state *cached = NULL;
+ int ret;
+
+ inode = new->inode;
+ root = BTRFS_I(inode)->root;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return;
+
+ if (!record_extent_backrefs(path, new)) {
+ btrfs_free_path(path);
+ goto out;
+ }
+ btrfs_release_path(path);
+
+ lock_extent_bits(&BTRFS_I(inode)->io_tree, new->file_pos,
+ new->file_pos + new->len, 0, &cached);
+
+ while (1) {
+ node = rb_first(&new->root);
+ if (!node)
+ break;
+ rb_erase(node, &new->root);
+
+ backref = rb_entry(node, struct sa_defrag_extent_backref, node);
+
+ ret = relink_extent_backref(path, prev, backref);
+ WARN_ON(ret < 0);
+
+ kfree(prev);
+
+ if (ret == 1)
+ prev = backref;
+ else
+ prev = NULL;
+ cond_resched();
+ }
+
+ kfree(prev);
+
+ unlock_extent_cached(&BTRFS_I(inode)->io_tree, new->file_pos,
+ new->file_pos + new->len, &cached, GFP_NOFS);
+
+ btrfs_free_path(path);
+
+ list_for_each_entry_safe(old, tmp, &new->head, list) {
+ list_del(&old->list);
+ kfree(old);
+ }
+out:
+ atomic_dec(&root->fs_info->defrag_running);
+ wake_up(&root->fs_info->transaction_wait);
+
+ kfree(new);
+}
+
+static struct new_sa_defrag_extent *
+record_old_file_extents(struct inode *inode,
+ struct btrfs_ordered_extent *ordered)
+{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_path *path;
+ struct btrfs_key key;
+ struct old_sa_defrag_extent *old, *tmp;
+ struct new_sa_defrag_extent *new;
+ int ret;
+
+ new = kmalloc(sizeof(*new), GFP_NOFS);
+ if (!new)
+ return NULL;
+
+ new->inode = inode;
+ new->file_pos = ordered->file_offset;
+ new->len = ordered->len;
+ new->bytenr = ordered->start;
+ new->disk_len = ordered->disk_len;
+ new->compress_type = ordered->compress_type;
+ new->root = RB_ROOT;
+ INIT_LIST_HEAD(&new->head);
+
+ path = btrfs_alloc_path();
+ if (!path)
+ goto out_kfree;
+
+ key.objectid = btrfs_ino(inode);
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = new->file_pos;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0)
+ goto out_free_path;
+ if (ret > 0 && path->slots[0] > 0)
+ path->slots[0]--;
+
+ /* find out all the old extents for the file range */
+ while (1) {
+ struct btrfs_file_extent_item *extent;
+ struct extent_buffer *l;
+ int slot;
+ u64 num_bytes;
+ u64 offset;
+ u64 end;
+
+ l = path->nodes[0];
+ slot = path->slots[0];
+
+ if (slot >= btrfs_header_nritems(l)) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret < 0)
+ goto out_free_list;
+ else if (ret > 0)
+ break;
+ continue;
+ }
+
+ btrfs_item_key_to_cpu(l, &key, slot);
+
+ if (key.objectid != btrfs_ino(inode))
+ break;
+ if (key.type != BTRFS_EXTENT_DATA_KEY)
+ break;
+ if (key.offset >= new->file_pos + new->len)
+ break;
+
+ extent = btrfs_item_ptr(l, slot, struct btrfs_file_extent_item);
+
+ num_bytes = btrfs_file_extent_num_bytes(l, extent);
+ if (key.offset + num_bytes < new->file_pos)
+ goto next;
+
+ old = kmalloc(sizeof(*old), GFP_NOFS);
+ if (!old)
+ goto out_free_list;
+
+ offset = max(new->file_pos, key.offset);
+ end = min(new->file_pos + new->len, key.offset + num_bytes);
+
+ old->bytenr = btrfs_file_extent_disk_bytenr(l, extent);
+ old->extent_offset = btrfs_file_extent_offset(l, extent);
+ old->offset = offset - key.offset;
+ old->len = end - offset;
+ old->new = new;
+ old->count = 0;
+ list_add_tail(&old->list, &new->head);
+next:
+ path->slots[0]++;
+ cond_resched();
+ }
+
+ btrfs_free_path(path);
+ atomic_inc(&root->fs_info->defrag_running);
+
+ return new;
+
+out_free_list:
+ list_for_each_entry_safe(old, tmp, &new->head, list) {
+ list_del(&old->list);
+ kfree(old);
+ }
+out_free_path:
+ btrfs_free_path(path);
+out_kfree:
+ kfree(new);
+ return NULL;
+}
+
/*
* helper function for btrfs_finish_ordered_io, this
* just reads in some of the csum leaves to prime them into ram
@@ -1863,6 +2466,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
struct btrfs_trans_handle *trans = NULL;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct extent_state *cached_state = NULL;
+ struct new_sa_defrag_extent *new = NULL;
int compress_type = 0;
int ret;
bool nolock;
@@ -1899,6 +2503,15 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
ordered_extent->file_offset + ordered_extent->len - 1,
0, &cached_state);
+ ret = test_range_bit(io_tree, ordered_extent->file_offset,
+ ordered_extent->file_offset + ordered_extent->len - 1,
+ EXTENT_DEFRAG, 1, cached_state);
+ if (ret && btrfs_root_last_snapshot(&root->root_item) >=
+ BTRFS_I(inode)->generation) {
+ /* the inode is shared */
+ new = record_old_file_extents(inode, ordered_extent);
+ }
+
if (nolock)
trans = btrfs_join_transaction_nolock(root);
else
@@ -1975,6 +2588,10 @@ out:
*/
btrfs_remove_ordered_extent(inode, ordered_extent);
+ /* for snapshot-aware defrag */
+ if (new)
+ relink_file_extents(new);
+
/* once for us */
btrfs_put_ordered_extent(ordered_extent);
/* once for the tree */
--
1.7.7.6
* Re: [PATCH 2/2 v3] Btrfs: snapshot-aware defrag
2012-09-17 9:58 ` [PATCH 2/2 v3] Btrfs: " Liu Bo
@ 2012-09-17 10:04 ` Liu Bo
2012-09-17 17:15 ` Josef Bacik
2012-09-25 17:39 ` Mitch Harder
2 siblings, 0 replies; 14+ messages in thread
From: Liu Bo @ 2012-09-17 10:04 UTC (permalink / raw)
To: Josef Bacik(fio); +Cc: linux-btrfs
Please pick up only this one, since the first patch remains unchanged; I
posted both again so that others can review the series more easily.
thanks,
liubo
On 09/17/2012 05:58 PM, Liu Bo wrote:
> This comes from one of btrfs's project ideas,
> As we defragment files, we break any sharing from other snapshots.
> The balancing code will preserve the sharing, and defrag needs to grow this
> as well.
>
> Now we're able to fill the blank with this patch, in which we make full use of
> backref walking stuff.
>
> Here is the basic idea,
> o set the writeback ranges started by defragment with flag EXTENT_DEFRAG
> o at endio, after we finish updating fs tree, we use backref walking to find
> all parents of the ranges and re-link them with the new COWed file layout by
> adding corresponding backrefs.
>
> Originally patch by Li Zefan <lizf@cn.fujitsu.com>
> Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
> ---
> Changes since v2:
> - adopt better names for local structures.
> - add proper reschedule phrase
> - better error handling
> - minor cleanups
> (Thanks, David)
>
> fs/btrfs/inode.c | 617 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
> 1 files changed, 617 insertions(+), 0 deletions(-)
>
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index 55857eb..8278aa2 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -54,6 +54,7 @@
> #include "locking.h"
> #include "free-space-cache.h"
> #include "inode-map.h"
> +#include "backref.h"
>
> struct btrfs_iget_args {
> u64 ino;
> @@ -1846,6 +1847,608 @@ out:
> return ret;
> }
>
> +/* snapshot-aware defrag */
> +struct sa_defrag_extent_backref {
> + struct rb_node node;
> + struct old_sa_defrag_extent *old;
> + u64 root_id;
> + u64 inum;
> + u64 file_pos;
> + u64 extent_offset;
> + u64 num_bytes;
> + u64 generation;
> +};
> +
> +struct old_sa_defrag_extent {
> + struct list_head list;
> + struct new_sa_defrag_extent *new;
> +
> + u64 extent_offset;
> + u64 bytenr;
> + u64 offset;
> + u64 len;
> + int count;
> +};
> +
> +struct new_sa_defrag_extent {
> + struct rb_root root;
> + struct list_head head;
> + struct btrfs_path *path;
> + struct inode *inode;
> + u64 file_pos;
> + u64 len;
> + u64 bytenr;
> + u64 disk_len;
> + u8 compress_type;
> +};
> +
> +static int backref_comp(struct sa_defrag_extent_backref *b1,
> + struct sa_defrag_extent_backref *b2)
> +{
> + if (b1->root_id < b2->root_id)
> + return -1;
> + else if (b1->root_id > b2->root_id)
> + return 1;
> +
> + if (b1->inum < b2->inum)
> + return -1;
> + else if (b1->inum > b2->inum)
> + return 1;
> +
> + if (b1->file_pos < b2->file_pos)
> + return -1;
> + else if (b1->file_pos > b2->file_pos)
> + return 1;
> +
> + WARN_ON(1);
> + return 0;
> +}
> +
> +static void backref_insert(struct rb_root *root,
> + struct sa_defrag_extent_backref *backref)
> +{
> + struct rb_node **p = &root->rb_node;
> + struct rb_node *parent = NULL;
> + struct sa_defrag_extent_backref *entry;
> + int ret;
> +
> + while (*p) {
> + parent = *p;
> + entry = rb_entry(parent, struct sa_defrag_extent_backref, node);
> +
> + ret = backref_comp(backref, entry);
> + if (ret < 0)
> + p = &(*p)->rb_left;
> + else if (ret > 0)
> + p = &(*p)->rb_right;
> + else
> + BUG_ON(1);
> + }
> +
> + rb_link_node(&backref->node, parent, p);
> + rb_insert_color(&backref->node, root);
> +}
> +
> +/*
> + * Note the backref might has changed, and in this case we just return 0.
> + */
> +static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id,
> + void *ctx)
> +{
> + struct btrfs_file_extent_item *extent;
> + struct btrfs_fs_info *fs_info;
> + struct old_sa_defrag_extent *old = ctx;
> + struct new_sa_defrag_extent *new = old->new;
> + struct btrfs_path *path = new->path;
> + struct btrfs_key key;
> + struct btrfs_root *root;
> + struct sa_defrag_extent_backref *backref;
> + struct extent_buffer *leaf;
> + struct inode *inode = new->inode;
> + int slot;
> + int ret;
> + u64 extent_offset;
> + u64 num_bytes;
> +
> + if (BTRFS_I(inode)->root->root_key.objectid == root_id &&
> + inum == btrfs_ino(inode))
> + return 0;
> +
> + key.objectid = root_id;
> + key.type = BTRFS_ROOT_ITEM_KEY;
> + key.offset = (u64)-1;
> +
> + fs_info = BTRFS_I(inode)->root->fs_info;
> + root = btrfs_read_fs_root_no_name(fs_info, &key);
> + if (IS_ERR(root)) {
> + if (PTR_ERR(root) == -ENOENT)
> + return 0;
> + WARN_ON(1);
> + pr_debug("inum=%llu, offset=%llu, root_id=%llu\n",
> + inum, offset, root_id);
> + return PTR_ERR(root);
> + }
> +
> + key.objectid = inum;
> + key.type = BTRFS_EXTENT_DATA_KEY;
> + if (offset > (u64)-1 << 32)
> + key.offset = 0;
> + else
> + key.offset = offset;
> +
> + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
> + if (ret < 0) {
> + WARN_ON(1);
> + return ret;
> + }
> +
> + while (1) {
> + cond_resched();
> +
> + leaf = path->nodes[0];
> + slot = path->slots[0];
> +
> + if (slot >= btrfs_header_nritems(leaf)) {
> + ret = btrfs_next_leaf(root, path);
> + if (ret < 0) {
> + goto out;
> + } else if (ret > 0) {
> + ret = 0;
> + goto out;
> + }
> + continue;
> + }
> +
> + path->slots[0]++;
> +
> + btrfs_item_key_to_cpu(leaf, &key, slot);
> +
> + if (key.objectid != inum || key.type != BTRFS_EXTENT_DATA_KEY)
> + continue;
> +
> + extent = btrfs_item_ptr(leaf, slot,
> + struct btrfs_file_extent_item);
> +
> + if (btrfs_file_extent_disk_bytenr(leaf, extent) != old->bytenr)
> + continue;
> +
> + if (key.offset - btrfs_file_extent_offset(leaf, extent) !=
> + offset)
> + continue;
> +
> + break;
> + }
> +
> + extent_offset = btrfs_file_extent_offset(leaf, extent);
> + num_bytes = btrfs_file_extent_num_bytes(leaf, extent);
> +
> + if (extent_offset >= old->extent_offset + old->offset + old->len ||
> + extent_offset + num_bytes < old->extent_offset + old->offset)
> + goto out;
> +
> + backref = kmalloc(sizeof(*backref), GFP_NOFS);
> + if (!backref) {
> + ret = -ENOENT;
> + goto out;
> + }
> +
> + backref->root_id = root_id;
> + backref->inum = inum;
> + backref->file_pos = offset + extent_offset;
> + backref->num_bytes = num_bytes;
> + backref->extent_offset = extent_offset;
> + backref->generation = btrfs_file_extent_generation(leaf, extent);
> + backref->old = old;
> + backref_insert(&new->root, backref);
> + old->count++;
> +out:
> + btrfs_release_path(path);
> + WARN_ON(ret);
> + return ret;
> +}
> +
> +static noinline bool record_extent_backrefs(struct btrfs_path *path,
> + struct new_sa_defrag_extent *new)
> +{
> + struct btrfs_fs_info *fs_info = BTRFS_I(new->inode)->root->fs_info;
> + struct old_sa_defrag_extent *old, *tmp;
> + int ret;
> +
> + new->path = path;
> +
> + list_for_each_entry_safe(old, tmp, &new->head, list) {
> + ret = iterate_inodes_from_logical(old->bytenr, fs_info,
> + path, record_one_backref,
> + old);
> + WARN_ON(ret < 0);
> +
> + /* no backref to be processed for this extent */
> + if (!old->count) {
> + list_del(&old->list);
> + kfree(old);
> + }
> + }
> +
> + if (list_empty(&new->head))
> + return false;
> +
> + return true;
> +}
> +
> +/*
> + * Note the backref might has changed, and in this case we just return 0.
> + */
> +static noinline int relink_extent_backref(struct btrfs_path *path,
> + struct sa_defrag_extent_backref *prev,
> + struct sa_defrag_extent_backref *backref)
> +{
> + struct btrfs_file_extent_item *extent;
> + struct btrfs_file_extent_item *item;
> + struct btrfs_ordered_extent *ordered;
> + struct btrfs_trans_handle *trans;
> + struct btrfs_fs_info *fs_info;
> + struct btrfs_root *root;
> + struct btrfs_key key;
> + struct extent_buffer *leaf;
> + struct old_sa_defrag_extent *old = backref->old;
> + struct new_sa_defrag_extent *new = old->new;
> + struct inode *src_inode = new->inode;
> + struct inode *inode;
> + struct extent_state *cached = NULL;
> + int ret = 0;
> + u64 hint_byte;
> + u64 start;
> + u64 len;
> + bool merge = false;
> +
> + if (prev && prev->root_id == backref->root_id &&
> + prev->inum == backref->inum &&
> + prev->extent_offset == backref->extent_offset &&
> + prev->file_pos + prev->num_bytes == backref->file_pos)
> + merge = true;
> +
> + key.objectid = backref->root_id;
> + key.type = BTRFS_ROOT_ITEM_KEY;
> + key.offset = (u64)-1;
> +
> + fs_info = BTRFS_I(src_inode)->root->fs_info;
> + root = btrfs_read_fs_root_no_name(fs_info, &key);
> + if (IS_ERR(root)) {
> + if (PTR_ERR(root) == -ENOENT)
> + return 0;
> + return PTR_ERR(root);
> + }
> +
> + key.objectid = backref->inum;
> + key.type = BTRFS_INODE_ITEM_KEY;
> + key.offset = 0;
> +
> + inode = btrfs_iget(fs_info->sb, &key, root, NULL);
> + if (IS_ERR_OR_NULL(inode) || is_bad_inode(inode)) {
> + if (inode && !IS_ERR(inode))
> + iput(inode);
> + return 0;
> + }
> +
> + lock_extent_bits(&BTRFS_I(inode)->io_tree, backref->file_pos,
> + backref->file_pos + backref->num_bytes - 1, 0, &cached);
> +
> + ordered = btrfs_lookup_first_ordered_extent(inode,
> + backref->file_pos +
> + backref->num_bytes);
> + if (ordered) {
> + btrfs_put_ordered_extent(ordered);
> + goto out_unlock;
> + }
> +
> + /*
> + * 1 for drop_extents
> + * 1 for merge clause's search_slot
> + * 1 for insert items
> + */
> + trans = btrfs_start_transaction(root, 3);
> + if (IS_ERR(trans)) {
> + ret = PTR_ERR(trans);
> + goto out_unlock;
> + }
> +
> + key.objectid = backref->inum;
> + key.type = BTRFS_EXTENT_DATA_KEY;
> + key.offset = backref->file_pos;
> +
> + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
> + if (ret < 0) {
> + goto out_free_path;
> + } else if (ret > 0) {
> + ret = 0;
> + goto out_free_path;
> + }
> +
> + extent = btrfs_item_ptr(path->nodes[0], path->slots[0],
> + struct btrfs_file_extent_item);
> +
> + if (btrfs_file_extent_generation(path->nodes[0], extent) !=
> + backref->generation)
> + goto out_free_path;
> +
> + btrfs_release_path(path);
> +
> + start = backref->file_pos;
> + if (backref->extent_offset < old->extent_offset + old->offset)
> + start += old->extent_offset + old->offset -
> + backref->extent_offset;
> +
> + len = min(backref->extent_offset + backref->num_bytes,
> + old->extent_offset + old->offset + old->len);
> + len -= max(backref->extent_offset, old->extent_offset + old->offset);
> +
> + ret = btrfs_drop_extents(trans, inode, start,
> + start + len, &hint_byte, 1);
> + if (ret)
> + goto out_free_path;
> +again:
> + key.objectid = btrfs_ino(inode);
> + key.type = BTRFS_EXTENT_DATA_KEY;
> + key.offset = start;
> +
> + if (merge) {
> + struct btrfs_file_extent_item *fi;
> + u64 extent_len;
> + struct btrfs_key found_key;
> +
> + ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
> + if (ret < 0)
> + goto out_free_path;
> +
> + path->slots[0]--;
> + leaf = path->nodes[0];
> + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
> +
> + fi = btrfs_item_ptr(leaf, path->slots[0],
> + struct btrfs_file_extent_item);
> + extent_len = btrfs_file_extent_num_bytes(leaf, fi);
> +
> + if (btrfs_file_extent_disk_bytenr(leaf, fi) == new->bytenr &&
> + btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_REG &&
> + !btrfs_file_extent_compression(leaf, fi) &&
> + !btrfs_file_extent_encryption(leaf, fi) &&
> + !btrfs_file_extent_other_encoding(leaf, fi) &&
> + extent_len + found_key.offset == start) {
> + btrfs_set_file_extent_num_bytes(leaf, fi,
> + extent_len + len);
> + btrfs_mark_buffer_dirty(leaf);
> + inode_add_bytes(inode, len);
> +
> + ret = 1;
> + goto out_free_path;
> + } else {
> + merge = false;
> + btrfs_release_path(path);
> + goto again;
> + }
> + }
> +
> + ret = btrfs_insert_empty_item(trans, root, path, &key,
> + sizeof(*extent));
> + if (ret) {
> + btrfs_abort_transaction(trans, root, ret);
> + goto out_free_path;
> + }
> +
> + leaf = path->nodes[0];
> + item = btrfs_item_ptr(leaf, path->slots[0],
> + struct btrfs_file_extent_item);
> + btrfs_set_file_extent_disk_bytenr(leaf, item, new->bytenr);
> + btrfs_set_file_extent_disk_num_bytes(leaf, item, new->disk_len);
> + btrfs_set_file_extent_offset(leaf, item, start - new->file_pos);
> + btrfs_set_file_extent_num_bytes(leaf, item, len);
> + btrfs_set_file_extent_ram_bytes(leaf, item, new->len);
> + btrfs_set_file_extent_generation(leaf, item, trans->transid);
> + btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG);
> + btrfs_set_file_extent_compression(leaf, item, new->compress_type);
> + btrfs_set_file_extent_encryption(leaf, item, 0);
> + btrfs_set_file_extent_other_encoding(leaf, item, 0);
> +
> + btrfs_mark_buffer_dirty(leaf);
> + inode_add_bytes(inode, len);
> +
> + ret = btrfs_inc_extent_ref(trans, root, new->bytenr,
> + new->disk_len, 0,
> + backref->root_id, backref->inum,
> + start, 0);
> + if (ret) {
> + btrfs_abort_transaction(trans, root, ret);
> + goto out_free_path;
> + }
> +
> + ret = 1;
> +out_free_path:
> + btrfs_release_path(path);
> + btrfs_end_transaction(trans, root);
> +out_unlock:
> + unlock_extent_cached(&BTRFS_I(inode)->io_tree, backref->file_pos,
> + backref->file_pos + backref->num_bytes - 1,
> + &cached, GFP_NOFS);
> + iput(inode);
> + return ret;
> +}
> +
> +static void relink_file_extents(struct new_sa_defrag_extent *new)
> +{
> + struct btrfs_path *path;
> + struct old_sa_defrag_extent *old, *tmp;
> + struct sa_defrag_extent_backref *backref;
> + struct sa_defrag_extent_backref *prev = NULL;
> + struct inode *inode;
> + struct btrfs_root *root;
> + struct rb_node *node;
> + struct extent_state *cached = NULL;
> + int ret;
> +
> + inode = new->inode;
> + root = BTRFS_I(inode)->root;
> +
> + path = btrfs_alloc_path();
> + if (!path)
> + return;
> +
> + if (!record_extent_backrefs(path, new)) {
> + btrfs_free_path(path);
> + goto out;
> + }
> + btrfs_release_path(path);
> +
> + lock_extent_bits(&BTRFS_I(inode)->io_tree, new->file_pos,
> + new->file_pos + new->len - 1, 0, &cached);
> +
> + while (1) {
> + node = rb_first(&new->root);
> + if (!node)
> + break;
> + rb_erase(node, &new->root);
> +
> + backref = rb_entry(node, struct sa_defrag_extent_backref, node);
> +
> + ret = relink_extent_backref(path, prev, backref);
> + WARN_ON(ret < 0);
> +
> + kfree(prev);
> +
> + if (ret == 1)
> + prev = backref;
> + else
> + prev = NULL;
> + cond_resched();
> + }
> +
> + kfree(prev);
> +
> + unlock_extent_cached(&BTRFS_I(inode)->io_tree, new->file_pos,
> + new->file_pos + new->len - 1, &cached, GFP_NOFS);
> +
> + btrfs_free_path(path);
> +
> + list_for_each_entry_safe(old, tmp, &new->head, list) {
> + list_del(&old->list);
> + kfree(old);
> + }
> +out:
> + atomic_dec(&root->fs_info->defrag_running);
> + wake_up(&root->fs_info->transaction_wait);
> +
> + kfree(new);
> +}
> +
> +static struct new_sa_defrag_extent *
> +record_old_file_extents(struct inode *inode,
> + struct btrfs_ordered_extent *ordered)
> +{
> + struct btrfs_root *root = BTRFS_I(inode)->root;
> + struct btrfs_path *path;
> + struct btrfs_key key;
> + struct old_sa_defrag_extent *old, *tmp;
> + struct new_sa_defrag_extent *new;
> + int ret;
> +
> + new = kmalloc(sizeof(*new), GFP_NOFS);
> + if (!new)
> + return NULL;
> +
> + new->inode = inode;
> + new->file_pos = ordered->file_offset;
> + new->len = ordered->len;
> + new->bytenr = ordered->start;
> + new->disk_len = ordered->disk_len;
> + new->compress_type = ordered->compress_type;
> + new->root = RB_ROOT;
> + INIT_LIST_HEAD(&new->head);
> +
> + path = btrfs_alloc_path();
> + if (!path)
> + goto out_kfree;
> +
> + key.objectid = btrfs_ino(inode);
> + key.type = BTRFS_EXTENT_DATA_KEY;
> + key.offset = new->file_pos;
> +
> + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
> + if (ret < 0)
> + goto out_free_path;
> + if (ret > 0 && path->slots[0] > 0)
> + path->slots[0]--;
> +
> + /* find out all the old extents for the file range */
> + while (1) {
> + struct btrfs_file_extent_item *extent;
> + struct extent_buffer *l;
> + int slot;
> + u64 num_bytes;
> + u64 offset;
> + u64 end;
> +
> + l = path->nodes[0];
> + slot = path->slots[0];
> +
> + if (slot >= btrfs_header_nritems(l)) {
> + ret = btrfs_next_leaf(root, path);
> + if (ret < 0)
> + goto out_free_list;
> + else if (ret > 0)
> + break;
> + continue;
> + }
> +
> + btrfs_item_key_to_cpu(l, &key, slot);
> +
> + if (key.objectid != btrfs_ino(inode))
> + break;
> + if (key.type != BTRFS_EXTENT_DATA_KEY)
> + break;
> + if (key.offset >= new->file_pos + new->len)
> + break;
> +
> + extent = btrfs_item_ptr(l, slot, struct btrfs_file_extent_item);
> +
> + num_bytes = btrfs_file_extent_num_bytes(l, extent);
> + if (key.offset + num_bytes < new->file_pos)
> + goto next;
> +
> + old = kmalloc(sizeof(*old), GFP_NOFS);
> + if (!old)
> + goto out_free_list;
> +
> + offset = max(new->file_pos, key.offset);
> + end = min(new->file_pos + new->len, key.offset + num_bytes);
> +
> + old->bytenr = btrfs_file_extent_disk_bytenr(l, extent);
> + old->extent_offset = btrfs_file_extent_offset(l, extent);
> + old->offset = offset - key.offset;
> + old->len = end - offset;
> + old->new = new;
> + old->count = 0;
> + list_add_tail(&old->list, &new->head);
> +next:
> + path->slots[0]++;
> + cond_resched();
> + }
> +
> + btrfs_free_path(path);
> + atomic_inc(&root->fs_info->defrag_running);
> +
> + return new;
> +
> +out_free_list:
> + list_for_each_entry_safe(old, tmp, &new->head, list) {
> + list_del(&old->list);
> + kfree(old);
> + }
> +out_free_path:
> + btrfs_free_path(path);
> +out_kfree:
> + kfree(new);
> + return NULL;
> +}
> +
> /*
> * helper function for btrfs_finish_ordered_io, this
> * just reads in some of the csum leaves to prime them into ram
> @@ -1863,6 +2466,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
> struct btrfs_trans_handle *trans = NULL;
> struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
> struct extent_state *cached_state = NULL;
> + struct new_sa_defrag_extent *new = NULL;
> int compress_type = 0;
> int ret;
> bool nolock;
> @@ -1899,6 +2503,15 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
> ordered_extent->file_offset + ordered_extent->len - 1,
> 0, &cached_state);
>
> + ret = test_range_bit(io_tree, ordered_extent->file_offset,
> + ordered_extent->file_offset + ordered_extent->len - 1,
> + EXTENT_DEFRAG, 1, cached_state);
> + if (ret && btrfs_root_last_snapshot(&root->root_item) >=
> + BTRFS_I(inode)->generation) {
> + /* the inode is shared */
> + new = record_old_file_extents(inode, ordered_extent);
> + }
> +
> if (nolock)
> trans = btrfs_join_transaction_nolock(root);
> else
> @@ -1975,6 +2588,10 @@ out:
> */
> btrfs_remove_ordered_extent(inode, ordered_extent);
>
> + /* for snapshot-aware defrag */
> + if (new)
> + relink_file_extents(new);
> +
> /* once for us */
> btrfs_put_ordered_extent(ordered_extent);
> /* once for the tree */
>
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 2/2 v3] Btrfs: snapshot-aware defrag
2012-09-17 9:58 ` [PATCH 2/2 v3] Btrfs: " Liu Bo
2012-09-17 10:04 ` Liu Bo
@ 2012-09-17 17:15 ` Josef Bacik
2012-09-18 0:23 ` Liu Bo
2012-09-25 17:39 ` Mitch Harder
2 siblings, 1 reply; 14+ messages in thread
From: Josef Bacik @ 2012-09-17 17:15 UTC (permalink / raw)
To: Liu Bo; +Cc: linux-btrfs@vger.kernel.org, dave@jikos.cz
On Mon, Sep 17, 2012 at 03:58:56AM -0600, Liu Bo wrote:
> This comes from one of btrfs's project ideas,
> As we defragment files, we break any sharing from other snapshots.
> The balancing code will preserve the sharing, and defrag needs to grow this
> as well.
>
> Now we're able to fill the blank with this patch, in which we make full use of
> backref walking stuff.
>
> Here is the basic idea,
> o set the writeback ranges started by defragment with flag EXTENT_DEFRAG
> o at endio, after we finish updating fs tree, we use backref walking to find
> all parents of the ranges and re-link them with the new COWed file layout by
> adding corresponding backrefs.
>
> Originally patch by Li Zefan <lizf@cn.fujitsu.com>
> Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
I was trying to fix up the rejects on this patch when I noticed there were no
tabs, only spaces. That's not going to work, and now I have to go back and make
sure none of your other patches did this. Thanks,
Josef
* Re: [PATCH 2/2 v3] Btrfs: snapshot-aware defrag
2012-09-17 17:15 ` Josef Bacik
@ 2012-09-18 0:23 ` Liu Bo
2012-09-18 13:10 ` Josef Bacik
0 siblings, 1 reply; 14+ messages in thread
From: Liu Bo @ 2012-09-18 0:23 UTC (permalink / raw)
To: Josef Bacik; +Cc: linux-btrfs@vger.kernel.org, dave@jikos.cz
[-- Attachment #1: Type: text/plain, Size: 1635 bytes --]
On 09/18/2012 01:15 AM, Josef Bacik wrote:
> On Mon, Sep 17, 2012 at 03:58:56AM -0600, Liu Bo wrote:
>> This comes from one of btrfs's project ideas,
>> As we defragment files, we break any sharing from other snapshots.
>> The balancing code will preserve the sharing, and defrag needs to grow this
>> as well.
>>
>> Now we're able to fill the blank with this patch, in which we make full use of
>> backref walking stuff.
>>
>> Here is the basic idea,
>> o set the writeback ranges started by defragment with flag EXTENT_DEFRAG
>> o at endio, after we finish updating fs tree, we use backref walking to find
>> all parents of the ranges and re-link them with the new COWed file layout by
>> adding corresponding backrefs.
>>
>> Originally patch by Li Zefan <lizf@cn.fujitsu.com>
>> Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
>
> I was trying to fix up the rejects on this patch when I noticed there were no
> tabs, only spaces. That's not going to work, and now I have to go back and make
> sure none of your other patches did this. Thanks,
>
> Josef
>
I'm quite confused about this; both my local copy and the email I received show it is well formed.
There are no spaces, and every time before I send patches out I run checkpatch.pl and make sure
it does not complain about anything:
$ ./scripts/checkpatch.pl ~/Desktop/0002-Btrfs-snapshot-aware-defrag.patch
total: 0 errors, 0 warnings, 647 lines checked
/home/liubo/Desktop/0002-Btrfs-snapshot-aware-defrag.patch has no obvious style problems and is ready for submission.
Anyway, I don't know where it went wrong, but I'm attaching it for you.
thanks,
liubo
[-- Attachment #2: 0002-Btrfs-snapshot-aware-defrag.patch --]
[-- Type: text/x-patch, Size: 17704 bytes --]
From bd2c940675d1c3fba936c407f0fda1dd67a487cf Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Thu, 9 Aug 2012 12:04:33 +0800
Subject: [PATCH 2/2 v3] Btrfs: snapshot-aware defrag
This comes from one of btrfs's project ideas:
as we defragment files, we break any sharing with other snapshots.
The balancing code preserves that sharing, and defrag needs to grow the
same ability.
This patch fills that gap by making full use of the backref walking code.
Here is the basic idea:
o set the writeback ranges started by defragment with the flag EXTENT_DEFRAG
o at endio, after we finish updating the fs tree, use backref walking to find
all parents of the ranges and re-link them with the new COWed file layout by
adding the corresponding backrefs.
Original patch by Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
---
Changes since v2:
- adopt better names for local structures.
- add proper reschedule points (cond_resched)
- better error handling
- minor cleanups
(Thanks, David)
fs/btrfs/inode.c | 617 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 617 insertions(+), 0 deletions(-)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 55857eb..8278aa2 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -54,6 +54,7 @@
#include "locking.h"
#include "free-space-cache.h"
#include "inode-map.h"
+#include "backref.h"
struct btrfs_iget_args {
u64 ino;
@@ -1846,6 +1847,608 @@ out:
return ret;
}
+/* snapshot-aware defrag */
+struct sa_defrag_extent_backref {
+ struct rb_node node;
+ struct old_sa_defrag_extent *old;
+ u64 root_id;
+ u64 inum;
+ u64 file_pos;
+ u64 extent_offset;
+ u64 num_bytes;
+ u64 generation;
+};
+
+struct old_sa_defrag_extent {
+ struct list_head list;
+ struct new_sa_defrag_extent *new;
+
+ u64 extent_offset;
+ u64 bytenr;
+ u64 offset;
+ u64 len;
+ int count;
+};
+
+struct new_sa_defrag_extent {
+ struct rb_root root;
+ struct list_head head;
+ struct btrfs_path *path;
+ struct inode *inode;
+ u64 file_pos;
+ u64 len;
+ u64 bytenr;
+ u64 disk_len;
+ u8 compress_type;
+};
+
+static int backref_comp(struct sa_defrag_extent_backref *b1,
+ struct sa_defrag_extent_backref *b2)
+{
+ if (b1->root_id < b2->root_id)
+ return -1;
+ else if (b1->root_id > b2->root_id)
+ return 1;
+
+ if (b1->inum < b2->inum)
+ return -1;
+ else if (b1->inum > b2->inum)
+ return 1;
+
+ if (b1->file_pos < b2->file_pos)
+ return -1;
+ else if (b1->file_pos > b2->file_pos)
+ return 1;
+
+ WARN_ON(1);
+ return 0;
+}
+
+static void backref_insert(struct rb_root *root,
+ struct sa_defrag_extent_backref *backref)
+{
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
+ struct sa_defrag_extent_backref *entry;
+ int ret;
+
+ while (*p) {
+ parent = *p;
+ entry = rb_entry(parent, struct sa_defrag_extent_backref, node);
+
+ ret = backref_comp(backref, entry);
+ if (ret < 0)
+ p = &(*p)->rb_left;
+ else if (ret > 0)
+ p = &(*p)->rb_right;
+ else
+ BUG_ON(1);
+ }
+
+ rb_link_node(&backref->node, parent, p);
+ rb_insert_color(&backref->node, root);
+}
+
+/*
+ * Note the backref might have changed, and in this case we just return 0.
+ */
+static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id,
+ void *ctx)
+{
+ struct btrfs_file_extent_item *extent;
+ struct btrfs_fs_info *fs_info;
+ struct old_sa_defrag_extent *old = ctx;
+ struct new_sa_defrag_extent *new = old->new;
+ struct btrfs_path *path = new->path;
+ struct btrfs_key key;
+ struct btrfs_root *root;
+ struct sa_defrag_extent_backref *backref;
+ struct extent_buffer *leaf;
+ struct inode *inode = new->inode;
+ int slot;
+ int ret;
+ u64 extent_offset;
+ u64 num_bytes;
+
+ if (BTRFS_I(inode)->root->root_key.objectid == root_id &&
+ inum == btrfs_ino(inode))
+ return 0;
+
+ key.objectid = root_id;
+ key.type = BTRFS_ROOT_ITEM_KEY;
+ key.offset = (u64)-1;
+
+ fs_info = BTRFS_I(inode)->root->fs_info;
+ root = btrfs_read_fs_root_no_name(fs_info, &key);
+ if (IS_ERR(root)) {
+ if (PTR_ERR(root) == -ENOENT)
+ return 0;
+ WARN_ON(1);
+ pr_debug("inum=%llu, offset=%llu, root_id=%llu\n",
+ inum, offset, root_id);
+ return PTR_ERR(root);
+ }
+
+ key.objectid = inum;
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ if (offset > (u64)-1 << 32)
+ key.offset = 0;
+ else
+ key.offset = offset;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0) {
+ WARN_ON(1);
+ return ret;
+ }
+
+ while (1) {
+ cond_resched();
+
+ leaf = path->nodes[0];
+ slot = path->slots[0];
+
+ if (slot >= btrfs_header_nritems(leaf)) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret < 0) {
+ goto out;
+ } else if (ret > 0) {
+ ret = 0;
+ goto out;
+ }
+ continue;
+ }
+
+ path->slots[0]++;
+
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+
+ if (key.objectid != inum || key.type != BTRFS_EXTENT_DATA_KEY)
+ continue;
+
+ extent = btrfs_item_ptr(leaf, slot,
+ struct btrfs_file_extent_item);
+
+ if (btrfs_file_extent_disk_bytenr(leaf, extent) != old->bytenr)
+ continue;
+
+ if (key.offset - btrfs_file_extent_offset(leaf, extent) !=
+ offset)
+ continue;
+
+ break;
+ }
+
+ extent_offset = btrfs_file_extent_offset(leaf, extent);
+ num_bytes = btrfs_file_extent_num_bytes(leaf, extent);
+
+ if (extent_offset >= old->extent_offset + old->offset + old->len ||
+ extent_offset + num_bytes < old->extent_offset + old->offset)
+ goto out;
+
+ backref = kmalloc(sizeof(*backref), GFP_NOFS);
+ if (!backref) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ backref->root_id = root_id;
+ backref->inum = inum;
+ backref->file_pos = offset + extent_offset;
+ backref->num_bytes = num_bytes;
+ backref->extent_offset = extent_offset;
+ backref->generation = btrfs_file_extent_generation(leaf, extent);
+ backref->old = old;
+ backref_insert(&new->root, backref);
+ old->count++;
+out:
+ btrfs_release_path(path);
+ WARN_ON(ret);
+ return ret;
+}
+
+static noinline bool record_extent_backrefs(struct btrfs_path *path,
+ struct new_sa_defrag_extent *new)
+{
+ struct btrfs_fs_info *fs_info = BTRFS_I(new->inode)->root->fs_info;
+ struct old_sa_defrag_extent *old, *tmp;
+ int ret;
+
+ new->path = path;
+
+ list_for_each_entry_safe(old, tmp, &new->head, list) {
+ ret = iterate_inodes_from_logical(old->bytenr, fs_info,
+ path, record_one_backref,
+ old);
+ WARN_ON(ret < 0);
+
+ /* no backref to be processed for this extent */
+ if (!old->count) {
+ list_del(&old->list);
+ kfree(old);
+ }
+ }
+
+ if (list_empty(&new->head))
+ return false;
+
+ return true;
+}
+
+/*
+ * Note the backref might have changed, and in this case we just return 0.
+ */
+static noinline int relink_extent_backref(struct btrfs_path *path,
+ struct sa_defrag_extent_backref *prev,
+ struct sa_defrag_extent_backref *backref)
+{
+ struct btrfs_file_extent_item *extent;
+ struct btrfs_file_extent_item *item;
+ struct btrfs_ordered_extent *ordered;
+ struct btrfs_trans_handle *trans;
+ struct btrfs_fs_info *fs_info;
+ struct btrfs_root *root;
+ struct btrfs_key key;
+ struct extent_buffer *leaf;
+ struct old_sa_defrag_extent *old = backref->old;
+ struct new_sa_defrag_extent *new = old->new;
+ struct inode *src_inode = new->inode;
+ struct inode *inode;
+ struct extent_state *cached = NULL;
+ int ret = 0;
+ u64 hint_byte;
+ u64 start;
+ u64 len;
+ bool merge = false;
+
+ if (prev && prev->root_id == backref->root_id &&
+ prev->inum == backref->inum &&
+ prev->extent_offset == backref->extent_offset &&
+ prev->file_pos + prev->num_bytes == backref->file_pos)
+ merge = true;
+
+ key.objectid = backref->root_id;
+ key.type = BTRFS_ROOT_ITEM_KEY;
+ key.offset = (u64)-1;
+
+ fs_info = BTRFS_I(src_inode)->root->fs_info;
+ root = btrfs_read_fs_root_no_name(fs_info, &key);
+ if (IS_ERR(root)) {
+ if (PTR_ERR(root) == -ENOENT)
+ return 0;
+ return PTR_ERR(root);
+ }
+
+ key.objectid = backref->inum;
+ key.type = BTRFS_INODE_ITEM_KEY;
+ key.offset = 0;
+
+ inode = btrfs_iget(fs_info->sb, &key, root, NULL);
+ if (IS_ERR_OR_NULL(inode) || is_bad_inode(inode)) {
+ if (inode && !IS_ERR(inode))
+ iput(inode);
+ return 0;
+ }
+
+ lock_extent_bits(&BTRFS_I(inode)->io_tree, backref->file_pos,
+ backref->file_pos + backref->num_bytes - 1, 0, &cached);
+
+ ordered = btrfs_lookup_first_ordered_extent(inode,
+ backref->file_pos +
+ backref->num_bytes);
+ if (ordered) {
+ btrfs_put_ordered_extent(ordered);
+ goto out_unlock;
+ }
+
+ /*
+ * 1 for drop_extents
+ * 1 for merge clause's search_slot
+ * 1 for insert items
+ */
+ trans = btrfs_start_transaction(root, 3);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ goto out_unlock;
+ }
+
+ key.objectid = backref->inum;
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = backref->file_pos;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0) {
+ goto out_free_path;
+ } else if (ret > 0) {
+ ret = 0;
+ goto out_free_path;
+ }
+
+ extent = btrfs_item_ptr(path->nodes[0], path->slots[0],
+ struct btrfs_file_extent_item);
+
+ if (btrfs_file_extent_generation(path->nodes[0], extent) !=
+ backref->generation)
+ goto out_free_path;
+
+ btrfs_release_path(path);
+
+ start = backref->file_pos;
+ if (backref->extent_offset < old->extent_offset + old->offset)
+ start += old->extent_offset + old->offset -
+ backref->extent_offset;
+
+ len = min(backref->extent_offset + backref->num_bytes,
+ old->extent_offset + old->offset + old->len);
+ len -= max(backref->extent_offset, old->extent_offset + old->offset);
+
+ ret = btrfs_drop_extents(trans, inode, start,
+ start + len, &hint_byte, 1);
+ if (ret)
+ goto out_free_path;
+again:
+ key.objectid = btrfs_ino(inode);
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = start;
+
+ if (merge) {
+ struct btrfs_file_extent_item *fi;
+ u64 extent_len;
+ struct btrfs_key found_key;
+
+ ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
+ if (ret < 0)
+ goto out_free_path;
+
+ path->slots[0]--;
+ leaf = path->nodes[0];
+ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+
+ fi = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_file_extent_item);
+ extent_len = btrfs_file_extent_num_bytes(leaf, fi);
+
+ if (btrfs_file_extent_disk_bytenr(leaf, fi) == new->bytenr &&
+ btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_REG &&
+ !btrfs_file_extent_compression(leaf, fi) &&
+ !btrfs_file_extent_encryption(leaf, fi) &&
+ !btrfs_file_extent_other_encoding(leaf, fi) &&
+ extent_len + found_key.offset == start) {
+ btrfs_set_file_extent_num_bytes(leaf, fi,
+ extent_len + len);
+ btrfs_mark_buffer_dirty(leaf);
+ inode_add_bytes(inode, len);
+
+ ret = 1;
+ goto out_free_path;
+ } else {
+ merge = false;
+ btrfs_release_path(path);
+ goto again;
+ }
+ }
+
+ ret = btrfs_insert_empty_item(trans, root, path, &key,
+ sizeof(*extent));
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto out_free_path;
+ }
+
+ leaf = path->nodes[0];
+ item = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_file_extent_item);
+ btrfs_set_file_extent_disk_bytenr(leaf, item, new->bytenr);
+ btrfs_set_file_extent_disk_num_bytes(leaf, item, new->disk_len);
+ btrfs_set_file_extent_offset(leaf, item, start - new->file_pos);
+ btrfs_set_file_extent_num_bytes(leaf, item, len);
+ btrfs_set_file_extent_ram_bytes(leaf, item, new->len);
+ btrfs_set_file_extent_generation(leaf, item, trans->transid);
+ btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG);
+ btrfs_set_file_extent_compression(leaf, item, new->compress_type);
+ btrfs_set_file_extent_encryption(leaf, item, 0);
+ btrfs_set_file_extent_other_encoding(leaf, item, 0);
+
+ btrfs_mark_buffer_dirty(leaf);
+ inode_add_bytes(inode, len);
+
+ ret = btrfs_inc_extent_ref(trans, root, new->bytenr,
+ new->disk_len, 0,
+ backref->root_id, backref->inum,
+ start, 0);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto out_free_path;
+ }
+
+ ret = 1;
+out_free_path:
+ btrfs_release_path(path);
+ btrfs_end_transaction(trans, root);
+out_unlock:
+ unlock_extent_cached(&BTRFS_I(inode)->io_tree, backref->file_pos,
+ backref->file_pos + backref->num_bytes - 1,
+ &cached, GFP_NOFS);
+ iput(inode);
+ return ret;
+}
+
+static void relink_file_extents(struct new_sa_defrag_extent *new)
+{
+ struct btrfs_path *path;
+ struct old_sa_defrag_extent *old, *tmp;
+ struct sa_defrag_extent_backref *backref;
+ struct sa_defrag_extent_backref *prev = NULL;
+ struct inode *inode;
+ struct btrfs_root *root;
+ struct rb_node *node;
+ struct extent_state *cached = NULL;
+ int ret;
+
+ inode = new->inode;
+ root = BTRFS_I(inode)->root;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return;
+
+ if (!record_extent_backrefs(path, new)) {
+ btrfs_free_path(path);
+ goto out;
+ }
+ btrfs_release_path(path);
+
+ lock_extent_bits(&BTRFS_I(inode)->io_tree, new->file_pos,
+ new->file_pos + new->len - 1, 0, &cached);
+
+ while (1) {
+ node = rb_first(&new->root);
+ if (!node)
+ break;
+ rb_erase(node, &new->root);
+
+ backref = rb_entry(node, struct sa_defrag_extent_backref, node);
+
+ ret = relink_extent_backref(path, prev, backref);
+ WARN_ON(ret < 0);
+
+ kfree(prev);
+
+ if (ret == 1)
+ prev = backref;
+ else
+ prev = NULL;
+ cond_resched();
+ }
+
+ kfree(prev);
+
+ unlock_extent_cached(&BTRFS_I(inode)->io_tree, new->file_pos,
+ new->file_pos + new->len - 1, &cached, GFP_NOFS);
+
+ btrfs_free_path(path);
+
+ list_for_each_entry_safe(old, tmp, &new->head, list) {
+ list_del(&old->list);
+ kfree(old);
+ }
+out:
+ atomic_dec(&root->fs_info->defrag_running);
+ wake_up(&root->fs_info->transaction_wait);
+
+ kfree(new);
+}
+
+static struct new_sa_defrag_extent *
+record_old_file_extents(struct inode *inode,
+ struct btrfs_ordered_extent *ordered)
+{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_path *path;
+ struct btrfs_key key;
+ struct old_sa_defrag_extent *old, *tmp;
+ struct new_sa_defrag_extent *new;
+ int ret;
+
+ new = kmalloc(sizeof(*new), GFP_NOFS);
+ if (!new)
+ return NULL;
+
+ new->inode = inode;
+ new->file_pos = ordered->file_offset;
+ new->len = ordered->len;
+ new->bytenr = ordered->start;
+ new->disk_len = ordered->disk_len;
+ new->compress_type = ordered->compress_type;
+ new->root = RB_ROOT;
+ INIT_LIST_HEAD(&new->head);
+
+ path = btrfs_alloc_path();
+ if (!path)
+ goto out_kfree;
+
+ key.objectid = btrfs_ino(inode);
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = new->file_pos;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0)
+ goto out_free_path;
+ if (ret > 0 && path->slots[0] > 0)
+ path->slots[0]--;
+
+ /* find out all the old extents for the file range */
+ while (1) {
+ struct btrfs_file_extent_item *extent;
+ struct extent_buffer *l;
+ int slot;
+ u64 num_bytes;
+ u64 offset;
+ u64 end;
+
+ l = path->nodes[0];
+ slot = path->slots[0];
+
+ if (slot >= btrfs_header_nritems(l)) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret < 0)
+ goto out_free_list;
+ else if (ret > 0)
+ break;
+ continue;
+ }
+
+ btrfs_item_key_to_cpu(l, &key, slot);
+
+ if (key.objectid != btrfs_ino(inode))
+ break;
+ if (key.type != BTRFS_EXTENT_DATA_KEY)
+ break;
+ if (key.offset >= new->file_pos + new->len)
+ break;
+
+ extent = btrfs_item_ptr(l, slot, struct btrfs_file_extent_item);
+
+ num_bytes = btrfs_file_extent_num_bytes(l, extent);
+ if (key.offset + num_bytes < new->file_pos)
+ goto next;
+
+ old = kmalloc(sizeof(*old), GFP_NOFS);
+ if (!old)
+ goto out_free_list;
+
+ offset = max(new->file_pos, key.offset);
+ end = min(new->file_pos + new->len, key.offset + num_bytes);
+
+ old->bytenr = btrfs_file_extent_disk_bytenr(l, extent);
+ old->extent_offset = btrfs_file_extent_offset(l, extent);
+ old->offset = offset - key.offset;
+ old->len = end - offset;
+ old->new = new;
+ old->count = 0;
+ list_add_tail(&old->list, &new->head);
+next:
+ path->slots[0]++;
+ cond_resched();
+ }
+
+ btrfs_free_path(path);
+ atomic_inc(&root->fs_info->defrag_running);
+
+ return new;
+
+out_free_list:
+ list_for_each_entry_safe(old, tmp, &new->head, list) {
+ list_del(&old->list);
+ kfree(old);
+ }
+out_free_path:
+ btrfs_free_path(path);
+out_kfree:
+ kfree(new);
+ return NULL;
+}
+
/*
* helper function for btrfs_finish_ordered_io, this
* just reads in some of the csum leaves to prime them into ram
@@ -1863,6 +2466,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
struct btrfs_trans_handle *trans = NULL;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct extent_state *cached_state = NULL;
+ struct new_sa_defrag_extent *new = NULL;
int compress_type = 0;
int ret;
bool nolock;
@@ -1899,6 +2503,15 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
ordered_extent->file_offset + ordered_extent->len - 1,
0, &cached_state);
+ ret = test_range_bit(io_tree, ordered_extent->file_offset,
+ ordered_extent->file_offset + ordered_extent->len - 1,
+ EXTENT_DEFRAG, 1, cached_state);
+ if (ret && btrfs_root_last_snapshot(&root->root_item) >=
+ BTRFS_I(inode)->generation) {
+ /* the inode is shared */
+ new = record_old_file_extents(inode, ordered_extent);
+ }
+
if (nolock)
trans = btrfs_join_transaction_nolock(root);
else
@@ -1975,6 +2588,10 @@ out:
*/
btrfs_remove_ordered_extent(inode, ordered_extent);
+ /* for snapshot-aware defrag */
+ if (new)
+ relink_file_extents(new);
+
/* once for us */
btrfs_put_ordered_extent(ordered_extent);
/* once for the tree */
--
1.7.7.6
* Re: [PATCH 2/2 v3] Btrfs: snapshot-aware defrag
2012-09-18 0:23 ` Liu Bo
@ 2012-09-18 13:10 ` Josef Bacik
0 siblings, 0 replies; 14+ messages in thread
From: Josef Bacik @ 2012-09-18 13:10 UTC (permalink / raw)
To: Liu Bo; +Cc: Josef Bacik, linux-btrfs@vger.kernel.org, dave@jikos.cz
On Mon, Sep 17, 2012 at 06:23:21PM -0600, Liu Bo wrote:
> On 09/18/2012 01:15 AM, Josef Bacik wrote:
> > On Mon, Sep 17, 2012 at 03:58:56AM -0600, Liu Bo wrote:
> >> This comes from one of btrfs's project ideas,
> >> As we defragment files, we break any sharing from other snapshots.
> >> The balancing code will preserve the sharing, and defrag needs to grow this
> >> as well.
> >>
> >> Now we're able to fill the blank with this patch, in which we make full use of
> >> backref walking stuff.
> >>
> >> Here is the basic idea,
> >> o set the writeback ranges started by defragment with flag EXTENT_DEFRAG
> >> o at endio, after we finish updating fs tree, we use backref walking to find
> >> all parents of the ranges and re-link them with the new COWed file layout by
> >> adding corresponding backrefs.
> >>
> >> Originally patch by Li Zefan <lizf@cn.fujitsu.com>
> >> Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
> >
> > I was trying to fixup the rejects on this patch when I noticed there were no
> > tabs, only spaces. Thats not going to work and now I have to go back and make
> > sure none of your other patches did this. Thanks,
> >
> > Josef
> >
>
> I'm quite confused about this; both my local copy and the email I received show it is well formed.
>
> There are no spaces, and every time before I send them out, I use checkpatch.pl and make sure
> checkpatch.pl does not complain about anything:
>
> $ ./scripts/checkpatch.pl ~/Desktop/0002-Btrfs-snapshot-aware-defrag.patch
> total: 0 errors, 0 warnings, 647 lines checked
>
> /home/liubo/Desktop/0002-Btrfs-snapshot-aware-defrag.patch has no obvious style problems and is ready for submission.
>
>
> Anyway, I don't know where it goes wrong, but I'm attaching it for you.
>
Yeah, I don't know wtf is going on here; that patch turned out fine. I blame
Exchange ;). Thanks,
Josef
* Re: [PATCH 2/2 v3] Btrfs: snapshot-aware defrag
2012-09-17 9:58 ` [PATCH 2/2 v3] Btrfs: " Liu Bo
2012-09-17 10:04 ` Liu Bo
2012-09-17 17:15 ` Josef Bacik
@ 2012-09-25 17:39 ` Mitch Harder
2012-09-26 1:07 ` Liu Bo
2 siblings, 1 reply; 14+ messages in thread
From: Mitch Harder @ 2012-09-25 17:39 UTC (permalink / raw)
To: Liu Bo; +Cc: linux-btrfs, dave
On Mon, Sep 17, 2012 at 4:58 AM, Liu Bo <bo.li.liu@oracle.com> wrote:
> This comes from one of btrfs's project ideas,
> As we defragment files, we break any sharing from other snapshots.
> The balancing code will preserve the sharing, and defrag needs to grow this
> as well.
>
> Now we're able to fill the blank with this patch, in which we make full use of
> backref walking stuff.
>
> Here is the basic idea,
> o set the writeback ranges started by defragment with flag EXTENT_DEFRAG
> o at endio, after we finish updating fs tree, we use backref walking to find
> all parents of the ranges and re-link them with the new COWed file layout by
> adding corresponding backrefs.
>
> Originally patch by Li Zefan <lizf@cn.fujitsu.com>
> Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
I'm hitting the WARN_ON in record_extent_backrefs() indicating a
problem with the return value from iterate_inodes_from_logical().
[ 6865.184782] ------------[ cut here ]------------
[ 6865.184819] WARNING: at fs/btrfs/inode.c:2062
record_extent_backrefs+0xe5/0xe7 [btrfs]()
[ 6865.184823] Hardware name: OptiPlex 745
[ 6865.184825] Modules linked in: lpc_ich mfd_core xts gf128mul cryptd
aes_x86_64 sha256_generic btrfs libcrc32c
[ 6865.184841] Pid: 4239, comm: btrfs-endio-wri Not tainted 3.5.4-git-local+ #1
[ 6865.184844] Call Trace:
[ 6865.184856] [<ffffffff81031d6a>] warn_slowpath_common+0x74/0xa2
[ 6865.184862] [<ffffffff81031db2>] warn_slowpath_null+0x1a/0x1c
[ 6865.184884] [<ffffffffa003356b>] record_extent_backrefs+0xe5/0xe7 [btrfs]
[ 6865.184908] [<ffffffffa003cf3a>] btrfs_finish_ordered_io+0x131/0xa4b [btrfs]
[ 6865.184930] [<ffffffffa003d869>] finish_ordered_fn+0x15/0x17 [btrfs]
[ 6865.184951] [<ffffffffa005882f>] worker_loop+0x145/0x516 [btrfs]
[ 6865.184959] [<ffffffff81059727>] ? __wake_up_common+0x54/0x84
[ 6865.184983] [<ffffffffa00586ea>] ? btrfs_queue_worker+0x2d3/0x2d3 [btrfs]
[ 6865.184989] [<ffffffff810516bb>] kthread+0x93/0x98
[ 6865.184996] [<ffffffff817d7934>] kernel_thread_helper+0x4/0x10
[ 6865.185001] [<ffffffff81051628>] ? kthread_freezable_should_stop+0x6a/0x6a
[ 6865.185021] [<ffffffff817d7930>] ? gs_change+0xb/0xb
[ 6865.185025] ---[ end trace 26cc0e186efc79d8 ]---
I'm testing a 3.5.4 kernel merged with the 3.6_rc patchset, as well as the
send_recv patches and most of the btrfs-next patches.
I'm running into this issue when mounting with autodefrag, and running
some snapshot tests.
This may be related to a problem elsewhere, because I've been
encountering other backref issues even before testing this patch.
* Re: [PATCH 2/2 v3] Btrfs: snapshot-aware defrag
2012-09-25 17:39 ` Mitch Harder
@ 2012-09-26 1:07 ` Liu Bo
2012-10-03 14:02 ` Chris Mason
0 siblings, 1 reply; 14+ messages in thread
From: Liu Bo @ 2012-09-26 1:07 UTC (permalink / raw)
To: Mitch Harder; +Cc: linux-btrfs, dave
On 09/26/2012 01:39 AM, Mitch Harder wrote:
> On Mon, Sep 17, 2012 at 4:58 AM, Liu Bo <bo.li.liu@oracle.com> wrote:
>> This comes from one of btrfs's project ideas,
>> As we defragment files, we break any sharing from other snapshots.
>> The balancing code will preserve the sharing, and defrag needs to grow this
>> as well.
>>
>> Now we're able to fill the blank with this patch, in which we make full use of
>> backref walking stuff.
>>
>> Here is the basic idea,
>> o set the writeback ranges started by defragment with flag EXTENT_DEFRAG
>> o at endio, after we finish updating fs tree, we use backref walking to find
>> all parents of the ranges and re-link them with the new COWed file layout by
>> adding corresponding backrefs.
>>
>> Originally patch by Li Zefan <lizf@cn.fujitsu.com>
>> Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
>
> I'm hitting the WARN_ON in record_extent_backrefs() indicating a
> problem with the return value from iterate_inodes_from_logical().
>
> [ 6865.184782] ------------[ cut here ]------------
> [ 6865.184819] WARNING: at fs/btrfs/inode.c:2062
> record_extent_backrefs+0xe5/0xe7 [btrfs]()
> [ 6865.184823] Hardware name: OptiPlex 745
> [ 6865.184825] Modules linked in: lpc_ich mfd_core xts gf128mul cryptd
> aes_x86_64 sha256_generic btrfs libcrc32c
> [ 6865.184841] Pid: 4239, comm: btrfs-endio-wri Not tainted 3.5.4-git-local+ #1
> [ 6865.184844] Call Trace:
> [ 6865.184856] [<ffffffff81031d6a>] warn_slowpath_common+0x74/0xa2
> [ 6865.184862] [<ffffffff81031db2>] warn_slowpath_null+0x1a/0x1c
> [ 6865.184884] [<ffffffffa003356b>] record_extent_backrefs+0xe5/0xe7 [btrfs]
> [ 6865.184908] [<ffffffffa003cf3a>] btrfs_finish_ordered_io+0x131/0xa4b [btrfs]
> [ 6865.184930] [<ffffffffa003d869>] finish_ordered_fn+0x15/0x17 [btrfs]
> [ 6865.184951] [<ffffffffa005882f>] worker_loop+0x145/0x516 [btrfs]
> [ 6865.184959] [<ffffffff81059727>] ? __wake_up_common+0x54/0x84
> [ 6865.184983] [<ffffffffa00586ea>] ? btrfs_queue_worker+0x2d3/0x2d3 [btrfs]
> [ 6865.184989] [<ffffffff810516bb>] kthread+0x93/0x98
> [ 6865.184996] [<ffffffff817d7934>] kernel_thread_helper+0x4/0x10
> [ 6865.185001] [<ffffffff81051628>] ? kthread_freezable_should_stop+0x6a/0x6a
> [ 6865.185021] [<ffffffff817d7930>] ? gs_change+0xb/0xb
> [ 6865.185025] ---[ end trace 26cc0e186efc79d8 ]---
>
>
> I'm testing a 3.5.4 kernel merged with 3.6_rc patchset as well as the
> send_recv patches and most of the btrfs-next patches.
>
> I'm running into this issue when mounting with autodefrag, and running
> some snapshot tests.
>
> This may be related to a problem elsewhere, because I've been
> encountering other backref issues even before testing this patch.
>
Oh, will look into it, thanks for the report.
thanks,
liubo
* Re: [PATCH 2/2 v3] Btrfs: snapshot-aware defrag
2012-09-26 1:07 ` Liu Bo
@ 2012-10-03 14:02 ` Chris Mason
2012-10-04 14:22 ` Liu Bo
2012-10-08 12:18 ` Liu Bo
0 siblings, 2 replies; 14+ messages in thread
From: Chris Mason @ 2012-10-03 14:02 UTC (permalink / raw)
To: Liu Bo; +Cc: Mitch Harder, linux-btrfs@vger.kernel.org, dave@jikos.cz
On Tue, Sep 25, 2012 at 07:07:53PM -0600, Liu Bo wrote:
> On 09/26/2012 01:39 AM, Mitch Harder wrote:
> > On Mon, Sep 17, 2012 at 4:58 AM, Liu Bo <bo.li.liu@oracle.com> wrote:
> >> This comes from one of btrfs's project ideas,
> >> As we defragment files, we break any sharing from other snapshots.
> >> The balancing code will preserve the sharing, and defrag needs to grow this
> >> as well.
> >>
> >> Now we're able to fill the blank with this patch, in which we make full use of
> >> backref walking stuff.
> >>
> >> Here is the basic idea,
> >> o set the writeback ranges started by defragment with flag EXTENT_DEFRAG
> >> o at endio, after we finish updating fs tree, we use backref walking to find
> >> all parents of the ranges and re-link them with the new COWed file layout by
> >> adding corresponding backrefs.
> >>
> >> Originally patch by Li Zefan <lizf@cn.fujitsu.com>
> >> Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
> >
> > I'm hitting the WARN_ON in record_extent_backrefs() indicating a
> > problem with the return value from iterate_inodes_from_logical().
Me too. It triggers reliably with mount -o autodefrag, and then crashes
in the next function ;)
-chris
* Re: [PATCH 2/2 v3] Btrfs: snapshot-aware defrag
2012-10-03 14:02 ` Chris Mason
@ 2012-10-04 14:22 ` Liu Bo
2012-10-04 19:40 ` Mitch Harder
2012-10-08 12:18 ` Liu Bo
1 sibling, 1 reply; 14+ messages in thread
From: Liu Bo @ 2012-10-04 14:22 UTC (permalink / raw)
To: Mitch Harder; +Cc: Chris Mason, linux-btrfs@vger.kernel.org, dave@jikos.cz
On 10/03/2012 10:02 PM, Chris Mason wrote:
> On Tue, Sep 25, 2012 at 07:07:53PM -0600, Liu Bo wrote:
>> On 09/26/2012 01:39 AM, Mitch Harder wrote:
>>> On Mon, Sep 17, 2012 at 4:58 AM, Liu Bo <bo.li.liu@oracle.com> wrote:
>>>> This comes from one of btrfs's project ideas,
>>>> As we defragment files, we break any sharing from other snapshots.
>>>> The balancing code will preserve the sharing, and defrag needs to grow this
>>>> as well.
>>>>
>>>> Now we're able to fill the blank with this patch, in which we make full use of
>>>> backref walking stuff.
>>>>
>>>> Here is the basic idea,
>>>> o set the writeback ranges started by defragment with flag EXTENT_DEFRAG
>>>> o at endio, after we finish updating fs tree, we use backref walking to find
>>>> all parents of the ranges and re-link them with the new COWed file layout by
>>>> adding corresponding backrefs.
>>>>
>>>> Originally patch by Li Zefan <lizf@cn.fujitsu.com>
>>>> Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
>>>
>>> I'm hitting the WARN_ON in record_extent_backrefs() indicating a
>>> problem with the return value from iterate_inodes_from_logical().
>
> Me too. It triggers reliably with mount -o autodefrag, and then crashes
> in the next function ;)
>
> -chris
>
Good news, I'm starting to hit the crash (a NULL pointer crash) ;)
thanks,
liubo
* Re: [PATCH 2/2 v3] Btrfs: snapshot-aware defrag
2012-10-04 14:22 ` Liu Bo
@ 2012-10-04 19:40 ` Mitch Harder
0 siblings, 0 replies; 14+ messages in thread
From: Mitch Harder @ 2012-10-04 19:40 UTC (permalink / raw)
To: Liu Bo; +Cc: Chris Mason, linux-btrfs@vger.kernel.org, dave@jikos.cz
On Thu, Oct 4, 2012 at 9:22 AM, Liu Bo <bo.li.liu@oracle.com> wrote:
> On 10/03/2012 10:02 PM, Chris Mason wrote:
>> On Tue, Sep 25, 2012 at 07:07:53PM -0600, Liu Bo wrote:
>>> On 09/26/2012 01:39 AM, Mitch Harder wrote:
>>>> On Mon, Sep 17, 2012 at 4:58 AM, Liu Bo <bo.li.liu@oracle.com> wrote:
>>>>> This comes from one of btrfs's project ideas,
>>>>> As we defragment files, we break any sharing from other snapshots.
>>>>> The balancing code will preserve the sharing, and defrag needs to grow this
>>>>> as well.
>>>>>
>>>>> Now we're able to fill the blank with this patch, in which we make full use of
>>>>> backref walking stuff.
>>>>>
>>>>> Here is the basic idea,
>>>>> o set the writeback ranges started by defragment with flag EXTENT_DEFRAG
>>>>> o at endio, after we finish updating fs tree, we use backref walking to find
>>>>> all parents of the ranges and re-link them with the new COWed file layout by
>>>>> adding corresponding backrefs.
>>>>>
>>>>> Originally patch by Li Zefan <lizf@cn.fujitsu.com>
>>>>> Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
>>>>
>>>> I'm hitting the WARN_ON in record_extent_backrefs() indicating a
>>>> problem with the return value from iterate_inodes_from_logical().
>>
>> Me too. It triggers reliably with mount -o autodefrag, and then crashes
>> in the next function ;)
>>
>> -chris
>>
>
> Good news, I'm starting to hit the crash (a NULL pointer crash) ;)
>
> thanks,
> liubo
I'm also starting to hit this crash while balancing a test partition.
I guess this isn't surprising since both autodefrag and balancing make
use of relocation.
* Re: [PATCH 2/2 v3] Btrfs: snapshot-aware defrag
2012-10-03 14:02 ` Chris Mason
2012-10-04 14:22 ` Liu Bo
@ 2012-10-08 12:18 ` Liu Bo
2012-10-08 13:19 ` Chris Mason
1 sibling, 1 reply; 14+ messages in thread
From: Liu Bo @ 2012-10-08 12:18 UTC (permalink / raw)
To: Chris Mason, Mitch Harder; +Cc: linux-btrfs@vger.kernel.org, dave@jikos.cz
On 10/03/2012 10:02 PM, Chris Mason wrote:
> On Tue, Sep 25, 2012 at 07:07:53PM -0600, Liu Bo wrote:
>> On 09/26/2012 01:39 AM, Mitch Harder wrote:
>>> On Mon, Sep 17, 2012 at 4:58 AM, Liu Bo <bo.li.liu@oracle.com> wrote:
>>>> This comes from one of btrfs's project ideas,
>>>> As we defragment files, we break any sharing from other snapshots.
>>>> The balancing code will preserve the sharing, and defrag needs to grow this
>>>> as well.
>>>>
>>>> Now we're able to fill the blank with this patch, in which we make full use of
>>>> backref walking stuff.
>>>>
>>>> Here is the basic idea,
>>>> o set the writeback ranges started by defragment with flag EXTENT_DEFRAG
>>>> o at endio, after we finish updating fs tree, we use backref walking to find
>>>> all parents of the ranges and re-link them with the new COWed file layout by
>>>> adding corresponding backrefs.
>>>>
>>>> Originally patch by Li Zefan <lizf@cn.fujitsu.com>
>>>> Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
>>>
>>> I'm hitting the WARN_ON in record_extent_backrefs() indicating a
>>> problem with the return value from iterate_inodes_from_logical().
>
> Me too. It triggers reliably with mount -o autodefrag, and then crashes
> in the next function ;)
>
> -chris
>
Hi Chris, Mitch,
I'm afraid I may need a little more time to fix all the bugs in it, because there seem to be
some backref walking bugs mixed in, and at least 4 different crashes make them harder to pin down.
I use a 1G random-write fio job running in the background, followed by creating 20 snapshots in
the background, on a mount with -o autodefrag.
So if your crash is reliably in one place, please let me know the steps to reproduce.
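The reproduction recipe described here can be sketched as a script. Device and mount point are placeholders, and the commands are only collected and printed rather than run, since they need a dedicated scratch btrfs device:

```shell
# Sketch of the reproduction recipe: a 1G random-write fio job with
# 20 snapshots taken concurrently on an autodefrag mount.
DEV=/dev/sdX1      # placeholder scratch device
MNT=/mnt/scratch   # placeholder mount point

RECIPE="mkfs.btrfs -f $DEV
mount -o autodefrag $DEV $MNT
fio --name=randw --directory=$MNT --rw=randwrite --size=1G --bs=4k &
for i in \$(seq 1 20); do btrfs subvolume snapshot $MNT $MNT/snap-\$i; done
wait"

# Print the recipe instead of executing it.
printf '%s\n' "$RECIPE"
```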
thanks,
liubo
* Re: [PATCH 2/2 v3] Btrfs: snapshot-aware defrag
2012-10-08 12:18 ` Liu Bo
@ 2012-10-08 13:19 ` Chris Mason
2012-10-08 15:06 ` Mitch Harder
0 siblings, 1 reply; 14+ messages in thread
From: Chris Mason @ 2012-10-08 13:19 UTC (permalink / raw)
To: Liu Bo
Cc: Chris Mason, Mitch Harder, linux-btrfs@vger.kernel.org,
dave@jikos.cz
On Mon, Oct 08, 2012 at 06:18:26AM -0600, Liu Bo wrote:
> On 10/03/2012 10:02 PM, Chris Mason wrote:
> > On Tue, Sep 25, 2012 at 07:07:53PM -0600, Liu Bo wrote:
> >> On 09/26/2012 01:39 AM, Mitch Harder wrote:
> >>> On Mon, Sep 17, 2012 at 4:58 AM, Liu Bo <bo.li.liu@oracle.com> wrote:
> >>>> This comes from one of btrfs's project ideas,
> >>>> As we defragment files, we break any sharing from other snapshots.
> >>>> The balancing code will preserve the sharing, and defrag needs to grow this
> >>>> as well.
> >>>>
> >>>> Now we're able to fill the blank with this patch, in which we make full use of
> >>>> backref walking stuff.
> >>>>
> >>>> Here is the basic idea,
> >>>> o set the writeback ranges started by defragment with flag EXTENT_DEFRAG
> >>>> o at endio, after we finish updating fs tree, we use backref walking to find
> >>>> all parents of the ranges and re-link them with the new COWed file layout by
> >>>> adding corresponding backrefs.
> >>>>
> >>>> Originally patch by Li Zefan <lizf@cn.fujitsu.com>
> >>>> Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
> >>>
> >>> I'm hitting the WARN_ON in record_extent_backrefs() indicating a
> >>> problem with the return value from iterate_inodes_from_logical().
> >
> > Me too. It triggers reliably with mount -o autodefrag, and then crashes
> > in the next function ;)
> >
> > -chris
> >
>
> Hi Chris, Mitch,
>
> I'm afraid I may need a little more time to fix all the bugs in it, because there seem to be
> some backref walking bugs mixed in, and at least 4 different crashes make them harder to pin down.
>
> I use a 1G random-write fio job running in the background, followed by creating 20 snapshots in
> the background, on a mount with -o autodefrag.
>
> So if your crash is reliably in one place, please let me know the steps to reproduce.
I have a notmuch mail database. I just receive mail with auto defrag on
and it crashes. Chrome databases may do it as well.
If it helps, I have compression too.
-chris
* Re: [PATCH 2/2 v3] Btrfs: snapshot-aware defrag
2012-10-08 13:19 ` Chris Mason
@ 2012-10-08 15:06 ` Mitch Harder
0 siblings, 0 replies; 14+ messages in thread
From: Mitch Harder @ 2012-10-08 15:06 UTC (permalink / raw)
To: Chris Mason, Liu Bo, Chris Mason, Mitch Harder,
linux-btrfs@vger.kernel.org, dave@jikos.cz
On Mon, Oct 8, 2012 at 8:19 AM, Chris Mason <chris.mason@fusionio.com> wrote:
> On Mon, Oct 08, 2012 at 06:18:26AM -0600, Liu Bo wrote:
>> On 10/03/2012 10:02 PM, Chris Mason wrote:
>> > On Tue, Sep 25, 2012 at 07:07:53PM -0600, Liu Bo wrote:
>> >> On 09/26/2012 01:39 AM, Mitch Harder wrote:
>> >>> On Mon, Sep 17, 2012 at 4:58 AM, Liu Bo <bo.li.liu@oracle.com> wrote:
>> >>>> This comes from one of btrfs's project ideas,
>> >>>> As we defragment files, we break any sharing from other snapshots.
>> >>>> The balancing code will preserve the sharing, and defrag needs to grow this
>> >>>> as well.
>> >>>>
>> >>>> Now we're able to fill the blank with this patch, in which we make full use of
>> >>>> backref walking stuff.
>> >>>>
>> >>>> Here is the basic idea,
>> >>>> o set the writeback ranges started by defragment with flag EXTENT_DEFRAG
>> >>>> o at endio, after we finish updating fs tree, we use backref walking to find
>> >>>> all parents of the ranges and re-link them with the new COWed file layout by
>> >>>> adding corresponding backrefs.
>> >>>>
>> >>>> Originally patch by Li Zefan <lizf@cn.fujitsu.com>
>> >>>> Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
>> >>>
>> >>> I'm hitting the WARN_ON in record_extent_backrefs() indicating a
>> >>> problem with the return value from iterate_inodes_from_logical().
>> >
>> > Me too. It triggers reliably with mount -o autodefrag, and then crashes
>> > in the next function ;)
>> >
>> > -chris
>> >
>>
>> Hi Chris, Mitch,
>>
>> I'm afraid I may need a little more time to fix all the bugs in it, because there seem to be
>> some backref walking bugs mixed in, and at least 4 different crashes make them harder to pin down.
>>
>> I use a 1G random-write fio job running in the background, followed by creating 20 snapshots in
>> the background, on a mount with -o autodefrag.
>>
>> So if your crash is reliably in one place, please let me know the steps to reproduce.
>
> I have a notmuch mail database. I just receive mail with auto defrag on
> and it crashes. Chrome databases may do it as well.
>
> If it helps, I have compression too.
>
> -chris
>
I can usually reproduce fairly quickly, but I don't have a test that
fails in exactly the same spot every time.
My tests usually involve manipulating kernel git sources with
autodefrag (and usually lzo compression). I have also hit a similar
error when balancing a partition with multiple snapshots.
I'll go back and review my methods for replicating, and see if any of
them can reproduce predictably.
end of thread, other threads: [~2012-10-08 15:06 UTC | newest]
Thread overview: 14+ messages
2012-09-17 9:58 [PATCH 1/2 v3] Btrfs: use flag EXTENT_DEFRAG for snapshot-aware defrag Liu Bo
2012-09-17 9:58 ` [PATCH 2/2 v3] Btrfs: " Liu Bo
2012-09-17 10:04 ` Liu Bo
2012-09-17 17:15 ` Josef Bacik
2012-09-18 0:23 ` Liu Bo
2012-09-18 13:10 ` Josef Bacik
2012-09-25 17:39 ` Mitch Harder
2012-09-26 1:07 ` Liu Bo
2012-10-03 14:02 ` Chris Mason
2012-10-04 14:22 ` Liu Bo
2012-10-04 19:40 ` Mitch Harder
2012-10-08 12:18 ` Liu Bo
2012-10-08 13:19 ` Chris Mason
2012-10-08 15:06 ` Mitch Harder