* [PATCH 3/4] update nodatacow code
@ 2008-10-27 13:38 Yan Zheng
0 siblings, 0 replies; only message in thread
From: Yan Zheng @ 2008-10-27 13:38 UTC (permalink / raw)
To: linux-btrfs, Chris Mason; +Cc: yanzheng
Hello,
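This patch simplifies the nodatacow checker. If all references to an
extent belong to the current root (or the tree log) and were created
after the root's latest snapshot, then no snapshot can share the extent
and we can avoid COW safely. This patch also updates run_delalloc_nocow
to do more fine-grained checking: the COW/NOCOW decision is now made per
file extent instead of falling back to COW for the whole range.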
Regards
Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
---
diff -urp 3/fs/btrfs/ctree.h 4/fs/btrfs/ctree.h
--- 3/fs/btrfs/ctree.h 2008-10-27 16:31:51.000000000 +0800
+++ 4/fs/btrfs/ctree.h 2008-10-27 16:34:27.000000000 +0800
@@ -445,6 +445,7 @@ struct btrfs_root_item {
__le64 bytenr;
__le64 byte_limit;
__le64 bytes_used;
+ __le64 last_snapshot;
__le32 flags;
__le32 refs;
struct btrfs_disk_key drop_progress;
@@ -1375,6 +1376,8 @@ BTRFS_SETGET_STACK_FUNCS(root_refs, stru
BTRFS_SETGET_STACK_FUNCS(root_flags, struct btrfs_root_item, flags, 32);
BTRFS_SETGET_STACK_FUNCS(root_used, struct btrfs_root_item, bytes_used, 64);
BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, byte_limit, 64);
+BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item,
+ last_snapshot, 64);
/* struct btrfs_super_block */
BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64);
@@ -1504,9 +1507,8 @@ int btrfs_update_pinned_extents(struct b
u64 bytenr, u64 num, int pin);
int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *leaf);
-int btrfs_cross_ref_exists(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_key *key, u64 bytenr);
+int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 bytenr);
int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy);
diff -urp 3/fs/btrfs/extent-tree.c 4/fs/btrfs/extent-tree.c
--- 3/fs/btrfs/extent-tree.c 2008-10-27 16:31:54.000000000 +0800
+++ 4/fs/btrfs/extent-tree.c 2008-10-27 21:12:49.000000000 +0800
@@ -848,9 +848,8 @@ out:
return 0;
}
-static int get_reference_status(struct btrfs_root *root, u64 bytenr,
- u64 parent_gen, u64 ref_objectid,
- u64 *min_generation, u32 *ref_count)
+int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 bytenr)
{
struct btrfs_root *extent_root = root->fs_info->extent_root;
struct btrfs_path *path;
@@ -858,8 +857,8 @@ static int get_reference_status(struct b
struct btrfs_extent_ref *ref_item;
struct btrfs_key key;
struct btrfs_key found_key;
- u64 root_objectid = root->root_key.objectid;
- u64 ref_generation;
+ u64 ref_root;
+ u64 last_snapshot;
u32 nritems;
int ret;
@@ -872,7 +871,9 @@ static int get_reference_status(struct b
if (ret < 0)
goto out;
BUG_ON(ret == 0);
- if (ret < 0 || path->slots[0] == 0)
+
+ ret = -ENOENT;
+ if (path->slots[0] == 0)
goto out;
path->slots[0]--;
@@ -880,14 +881,10 @@ static int get_reference_status(struct b
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
if (found_key.objectid != bytenr ||
- found_key.type != BTRFS_EXTENT_ITEM_KEY) {
- ret = 1;
+ found_key.type != BTRFS_EXTENT_ITEM_KEY)
goto out;
- }
-
- *ref_count = 0;
- *min_generation = (u64)-1;
+ last_snapshot = btrfs_root_last_snapshot(&root->root_item);
while (1) {
leaf = path->nodes[0];
nritems = btrfs_header_nritems(leaf);
@@ -910,114 +907,22 @@ static int get_reference_status(struct b
ref_item = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_extent_ref);
- ref_generation = btrfs_ref_generation(leaf, ref_item);
- /*
- * For (parent_gen > 0 && parent_gen > ref_generation):
- *
- * we reach here through the oldest root, therefore
- * all other reference from same snapshot should have
- * a larger generation.
- */
- if ((root_objectid != btrfs_ref_root(leaf, ref_item)) ||
- (parent_gen > 0 && parent_gen > ref_generation) ||
- (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID &&
- ref_objectid != btrfs_ref_objectid(leaf, ref_item))) {
- *ref_count = 2;
- break;
- }
-
- *ref_count = 1;
- if (*min_generation > ref_generation)
- *min_generation = ref_generation;
-
- path->slots[0]++;
- }
- ret = 0;
-out:
- btrfs_free_path(path);
- return ret;
-}
-
-int btrfs_cross_ref_exists(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_key *key, u64 bytenr)
-{
- struct btrfs_root *old_root;
- struct btrfs_path *path = NULL;
- struct extent_buffer *eb;
- struct btrfs_file_extent_item *item;
- u64 ref_generation;
- u64 min_generation;
- u64 extent_start;
- u32 ref_count;
- int level;
- int ret;
-
- BUG_ON(trans == NULL);
- BUG_ON(key->type != BTRFS_EXTENT_DATA_KEY);
- ret = get_reference_status(root, bytenr, 0, key->objectid,
- &min_generation, &ref_count);
- if (ret)
- return ret;
-
- if (ref_count != 1)
- return 1;
-
- old_root = root->dirty_root->root;
- ref_generation = old_root->root_key.offset;
-
- /* all references are created in running transaction */
- if (min_generation > ref_generation) {
- ret = 0;
- goto out;
- }
-
- path = btrfs_alloc_path();
- if (!path) {
- ret = -ENOMEM;
- goto out;
- }
-
- path->skip_locking = 1;
- /* if no item found, the extent is referenced by other snapshot */
- ret = btrfs_search_slot(NULL, old_root, key, path, 0, 0);
- if (ret)
- goto out;
-
- eb = path->nodes[0];
- item = btrfs_item_ptr(eb, path->slots[0],
- struct btrfs_file_extent_item);
- if (btrfs_file_extent_type(eb, item) != BTRFS_FILE_EXTENT_REG ||
- btrfs_file_extent_disk_bytenr(eb, item) != bytenr) {
- ret = 1;
- goto out;
- }
-
- for (level = BTRFS_MAX_LEVEL - 1; level >= -1; level--) {
- if (level >= 0) {
- eb = path->nodes[level];
- if (!eb)
- continue;
- extent_start = eb->start;
- } else
- extent_start = bytenr;
-
- ret = get_reference_status(root, extent_start, ref_generation,
- 0, &min_generation, &ref_count);
- if (ret)
+ ref_root = btrfs_ref_root(leaf, ref_item);
+ if (ref_root != root->root_key.objectid &&
+ ref_root != BTRFS_TREE_LOG_OBJECTID) {
+ ret = 1;
goto out;
-
- if (ref_count != 1) {
+ }
+ if (btrfs_ref_generation(leaf, ref_item) <= last_snapshot) {
ret = 1;
goto out;
}
- if (level >= 0)
- ref_generation = btrfs_header_generation(eb);
+
+ path->slots[0]++;
}
ret = 0;
out:
- if (path)
- btrfs_free_path(path);
+ btrfs_free_path(path);
return ret;
}
diff -urp 3/fs/btrfs/inode.c 4/fs/btrfs/inode.c
--- 3/fs/btrfs/inode.c 2008-10-27 21:25:18.000000000 +0800
+++ 4/fs/btrfs/inode.c 2008-10-27 21:26:31.000000000 +0800
@@ -201,108 +201,144 @@ out:
*/
static int run_delalloc_nocow(struct inode *inode, u64 start, u64 end)
{
- u64 extent_start;
- u64 extent_end;
- u64 bytenr;
- u64 loops = 0;
- u64 total_fs_bytes;
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_block_group_cache *block_group;
struct btrfs_trans_handle *trans;
struct extent_buffer *leaf;
- int found_type;
struct btrfs_path *path;
- struct btrfs_file_extent_item *item;
- int ret;
- int err = 0;
+ struct btrfs_file_extent_item *fi;
+ struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+ struct extent_map *em;
struct btrfs_key found_key;
+ u64 cow_start;
+ u64 cur_offset;
+ u64 extent_end;
+ u64 disk_bytenr;
+ int extent_type;
+ int ret;
+ int type;
+ int nocow;
+ int check_prev = 1;
- total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
path = btrfs_alloc_path();
BUG_ON(!path);
trans = btrfs_join_transaction(root, 1);
BUG_ON(!trans);
-again:
- ret = btrfs_lookup_file_extent(NULL, root, path,
- inode->i_ino, start, 0);
- if (ret < 0) {
- err = ret;
- goto out;
- }
-
- if (ret != 0) {
- if (path->slots[0] == 0)
- goto not_found;
- path->slots[0]--;
- }
-
- leaf = path->nodes[0];
- item = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_file_extent_item);
-
- /* are we inside the extent that was found? */
- btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
- found_type = btrfs_key_type(&found_key);
- if (found_key.objectid != inode->i_ino ||
- found_type != BTRFS_EXTENT_DATA_KEY)
- goto not_found;
- found_type = btrfs_file_extent_type(leaf, item);
- extent_start = found_key.offset;
- if (found_type == BTRFS_FILE_EXTENT_REG) {
- u64 extent_num_bytes;
-
- extent_num_bytes = btrfs_file_extent_num_bytes(leaf, item);
- extent_end = extent_start + extent_num_bytes;
- err = 0;
-
- if (loops && start != extent_start)
- goto not_found;
+ cow_start = (u64)-1;
+ cur_offset = start;
+ while (1) {
+ ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
+ cur_offset, 0);
+ BUG_ON(ret < 0);
+ if (ret > 0 && path->slots[0] > 0 && check_prev) {
+ leaf = path->nodes[0];
+ btrfs_item_key_to_cpu(leaf, &found_key,
+ path->slots[0] - 1);
+ if (found_key.objectid == inode->i_ino &&
+ found_key.type == BTRFS_EXTENT_DATA_KEY)
+ path->slots[0]--;
+ }
+ check_prev = 0;
+next_slot:
+ leaf = path->nodes[0];
+ if (path->slots[0] >= btrfs_header_nritems(leaf)) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret < 0)
+ BUG_ON(1);
+ if (ret > 0)
+ break;
+ leaf = path->nodes[0];
+ }
- if (start < extent_start || start >= extent_end)
- goto not_found;
+ nocow = 0;
+ disk_bytenr = 0;
+ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
- bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
- if (bytenr == 0)
- goto not_found;
+ if (found_key.objectid > inode->i_ino ||
+ found_key.type > BTRFS_EXTENT_DATA_KEY ||
+ found_key.offset > end)
+ break;
- if (btrfs_cross_ref_exists(trans, root, &found_key, bytenr))
- goto not_found;
- /*
- * we may be called by the resizer, make sure we're inside
- * the limits of the FS
- */
- block_group = btrfs_lookup_block_group(root->fs_info,
- bytenr);
- if (!block_group || block_group->ro)
- goto not_found;
+ if (found_key.offset > cur_offset) {
+ extent_end = found_key.offset;
+ goto out_check;
+ }
- bytenr += btrfs_file_extent_offset(leaf, item);
- extent_num_bytes = min(end + 1, extent_end) - start;
- ret = btrfs_add_ordered_extent(inode, start, bytenr,
- extent_num_bytes, 1);
- if (ret) {
- err = ret;
- goto out;
+ fi = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_file_extent_item);
+ extent_type = btrfs_file_extent_type(leaf, fi);
+
+ if (extent_type == BTRFS_FILE_EXTENT_REG) {
+ struct btrfs_block_group_cache *block_group;
+ disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
+ extent_end = found_key.offset +
+ btrfs_file_extent_num_bytes(leaf, fi);
+ if (extent_end <= start) {
+ path->slots[0]++;
+ goto next_slot;
+ }
+ if (disk_bytenr == 0 ||
+ btrfs_cross_ref_exist(trans, root, disk_bytenr))
+ goto out_check;
+ block_group = btrfs_lookup_block_group(root->fs_info,
+ disk_bytenr);
+ if (!block_group || block_group->ro)
+ goto out_check;
+ disk_bytenr += btrfs_file_extent_offset(leaf, fi);
+ nocow = 1;
+ } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
+ struct btrfs_item *item;
+ item = btrfs_item_nr(leaf, path->slots[0]);
+ extent_end = found_key.offset +
+ btrfs_file_extent_inline_len(leaf, item);
+ extent_end = ALIGN(extent_end, root->sectorsize);
+ } else {
+ BUG_ON(1);
+ }
+out_check:
+ if (extent_end <= start) {
+ path->slots[0]++;
+ goto next_slot;
+ }
+ if (!nocow) {
+ if (cow_start == (u64)-1)
+ cow_start = cur_offset;
+ cur_offset = extent_end;
+ if (cur_offset > end)
+ break;
+ path->slots[0]++;
+ goto next_slot;
}
btrfs_release_path(root, path);
- start = extent_end;
- if (start <= end) {
- loops++;
- goto again;
+ if (cow_start != (u64)-1) {
+ ret = cow_file_range(inode, cow_start,
+ found_key.offset - 1);
+ BUG_ON(ret);
+ cow_start = (u64)-1;
}
- } else {
-not_found:
- btrfs_end_transaction(trans, root);
- btrfs_free_path(path);
- return cow_file_range(inode, start, end);
+
+ disk_bytenr += cur_offset - found_key.offset;
+ ret = btrfs_add_ordered_extent(inode, cur_offset, disk_bytenr,
+ min(end + 1, extent_end) - cur_offset,
+ BTRFS_ORDERED_NOCOW);
+ cur_offset = extent_end;
+ if (cur_offset > end)
+ break;
}
-out:
- WARN_ON(err);
- btrfs_end_transaction(trans, root);
+ btrfs_release_path(root, path);
+
+ if (cur_offset <= end && cow_start == (u64)-1)
+ cow_start = cur_offset;
+ if (cow_start != (u64)-1) {
+ ret = cow_file_range(inode, cow_start, end);
+ BUG_ON(ret);
+ }
+
+ ret = btrfs_end_transaction(trans, root);
+ BUG_ON(ret);
btrfs_free_path(path);
- return err;
+ return 0;
}
/*
diff -urp 3/fs/btrfs/ordered-data.c 4/fs/btrfs/ordered-data.c
--- 3/fs/btrfs/ordered-data.c 2008-10-24 09:18:54.000000000 +0800
+++ 4/fs/btrfs/ordered-data.c 2008-10-27 16:37:28.000000000 +0800
@@ -165,7 +165,7 @@ static inline struct rb_node *tree_searc
* inserted.
*/
int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
- u64 start, u64 len, int nocow)
+ u64 start, u64 len, int type)
{
struct btrfs_ordered_inode_tree *tree;
struct rb_node *node;
@@ -181,8 +181,9 @@ int btrfs_add_ordered_extent(struct inod
entry->start = start;
entry->len = len;
entry->inode = inode;
- if (nocow)
- set_bit(BTRFS_ORDERED_NOCOW, &entry->flags);
+
+ if (type == BTRFS_ORDERED_NOCOW)
+ set_bit(type, &entry->flags);
/* one ref for the tree */
atomic_set(&entry->refs, 1);
diff -urp 3/fs/btrfs/ordered-data.h 4/fs/btrfs/ordered-data.h
--- 3/fs/btrfs/ordered-data.h 2008-10-24 09:18:54.000000000 +0800
+++ 4/fs/btrfs/ordered-data.h 2008-10-27 16:37:41.000000000 +0800
@@ -127,7 +127,7 @@ int btrfs_remove_ordered_extent(struct i
int btrfs_dec_test_ordered_pending(struct inode *inode,
u64 file_offset, u64 io_size);
int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
- u64 start, u64 len, int nocow);
+ u64 start, u64 len, int type);
int btrfs_add_ordered_sum(struct inode *inode,
struct btrfs_ordered_extent *entry,
struct btrfs_ordered_sum *sum);
diff -urp 3/fs/btrfs/transaction.c 4/fs/btrfs/transaction.c
--- 3/fs/btrfs/transaction.c 2008-10-27 16:31:48.000000000 +0800
+++ 4/fs/btrfs/transaction.c 2008-10-27 16:34:27.000000000 +0800
@@ -751,6 +751,9 @@ static noinline int create_pending_snaps
if (ret)
goto fail;
+ btrfs_record_root_in_trans(root);
+ btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
+
memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
key.objectid = objectid;
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2008-10-27 13:38 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-10-27 13:38 [PATCH 3/4] update nodatacow code Yan Zheng
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.