From mboxrd@z Thu Jan 1 00:00:00 1970 From: Miao Xie Subject: Re: [RFC PATCH 2/2] Btrfs: introduce free space cluster for each node Date: Tue, 01 Nov 2011 15:39:11 +0800 Message-ID: <4EAFA21F.5010200@cn.fujitsu.com> References: <4E687A41.8080101@cn.fujitsu.com> Reply-To: miaox@cn.fujitsu.com Mime-Version: 1.0 Content-Type: text/plain; charset=GB2312 To: Linux Btrfs Return-path: In-Reply-To: <4E687A41.8080101@cn.fujitsu.com> List-ID: Any Comment? On Thu, 08 Sep 2011 16:18:09 +0800, Miao Xie wrote: > This patch introduce free space cluster for each node in the b+tree. = And > we also simplify the fill method and the space allocation of the clus= ter: > - Allocate free space cluster for each node > - Allocate the free space extent (>=3D256KB, split a large extent or = get the > contiguous blocks from the bitmaps) from the block group cache and = cache > it in the cluster, instead of moving the free space entry from the = block > group cache to the cluster directly. By this way, we just manage a = extent > in the cluster. > - When doing the tree block allocation, we can get the space just by = split > the extent in the parent's cluster. By this way, we can allocate th= e tree > block concurrently and also can store the child nodes/leaves into t= he > contiguous blocks. >=20 > Beside that, we modify the source of the space cache to make it fit t= he above > change. When we write out the information of the free space, we also = write out > the extent information in the clusters. And when we load the free spa= ce > information, we will try to merge the free space fragment at first, b= ecause the > extent in the clusters is small, and may merge them to be a large one= =2E > (Before applying this patch, we build the free space tree by the cach= ed information > directly. I think we needn't worry about the compatibility. At the wo= rst, we may > create lots of small extents which may be merge into a large one, but= it will not > break the use of Btrfs.=A3=A9 >=20 > We did some performance test for this patch, we find it works well, a= nd make the > small file performance grow up. >=20 > The sequential write performance of the small file: > N_files N_threads File Size Before After > 10240 8 2KB(Inline) 2.2055MB/s 4.1779MB/s > 10240 8 4KB 1.001MB/s 1.1893MB/s >=20 > Signed-off-by: Miao Xie > --- > fs/btrfs/ctree.c | 28 ++- > fs/btrfs/ctree.h | 50 +++- > fs/btrfs/disk-io.c | 2 +- > fs/btrfs/extent-tree.c | 107 +++++--- > fs/btrfs/extent_io.c | 7 +- > fs/btrfs/extent_io.h | 3 + > fs/btrfs/free-space-cache.c | 667 ++++++++++++++++-----------------= ---------- > fs/btrfs/free-space-cache.h | 13 + > fs/btrfs/inode-map.c | 1 + > fs/btrfs/inode.c | 25 +- > fs/btrfs/ioctl.c | 2 +- > fs/btrfs/tree-log.c | 7 + > 12 files changed, 419 insertions(+), 493 deletions(-) >=20 > diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c > index 011cab3..1d3edd8 100644 > --- a/fs/btrfs/ctree.c > +++ b/fs/btrfs/ctree.c > @@ -238,7 +238,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *tr= ans, > else > btrfs_node_key(buf, &disk_key, 0); > =20 > - cow =3D btrfs_alloc_free_block(trans, root, buf->len, 0, > + cow =3D btrfs_alloc_free_block(trans, root, buf->len, NULL, 0, > new_root_objectid, &disk_key, level, > buf->start, 0); > if (IS_ERR(cow)) > @@ -444,9 +444,10 @@ static noinline int __btrfs_cow_block(struct btr= fs_trans_handle *trans, > } else > parent_start =3D 0; > =20 > - cow =3D btrfs_alloc_free_block(trans, root, buf->len, parent_start, > - root->root_key.objectid, &disk_key, > - level, search_start, empty_size); > + cow =3D btrfs_alloc_free_block(trans, root, buf->len, parent, > + parent_start, root->root_key.objectid, > + &disk_key, level, search_start, > + empty_size); > if (IS_ERR(cow)) > return PTR_ERR(cow); > =20 > @@ -467,6 +468,10 @@ static noinline int __btrfs_cow_block(struct btr= fs_trans_handle *trans, > (unsigned long)btrfs_header_fsid(cow), > BTRFS_FSID_SIZE); > =20 > + WARN_ON(cow->cluster); > + cow->cluster =3D buf->cluster; > + buf->cluster =3D NULL; > + > update_ref_for_cow(trans, root, buf, cow, &last_ref); > =20 > if (root->ref_cows) > @@ -2070,7 +2075,7 @@ static noinline int insert_new_root(struct btrf= s_trans_handle *trans, > else > btrfs_node_key(lower, &lower_key, 0); > =20 > - c =3D btrfs_alloc_free_block(trans, root, root->nodesize, 0, > + c =3D btrfs_alloc_free_block(trans, root, root->nodesize, NULL, 0, > root->root_key.objectid, &lower_key, > level, root->node->start, 0); > if (IS_ERR(c)) > @@ -2170,6 +2175,7 @@ static noinline int split_node(struct btrfs_tra= ns_handle *trans, > { > struct extent_buffer *c; > struct extent_buffer *split; > + struct extent_buffer *parent =3D NULL; > struct btrfs_disk_key disk_key; > int mid; > int ret; > @@ -2197,9 +2203,12 @@ static noinline int split_node(struct btrfs_tr= ans_handle *trans, > mid =3D (c_nritems + 1) / 2; > btrfs_node_key(c, &disk_key, mid); > =20 > - split =3D btrfs_alloc_free_block(trans, root, root->nodesize, 0, > - root->root_key.objectid, > - &disk_key, level, c->start, 0); > + if (level + 1 < BTRFS_MAX_LEVEL) > + parent =3D path->nodes[level + 1]; > + > + split =3D btrfs_alloc_free_block(trans, root, root->nodesize, paren= t, 0, > + root->root_key.objectid, > + &disk_key, level, c->start, 0); > if (IS_ERR(split)) > return PTR_ERR(split); > =20 > @@ -2951,7 +2960,8 @@ again: > else > btrfs_item_key(l, &disk_key, mid); > =20 > - right =3D btrfs_alloc_free_block(trans, root, root->leafsize, 0, > + right =3D btrfs_alloc_free_block(trans, root, root->leafsize, > + path->nodes[1], 0, > root->root_key.objectid, > &disk_key, 0, l->start, 0); > if (IS_ERR(right)) > diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h > index c364d50..8c33a74 100644 > --- a/fs/btrfs/ctree.h > +++ b/fs/btrfs/ctree.h > @@ -789,14 +789,9 @@ struct btrfs_block_rsv { > */ > struct btrfs_free_cluster { > spinlock_t lock; > - spinlock_t refill_lock; > - struct rb_root root; > + struct mutex refill_lock; > =20 > - /* largest extent in this cluster */ > - u64 max_size; > - > - /* first extent starting offset */ > - u64 window_start; > + struct btrfs_free_space *info; > =20 > struct btrfs_block_group_cache *block_group; > /* > @@ -2156,7 +2151,9 @@ u64 btrfs_find_block_group(struct btrfs_root *r= oot, > u64 search_start, u64 search_hint, int owner); > struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_hand= le *trans, > struct btrfs_root *root, u32 blocksize, > - u64 parent, u64 root_objectid, > + struct extent_buffer *parent, > + u64 parent_start, > + u64 root_objectid, > struct btrfs_disk_key *key, int level, > u64 hint, u64 empty_size); > void btrfs_free_tree_block(struct btrfs_trans_handle *trans, > @@ -2175,12 +2172,37 @@ int btrfs_alloc_logged_file_extent(struct btr= fs_trans_handle *trans, > struct btrfs_root *root, > u64 root_objectid, u64 owner, u64 offset, > struct btrfs_key *ins); > -int btrfs_reserve_extent(struct btrfs_trans_handle *trans, > - struct btrfs_root *root, > - u64 num_bytes, u64 min_alloc_size, > - u64 empty_size, u64 hint_byte, > - u64 search_end, struct btrfs_key *ins, > - u64 data); > +int __btrfs_reserve_extent(struct btrfs_trans_handle *trans, > + struct btrfs_root *root, > + u64 num_bytes, u64 min_alloc_size, > + u64 empty_size, u64 hint_byte, > + u64 search_end, struct btrfs_key *ins, > + u64 data, struct btrfs_free_cluster *cluster); > +static inline int btrfs_reserve_extent_data(struct btrfs_trans_handl= e *trans, > + struct btrfs_root *root, > + u64 num_bytes, u64 min_alloc_size, > + u64 empty_size, u64 hint_byte, > + u64 search_end, > + struct btrfs_key *ins) > +{ > + return __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_siz= e, > + empty_size, hint_byte, search_end, ins, > + 1, NULL); > +} > + > +static inline int btrfs_reserve_extent_mdata(struct btrfs_trans_hand= le *trans, > + struct btrfs_root *root, > + u64 num_bytes, u64 min_alloc_size, > + u64 empty_size, u64 hint_byte, > + u64 search_end, > + struct btrfs_key *ins, > + struct btrfs_free_cluster *cluster) > +{ > + return __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_siz= e, > + empty_size, hint_byte, search_end, ins, > + 0, cluster); > +} > + > int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_roo= t *root, > struct extent_buffer *buf, int full_backref); > int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_roo= t *root, > diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c > index 07b3ac6..951a57f 100644 > --- a/fs/btrfs/disk-io.c > +++ b/fs/btrfs/disk-io.c > @@ -1171,7 +1171,7 @@ static struct btrfs_root *alloc_log_tree(struct= btrfs_trans_handle *trans, > */ > root->ref_cows =3D 0; > =20 > - leaf =3D btrfs_alloc_free_block(trans, root, root->leafsize, 0, > + leaf =3D btrfs_alloc_free_block(trans, root, root->leafsize, NULL, = 0, > BTRFS_TREE_LOG_OBJECTID, NULL, 0, 0, 0); > if (IS_ERR(leaf)) { > kfree(root); > diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c > index 80d6148..43cc5c4 100644 > --- a/fs/btrfs/extent-tree.c > +++ b/fs/btrfs/extent-tree.c > @@ -4672,6 +4672,8 @@ void btrfs_free_tree_block(struct btrfs_trans_h= andle *trans, > struct btrfs_block_group_cache *cache =3D NULL; > int ret; > =20 > + btrfs_free_extent_cluster(buf); > + > if (root->root_key.objectid !=3D BTRFS_TREE_LOG_OBJECTID) { > ret =3D btrfs_add_delayed_tree_ref(trans, buf->start, buf->len, > parent, root->root_key.objectid, > @@ -4853,8 +4855,10 @@ enum btrfs_loop_type { > LOOP_FIND_IDEAL =3D 0, > LOOP_CACHING_NOWAIT =3D 1, > LOOP_CACHING_WAIT =3D 2, > - LOOP_ALLOC_CHUNK =3D 3, > - LOOP_NO_EMPTY_SIZE =3D 4, > + LOOP_RECLAIM_CLUSTERS =3D 3, > + LOOP_RECLAIM_ALL_CLUSTERS =3D 4, > + LOOP_ALLOC_CHUNK =3D 5, > + LOOP_NO_EMPTY_SIZE =3D 6, > }; > =20 > /* > @@ -4870,7 +4874,8 @@ static noinline int find_free_extent(struct btr= fs_trans_handle *trans, > u64 num_bytes, u64 empty_size, > u64 search_start, u64 search_end, > u64 hint_byte, struct btrfs_key *ins, > - u64 data) > + u64 data, > + struct btrfs_free_cluster *cluster) > { > int ret =3D 0; > struct btrfs_root *root =3D orig_root->fs_info->extent_root; > @@ -4912,9 +4917,12 @@ static noinline int find_free_extent(struct bt= rfs_trans_handle *trans, > allowed_chunk_alloc =3D 1; > =20 > if (data & BTRFS_BLOCK_GROUP_METADATA && use_cluster) { > - last_ptr =3D &root->fs_info->meta_alloc_cluster; > + if (cluster) > + last_ptr =3D cluster; > + else > + last_ptr =3D &root->fs_info->meta_alloc_cluster; > if (!btrfs_test_opt(root, SSD)) > - empty_cluster =3D 64 * 1024; > + empty_cluster =3D 256 * 1024; > } > =20 > if ((data & BTRFS_BLOCK_GROUP_DATA) && use_cluster && > @@ -4925,7 +4933,7 @@ static noinline int find_free_extent(struct btr= fs_trans_handle *trans, > if (last_ptr) { > spin_lock(&last_ptr->lock); > if (last_ptr->block_group) > - hint_byte =3D last_ptr->window_start; > + hint_byte =3D last_ptr->info->offset; > spin_unlock(&last_ptr->lock); > } > =20 > @@ -5043,6 +5051,13 @@ have_block_group: > if (unlikely(block_group->ro)) > goto loop; > =20 > + > + if (loop =3D=3D LOOP_RECLAIM_CLUSTERS) { > + btrfs_reclaim_extent_clusters(block_group, > + empty_cluster * 2); > + } else if (loop =3D=3D LOOP_RECLAIM_ALL_CLUSTERS) > + btrfs_reclaim_extent_clusters(block_group, 0); > + > spin_lock(&block_group->free_space_ctl->tree_lock); > if (cached && > block_group->free_space_ctl->free_space < > @@ -5066,7 +5081,7 @@ have_block_group: > * the refill lock keeps out other > * people trying to start a new cluster > */ > - spin_lock(&last_ptr->refill_lock); > + mutex_lock(&last_ptr->refill_lock); > if (last_ptr->block_group && > (last_ptr->block_group->ro || > !block_group_bits(last_ptr->block_group, data))) { > @@ -5078,7 +5093,7 @@ have_block_group: > num_bytes, search_start); > if (offset) { > /* we have a block, we're done */ > - spin_unlock(&last_ptr->refill_lock); > + mutex_unlock(&last_ptr->refill_lock); > goto checks; > } > =20 > @@ -5097,7 +5112,7 @@ have_block_group: > block_group =3D last_ptr->block_group; > btrfs_get_block_group(block_group); > spin_unlock(&last_ptr->lock); > - spin_unlock(&last_ptr->refill_lock); > + mutex_unlock(&last_ptr->refill_lock); > =20 > last_ptr_loop =3D 1; > search_start =3D block_group->key.objectid; > @@ -5123,7 +5138,7 @@ refill_cluster: > /* allocate a cluster in this block group */ > ret =3D btrfs_find_space_cluster(trans, root, > block_group, last_ptr, > - offset, num_bytes, > + search_start, num_bytes, > empty_cluster + empty_size); > if (ret =3D=3D 0) { > /* > @@ -5135,12 +5150,12 @@ refill_cluster: > search_start); > if (offset) { > /* we found one, proceed */ > - spin_unlock(&last_ptr->refill_lock); > + mutex_unlock(&last_ptr->refill_lock); > goto checks; > } > } else if (!cached && loop > LOOP_CACHING_NOWAIT > && !failed_cluster_refill) { > - spin_unlock(&last_ptr->refill_lock); > + mutex_unlock(&last_ptr->refill_lock); > =20 > failed_cluster_refill =3D true; > wait_block_group_cache_progress(block_group, > @@ -5155,7 +5170,7 @@ refill_cluster: > * to use, and go to the next block group > */ > btrfs_return_cluster_to_free_space(NULL, last_ptr); > - spin_unlock(&last_ptr->refill_lock); > + mutex_unlock(&last_ptr->refill_lock); > goto loop; > } > =20 > @@ -5197,9 +5212,11 @@ checks: > ins->objectid =3D search_start; > ins->offset =3D num_bytes; > =20 > - if (offset < search_start) > + if (offset < search_start) { > btrfs_add_free_space(block_group, offset, > search_start - offset); > + offset =3D search_start; > + } > BUG_ON(offset > search_start); > =20 > ret =3D btrfs_update_reserved_bytes(block_group, num_bytes, 1, > @@ -5210,13 +5227,6 @@ checks: > } > =20 > /* we are all good, lets return */ > - ins->objectid =3D search_start; > - ins->offset =3D num_bytes; > - > - if (offset < search_start) > - btrfs_add_free_space(block_group, offset, > - search_start - offset); > - BUG_ON(offset > search_start); > btrfs_put_block_group(block_group); > break; > loop: > @@ -5236,6 +5246,12 @@ loop: > * LOOP_CACHING_NOWAIT, search partially cached block groups, kicki= ng > * caching kthreads as we move along > * LOOP_CACHING_WAIT, search everything, and wait if our bg is cach= ing > + * LOOP_RECLAIM_CLUSTERS, reclaim free space from some clusters, an= d > + * by this way, we may find enough continuous > + * space to fill the cluster, and then search > + * the free space again. > + * LOOP_RECLAIM_ALL_CLUSTERS, reclaim free space from all the clust= ers, > + * and then search again. > * LOOP_ALLOC_CHUNK, force a chunk allocation and try again > * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and tr= y > * again > @@ -5362,12 +5378,12 @@ again: > up_read(&info->groups_sem); > } > =20 > -int btrfs_reserve_extent(struct btrfs_trans_handle *trans, > - struct btrfs_root *root, > - u64 num_bytes, u64 min_alloc_size, > - u64 empty_size, u64 hint_byte, > - u64 search_end, struct btrfs_key *ins, > - u64 data) > +int __btrfs_reserve_extent(struct btrfs_trans_handle *trans, > + struct btrfs_root *root, > + u64 num_bytes, u64 min_alloc_size, > + u64 empty_size, u64 hint_byte, > + u64 search_end, struct btrfs_key *ins, > + u64 data, struct btrfs_free_cluster *cluster) > { > int ret; > u64 search_start =3D 0; > @@ -5386,7 +5402,7 @@ again: > WARN_ON(num_bytes < root->sectorsize); > ret =3D find_free_extent(trans, root, num_bytes, empty_size, > search_start, search_end, hint_byte, > - ins, data); > + ins, data, cluster); > =20 > if (ret =3D=3D -ENOSPC && num_bytes > min_alloc_size) { > num_bytes =3D num_bytes >> 1; > @@ -5741,13 +5757,15 @@ static void unuse_block_rsv(struct btrfs_bloc= k_rsv *block_rsv, u32 blocksize) > */ > struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_hand= le *trans, > struct btrfs_root *root, u32 blocksize, > - u64 parent, u64 root_objectid, > + struct extent_buffer *parent, > + u64 parent_start, u64 root_objectid, > struct btrfs_disk_key *key, int level, > u64 hint, u64 empty_size) > { > struct btrfs_key ins; > struct btrfs_block_rsv *block_rsv; > struct extent_buffer *buf; > + struct btrfs_free_cluster *cluster =3D NULL; > u64 flags =3D 0; > int ret; > =20 > @@ -5756,8 +5774,19 @@ struct extent_buffer *btrfs_alloc_free_block(s= truct btrfs_trans_handle *trans, > if (IS_ERR(block_rsv)) > return ERR_CAST(block_rsv); > =20 > - ret =3D btrfs_reserve_extent(trans, root, blocksize, blocksize, > - empty_size, hint, (u64)-1, &ins, 0); > + /* > + * We needn't worry about allocation failure, because if failed, > + * we will use the default metadata cluster in fs_info > + */ > + if (parent && !parent->cluster) > + parent->cluster =3D btrfs_alloc_free_cluster(); > + > + if (parent) > + cluster =3D parent->cluster; > + > + ret =3D btrfs_reserve_extent_mdata(trans, root, blocksize, blocksiz= e, > + empty_size, hint, (u64)-1, &ins, > + cluster); > if (ret) { > unuse_block_rsv(block_rsv, blocksize); > return ERR_PTR(ret); > @@ -5768,11 +5797,11 @@ struct extent_buffer *btrfs_alloc_free_block(= struct btrfs_trans_handle *trans, > BUG_ON(IS_ERR(buf)); > =20 > if (root_objectid =3D=3D BTRFS_TREE_RELOC_OBJECTID) { > - if (parent =3D=3D 0) > - parent =3D ins.objectid; > + if (parent_start =3D=3D 0) > + parent_start =3D ins.objectid; > flags |=3D BTRFS_BLOCK_FLAG_FULL_BACKREF; > } else > - BUG_ON(parent > 0); > + BUG_ON(parent_start > 0); > =20 > if (root_objectid !=3D BTRFS_TREE_LOG_OBJECTID) { > struct btrfs_delayed_extent_op *extent_op; > @@ -5788,7 +5817,7 @@ struct extent_buffer *btrfs_alloc_free_block(st= ruct btrfs_trans_handle *trans, > extent_op->is_data =3D 0; > =20 > ret =3D btrfs_add_delayed_tree_ref(trans, ins.objectid, > - ins.offset, parent, root_objectid, > + ins.offset, parent_start, root_objectid, > level, BTRFS_ADD_DELAYED_EXTENT, > extent_op); > BUG_ON(ret); > @@ -7231,18 +7260,18 @@ int btrfs_remove_block_group(struct btrfs_tra= ns_handle *trans, > =20 > /* make sure this block group isn't part of an allocation cluster *= / > cluster =3D &root->fs_info->data_alloc_cluster; > - spin_lock(&cluster->refill_lock); > + mutex_lock(&cluster->refill_lock); > btrfs_return_cluster_to_free_space(block_group, cluster); > - spin_unlock(&cluster->refill_lock); > + mutex_unlock(&cluster->refill_lock); > =20 > /* > * make sure this block group isn't part of a metadata > * allocation cluster > */ > cluster =3D &root->fs_info->meta_alloc_cluster; > - spin_lock(&cluster->refill_lock); > + mutex_lock(&cluster->refill_lock); > btrfs_return_cluster_to_free_space(block_group, cluster); > - spin_unlock(&cluster->refill_lock); > + mutex_unlock(&cluster->refill_lock); > =20 > path =3D btrfs_alloc_path(); > if (!path) { > diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c > index d418164..78196f2 100644 > --- a/fs/btrfs/extent_io.c > +++ b/fs/btrfs/extent_io.c > @@ -17,6 +17,7 @@ > #include "compat.h" > #include "ctree.h" > #include "btrfs_inode.h" > +#include "free-space-cache.h" > =20 > static struct kmem_cache *extent_state_cache; > static struct kmem_cache *extent_buffer_cache; > @@ -2988,6 +2989,7 @@ static struct extent_buffer *__alloc_extent_buf= fer(struct extent_io_tree *tree, > spin_unlock_irqrestore(&leak_lock, flags); > #endif > atomic_set(&eb->refs, 1); > + eb->cluster =3D NULL; > =20 > return eb; > } > @@ -3827,7 +3829,10 @@ out: > spin_unlock(&tree->buffer_lock); > =20 > /* at this point we can safely release the extent buffer */ > - if (atomic_read(&eb->refs) =3D=3D 0) > + if (atomic_read(&eb->refs) =3D=3D 0) { > + btrfs_free_extent_cluster(eb); > call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu); > + } > return ret; > } > + > diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h > index 7b2f0c3..8ee8953 100644 > --- a/fs/btrfs/extent_io.h > +++ b/fs/btrfs/extent_io.h > @@ -146,6 +146,9 @@ struct extent_buffer { > * to unlock > */ > wait_queue_head_t read_lock_wq; > + > + /* Used for the node to allocate space for its children */ > + struct btrfs_free_cluster *cluster; > }; > =20 > static inline void extent_set_compress_type(unsigned long *bio_flags= , > diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.= c > index e555899..e540e77 100644 > --- a/fs/btrfs/free-space-cache.c > +++ b/fs/btrfs/free-space-cache.c > @@ -31,9 +31,15 @@ > #define MAX_CACHE_BYTES_PER_GIG (32 * 1024) > =20 > static struct kmem_cache *btrfs_free_space_cachep; > +static struct kmem_cache *btrfs_free_cluster_cachep; > =20 > static int link_free_space(struct btrfs_free_space_ctl *ctl, > struct btrfs_free_space *info); > +static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl, > + struct btrfs_free_space *info, > + bool update_stat); > +static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl, > + struct btrfs_free_space *info, bool update_stat); > =20 > int __init free_space_cache_init(void) > { > @@ -43,6 +49,14 @@ int __init free_space_cache_init(void) > if (!btrfs_free_space_cachep) > return -ENOMEM; > =20 > + btrfs_free_cluster_cachep =3D kmem_cache_create("extent_clusters", > + sizeof(struct btrfs_free_cluster), 0, > + SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); > + if (!btrfs_free_cluster_cachep) { > + kmem_cache_destroy(btrfs_free_space_cachep); > + return -ENOMEM; > + } > + > return 0; > } > =20 > @@ -50,6 +64,8 @@ void free_space_cache_exit(void) > { > if (btrfs_free_space_cachep) > kmem_cache_destroy(btrfs_free_space_cachep); > + if (btrfs_free_cluster_cachep) > + kmem_cache_destroy(btrfs_free_cluster_cachep); > } > =20 > static struct inode *__lookup_free_space_inode(struct btrfs_root *ro= ot, > @@ -397,11 +413,32 @@ int __load_free_space_cache(struct btrfs_root *= root, struct inode *inode, > =20 > if (entry->type =3D=3D BTRFS_FREE_SPACE_EXTENT) { > spin_lock(&ctl->tree_lock); > + if (try_merge_free_space(ctl, e, true)) > + goto link; > + > + ret =3D insert_into_bitmap(ctl, e, true); > + if (ret < 0) { > + spin_unlock(&ctl->tree_lock); > + printk(KERN_ERR "Inserting into bitmap " > + "failed, dumping\n"); > + kmem_cache_free(btrfs_free_space_cachep, > + e); > + kunmap(page); > + unlock_page(page); > + page_cache_release(page); > + goto free_cache; > + } else if (ret) { > + ret =3D 0; > + goto next_entry; > + } > +link: > ret =3D link_free_space(ctl, e); > spin_unlock(&ctl->tree_lock); > if (ret) { > printk(KERN_ERR "Duplicate entries in " > "free space cache, dumping\n"); > + kmem_cache_free(btrfs_free_space_cachep, > + e); > kunmap(page); > unlock_page(page); > page_cache_release(page); > @@ -425,6 +462,9 @@ int __load_free_space_cache(struct btrfs_root *ro= ot, struct inode *inode, > if (ret) { > printk(KERN_ERR "Duplicate entries in " > "free space cache, dumping\n"); > + kfree(e->bitmap); > + kmem_cache_free( > + btrfs_free_space_cachep, e); > kunmap(page); > unlock_page(page); > page_cache_release(page); > @@ -432,7 +472,7 @@ int __load_free_space_cache(struct btrfs_root *ro= ot, struct inode *inode, > } > list_add_tail(&e->list, &bitmaps); > } > - > +next_entry: > num_entries--; > offset +=3D sizeof(struct btrfs_free_space_entry); > if (offset + sizeof(struct btrfs_free_space_entry) >=3D > @@ -676,7 +716,8 @@ int __btrfs_write_out_cache(struct btrfs_root *ro= ot, struct inode *inode, > while (node && !next_page) { > struct btrfs_free_space *e; > =20 > - e =3D rb_entry(node, struct btrfs_free_space, offset_index); > + e =3D rb_entry(node, struct btrfs_free_space, > + offset_index); > entries++; > =20 > entry->offset =3D cpu_to_le64(e->offset); > @@ -689,10 +730,39 @@ int __btrfs_write_out_cache(struct btrfs_root *= root, struct inode *inode, > entry->type =3D BTRFS_FREE_SPACE_EXTENT; > } > node =3D rb_next(node); > - if (!node && cluster) { > - node =3D rb_first(&cluster->root); > + offset +=3D sizeof(struct btrfs_free_space_entry); > + if (offset + sizeof(struct btrfs_free_space_entry) >=3D > + PAGE_CACHE_SIZE) > + next_page =3D true; > + entry++; > + } > + > + /* > + * We want to write the extent in the cluster to our free space > + * cache. > + */ > + while (cluster && !next_page) { > + struct btrfs_free_space *e; > + > + e =3D cluster->info; > + if (!e || !e->bytes) > + break; > + > + entries++; > + > + entry->offset =3D cpu_to_le64(e->offset); > + entry->bytes =3D cpu_to_le64(e->bytes); > + entry->type =3D BTRFS_FREE_SPACE_EXTENT; > + > + if (list_is_last(&cluster->block_group_list, > + &block_group->cluster_list)) > cluster =3D NULL; > - } > + else > + cluster =3D list_entry( > + cluster->block_group_list.next, > + struct btrfs_free_cluster, > + block_group_list); > + > offset +=3D sizeof(struct btrfs_free_space_entry); > if (offset + sizeof(struct btrfs_free_space_entry) >=3D > PAGE_CACHE_SIZE) > @@ -1125,19 +1195,30 @@ static void unlink_free_space(struct btrfs_fr= ee_space_ctl *ctl, > ctl->free_space -=3D info->bytes; > } > =20 > +static int __link_free_space(struct btrfs_free_space_ctl *ctl, > + struct btrfs_free_space *info) > +{ > + int ret; > + > + ret =3D tree_insert_offset(&ctl->free_space_offset, info->offset, > + &info->offset_index, (info->bitmap !=3D NULL)); > + if (ret) > + return ret; > + ctl->free_extents++; > + return 0; > +} > + > static int link_free_space(struct btrfs_free_space_ctl *ctl, > struct btrfs_free_space *info) > { > - int ret =3D 0; > + int ret; > =20 > BUG_ON(!info->bitmap && !info->bytes); > - ret =3D tree_insert_offset(&ctl->free_space_offset, info->offset, > - &info->offset_index, (info->bitmap !=3D NULL)); > + ret =3D __link_free_space(ctl, info); > if (ret) > return ret; > =20 > ctl->free_space +=3D info->bytes; > - ctl->free_extents++; > return ret; > } > =20 > @@ -1212,7 +1293,7 @@ static void bitmap_clear_bits(struct btrfs_free= _space_ctl *ctl, > =20 > static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl, > struct btrfs_free_space *info, u64 offset, > - u64 bytes) > + u64 bytes, bool update_stat) > { > unsigned long start, count; > =20 > @@ -1223,7 +1304,8 @@ static void bitmap_set_bits(struct btrfs_free_s= pace_ctl *ctl, > bitmap_set(info->bitmap, start, count); > =20 > info->bytes +=3D bytes; > - ctl->free_space +=3D bytes; > + if (update_stat) > + ctl->free_space +=3D bytes; > } > =20 > static int search_bitmap(struct btrfs_free_space_ctl *ctl, > @@ -1394,7 +1476,7 @@ again: > =20 > static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl, > struct btrfs_free_space *info, u64 offset, > - u64 bytes) > + u64 bytes, bool update_stat) > { > u64 bytes_to_set =3D 0; > u64 end; > @@ -1403,7 +1485,7 @@ static u64 add_bytes_to_bitmap(struct btrfs_fre= e_space_ctl *ctl, > =20 > bytes_to_set =3D min(end - offset, bytes); > =20 > - bitmap_set_bits(ctl, info, offset, bytes_to_set); > + bitmap_set_bits(ctl, info, offset, bytes_to_set, update_stat); > =20 > return bytes_to_set; > =20 > @@ -1451,10 +1533,9 @@ static struct btrfs_free_space_op free_space_o= p =3D { > }; > =20 > static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl, > - struct btrfs_free_space *info) > + struct btrfs_free_space *info, bool update_stat) > { > struct btrfs_free_space *bitmap_info; > - struct btrfs_block_group_cache *block_group =3D NULL; > int added =3D 0; > u64 bytes, offset, bytes_added; > int ret; > @@ -1465,49 +1546,7 @@ static int insert_into_bitmap(struct btrfs_fre= e_space_ctl *ctl, > if (!ctl->op->use_bitmap(ctl, info)) > return 0; > =20 > - if (ctl->op =3D=3D &free_space_op) > - block_group =3D ctl->private; > again: > - /* > - * Since we link bitmaps right into the cluster we need to see if w= e > - * have a cluster here, and if so and it has our bitmap we need to = add > - * the free space to that bitmap. > - */ > - if (block_group && !list_empty(&block_group->cluster_list)) { > - struct btrfs_free_cluster *cluster; > - struct rb_node *node; > - struct btrfs_free_space *entry; > - > - cluster =3D list_entry(block_group->cluster_list.next, > - struct btrfs_free_cluster, > - block_group_list); > - spin_lock(&cluster->lock); > - node =3D rb_first(&cluster->root); > - if (!node) { > - spin_unlock(&cluster->lock); > - goto no_cluster_bitmap; > - } > - > - entry =3D rb_entry(node, struct btrfs_free_space, offset_index); > - if (!entry->bitmap) { > - spin_unlock(&cluster->lock); > - goto no_cluster_bitmap; > - } > - > - if (entry->offset =3D=3D offset_to_bitmap(ctl, offset)) { > - bytes_added =3D add_bytes_to_bitmap(ctl, entry, > - offset, bytes); > - bytes -=3D bytes_added; > - offset +=3D bytes_added; > - } > - spin_unlock(&cluster->lock); > - if (!bytes) { > - ret =3D 1; > - goto out; > - } > - } > - > -no_cluster_bitmap: > bitmap_info =3D tree_search_offset(ctl, offset_to_bitmap(ctl, offse= t), > 1, 0); > if (!bitmap_info) { > @@ -1515,7 +1554,8 @@ no_cluster_bitmap: > goto new_bitmap; > } > =20 > - bytes_added =3D add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes= ); > + bytes_added =3D add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes= , > + update_stat); > bytes -=3D bytes_added; > offset +=3D bytes_added; > added =3D 0; > @@ -1533,6 +1573,12 @@ new_bitmap: > info =3D NULL; > goto again; > } else { > + if (current->flags & PF_MEMALLOC) { > + printk(KERN_INFO "Alloc memory when reclaim " > + "the pages\n"); > + return 0; > + } > + > spin_unlock(&ctl->tree_lock); > =20 > /* no pre-allocated info, allocate a new one */ > @@ -1635,7 +1681,7 @@ int __btrfs_add_free_space(struct btrfs_free_sp= ace_ctl *ctl, > * extent then we know we're going to have to allocate a new extent= , so > * before we do that see if we need to drop this into a bitmap > */ > - ret =3D insert_into_bitmap(ctl, info); > + ret =3D insert_into_bitmap(ctl, info, true); > if (ret < 0) { > goto out; > } else if (ret) { > @@ -1829,36 +1875,50 @@ __btrfs_return_cluster_to_free_space( > { > struct btrfs_free_space_ctl *ctl =3D block_group->free_space_ctl; > struct btrfs_free_space *entry; > - struct rb_node *node; > + int ret =3D 0; > =20 > spin_lock(&cluster->lock); > - if (cluster->block_group !=3D block_group) > + if (cluster->block_group !=3D block_group) { > + spin_unlock(&cluster->lock); > goto out; > + } > =20 > cluster->block_group =3D NULL; > - cluster->window_start =3D 0; > + > + entry =3D cluster->info; > + cluster->info =3D NULL; > + > list_del_init(&cluster->block_group_list); > + spin_unlock(&cluster->lock); > =20 > - node =3D rb_first(&cluster->root); > - while (node) { > - bool bitmap; > + if (!entry) > + goto out; > =20 > - entry =3D rb_entry(node, struct btrfs_free_space, offset_index); > - node =3D rb_next(&entry->offset_index); > - rb_erase(&entry->offset_index, &cluster->root); > - > - bitmap =3D (entry->bitmap !=3D NULL); > - if (!bitmap) > - try_merge_free_space(ctl, entry, false); > - tree_insert_offset(&ctl->free_space_offset, > - entry->offset, &entry->offset_index, bitmap); > + if (!entry->bytes) { > + kmem_cache_free(btrfs_free_space_cachep, entry); > + goto out; > } > - cluster->root =3D RB_ROOT; > =20 > + if (try_merge_free_space(ctl, entry, false)) > + goto link; > + > + ret =3D insert_into_bitmap(ctl, entry, false); > + if (ret < 0) > + goto out; > + else if (ret) { > + ret =3D 0; > + goto out; > + } > +link: > + ret =3D __link_free_space(ctl, entry); > + if (ret) { > + kmem_cache_free(btrfs_free_space_cachep, entry); > + printk(KERN_ERR "Duplicate entries in free space cache, " > + "dumping\n"); > + } > out: > - spin_unlock(&cluster->lock); > btrfs_put_block_group(block_group); > - return 0; > + return ret; > } > =20 > void __btrfs_remove_free_space_cache_locked(struct btrfs_free_space_= ctl *ctl) > @@ -1889,29 +1949,58 @@ void __btrfs_remove_free_space_cache(struct b= trfs_free_space_ctl *ctl) > spin_unlock(&ctl->tree_lock); > } > =20 > -void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *b= lock_group) > +static void __btrfs_reclaim_extent_clusters( > + struct btrfs_block_group_cache *block_group, > + u64 to_reclaim) > { > - struct btrfs_free_space_ctl *ctl =3D block_group->free_space_ctl; > struct btrfs_free_cluster *cluster; > struct list_head *head; > + u64 bytes; > + bool reclaim_all =3D (to_reclaim =3D=3D 0); > =20 > - spin_lock(&ctl->tree_lock); > while ((head =3D block_group->cluster_list.next) !=3D > &block_group->cluster_list) { > cluster =3D list_entry(head, struct btrfs_free_cluster, > block_group_list); > =20 > WARN_ON(cluster->block_group !=3D block_group); > + > + bytes =3D cluster->info->bytes; > __btrfs_return_cluster_to_free_space(block_group, cluster); > + > + if (!reclaim_all) { > + if (to_reclaim > bytes) > + to_reclaim -=3D bytes; > + else { > + to_reclaim =3D 0; > + break; > + } > + } > + > if (need_resched()) { > - spin_unlock(&ctl->tree_lock); > + spin_unlock(&block_group->free_space_ctl->tree_lock); > cond_resched(); > - spin_lock(&ctl->tree_lock); > + spin_lock(&block_group->free_space_ctl->tree_lock); > } > } > +} > + > +void btrfs_reclaim_extent_clusters(struct btrfs_block_group_cache *b= lock_group, > + u64 to_reclaim) > +{ > + spin_lock(&block_group->free_space_ctl->tree_lock); > + __btrfs_reclaim_extent_clusters(block_group, to_reclaim); > + spin_unlock(&block_group->free_space_ctl->tree_lock); > +} > + > +void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *b= lock_group) > +{ > + struct btrfs_free_space_ctl *ctl =3D block_group->free_space_ctl; > + > + spin_lock(&ctl->tree_lock); > + __btrfs_reclaim_extent_clusters(block_group, 0); > __btrfs_remove_free_space_cache_locked(ctl); > spin_unlock(&ctl->tree_lock); > - > } > =20 > u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block= _group, > @@ -1991,30 +2080,6 @@ int btrfs_return_cluster_to_free_space( > return ret; > } > =20 > -static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *b= lock_group, > - struct btrfs_free_cluster *cluster, > - struct btrfs_free_space *entry, > - u64 bytes, u64 min_start) > -{ > - struct btrfs_free_space_ctl *ctl =3D block_group->free_space_ctl; > - int err; > - u64 search_start =3D cluster->window_start; > - u64 search_bytes =3D bytes; > - u64 ret =3D 0; > - > - search_start =3D min_start; > - search_bytes =3D bytes; > - > - err =3D search_bitmap(ctl, entry, &search_start, &search_bytes); > - if (err) > - return 0; > - > - ret =3D search_start; > - __bitmap_clear_bits(ctl, entry, ret, bytes); > - > - return ret; > -} > - > /* > * given a cluster, try to allocate 'bytes' from it, returns 0 > * if it couldn't find anything suitably large, or a logical disk of= fset > @@ -2025,56 +2090,26 @@ u64 btrfs_alloc_from_cluster(struct btrfs_blo= ck_group_cache *block_group, > u64 min_start) > { > struct btrfs_free_space_ctl *ctl =3D block_group->free_space_ctl; > - struct btrfs_free_space *entry =3D NULL; > - struct rb_node *node; > u64 ret =3D 0; > =20 > spin_lock(&cluster->lock); > - if (bytes > cluster->max_size) > - goto out; > - > if (cluster->block_group !=3D block_group) > goto out; > =20 > - node =3D rb_first(&cluster->root); > - if (!node) > + /* > + * If we set ->block_group, it means we have filled this cluster, > + * and ->info also has been set. So we needn't check ->info is > + * NULL or not now. > + */ > + if (bytes > cluster->info->bytes) > goto out; > =20 > - entry =3D rb_entry(node, struct btrfs_free_space, offset_index); > - while(1) { > - if (entry->bytes < bytes || > - (!entry->bitmap && entry->offset < min_start)) { > - node =3D rb_next(&entry->offset_index); > - if (!node) > - break; > - entry =3D rb_entry(node, struct btrfs_free_space, > - offset_index); > - continue; > - } > - > - if (entry->bitmap) { > - ret =3D btrfs_alloc_from_bitmap(block_group, > - cluster, entry, bytes, > - min_start); > - if (ret =3D=3D 0) { > - node =3D rb_next(&entry->offset_index); > - if (!node) > - break; > - entry =3D rb_entry(node, struct btrfs_free_space, > - offset_index); > - continue; > - } > - } else { > - ret =3D entry->offset; > - > - entry->offset +=3D bytes; > - entry->bytes -=3D bytes; > - } > + if (min_start >=3D cluster->info->offset + cluster->info->bytes) > + goto out; > =20 > - if (entry->bytes =3D=3D 0) > - rb_erase(&entry->offset_index, &cluster->root); > - break; > - } > + ret =3D cluster->info->offset; > + cluster->info->offset +=3D bytes; > + cluster->info->bytes -=3D bytes; > out: > spin_unlock(&cluster->lock); > =20 > @@ -2082,260 +2117,12 @@ out: > return 0; > =20 > spin_lock(&ctl->tree_lock); > - > ctl->free_space -=3D bytes; > - if (entry->bytes =3D=3D 0) { > - ctl->free_extents--; > - if (entry->bitmap) { > - kfree(entry->bitmap); > - ctl->total_bitmaps--; > - ctl->op->recalc_thresholds(ctl); > - } > - kmem_cache_free(btrfs_free_space_cachep, entry); > - } > - > spin_unlock(&ctl->tree_lock); > =20 > return ret; > } > =20 > -static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *bloc= k_group, > - struct btrfs_free_space *entry, > - struct btrfs_free_cluster *cluster, > - u64 offset, u64 bytes, u64 min_bytes) > -{ > - struct btrfs_free_space_ctl *ctl =3D block_group->free_space_ctl; > - unsigned long next_zero; > - unsigned long i; > - unsigned long search_bits; > - unsigned long total_bits; > - unsigned long found_bits; > - unsigned long start =3D 0; > - unsigned long total_found =3D 0; > - int ret; > - bool found =3D false; > - > - i =3D offset_to_bit(entry->offset, block_group->sectorsize, > - max_t(u64, offset, entry->offset)); > - search_bits =3D bytes_to_bits(bytes, block_group->sectorsize); > - total_bits =3D bytes_to_bits(min_bytes, block_group->sectorsize); > - > -again: > - found_bits =3D 0; > - for (i =3D find_next_bit(entry->bitmap, BITS_PER_BITMAP, i); > - i < BITS_PER_BITMAP; > - i =3D find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) { > - next_zero =3D find_next_zero_bit(entry->bitmap, > - BITS_PER_BITMAP, i); > - if (next_zero - i >=3D search_bits) { > - found_bits =3D next_zero - i; > - break; > - } > - i =3D next_zero; > - } > - > - if (!found_bits) > - return -ENOSPC; > - > - if (!found) { > - start =3D i; > - found =3D true; > - } > - > - total_found +=3D found_bits; > - > - if (cluster->max_size < found_bits * block_group->sectorsize) > - cluster->max_size =3D found_bits * block_group->sectorsize; > - > - if (total_found < total_bits) { > - i =3D find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero); > - if (i - start > total_bits * 2) { > - total_found =3D 0; > - cluster->max_size =3D 0; > - found =3D false; > - } > - goto again; > - } > - > - cluster->window_start =3D start * block_group->sectorsize + > - entry->offset; > - rb_erase(&entry->offset_index, &ctl->free_space_offset); > - ret =3D tree_insert_offset(&cluster->root, entry->offset, > - &entry->offset_index, 1); > - BUG_ON(ret); > - > - return 0; > -} > - > -/* > - * This searches the block group for just extents to fill the cluste= r with. > - */ > -static noinline int > -setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, > - struct btrfs_free_cluster *cluster, > - struct list_head *bitmaps, u64 offset, u64 bytes, > - u64 min_bytes) > -{ > - struct btrfs_free_space_ctl *ctl =3D block_group->free_space_ctl; > - struct btrfs_free_space *first =3D NULL; > - struct btrfs_free_space *entry =3D NULL; > - struct btrfs_free_space *prev =3D NULL; > - struct btrfs_free_space *last; > - struct rb_node *node; > - u64 window_start; > - u64 window_free; > - u64 max_extent; > - u64 max_gap =3D 128 * 1024; > - > - entry =3D tree_search_offset(ctl, offset, 0, 1); > - if (!entry) > - return -ENOSPC; > - > - /* > - * We don't want bitmaps, so just move along until we find a normal > - * extent entry. > - */ > - while (entry->bitmap) { > - if (list_empty(&entry->list)) > - list_add_tail(&entry->list, bitmaps); > - node =3D rb_next(&entry->offset_index); > - if (!node) > - return -ENOSPC; > - entry =3D rb_entry(node, struct btrfs_free_space, offset_index); > - } > - > - window_start =3D entry->offset; > - window_free =3D entry->bytes; > - max_extent =3D entry->bytes; > - first =3D entry; > - last =3D entry; > - prev =3D entry; > - > - while (window_free <=3D min_bytes) { > - node =3D rb_next(&entry->offset_index); > - if (!node) > - return -ENOSPC; > - entry =3D rb_entry(node, struct btrfs_free_space, offset_index); > - > - if (entry->bitmap) { > - if (list_empty(&entry->list)) > - list_add_tail(&entry->list, bitmaps); > - continue; > - } > - > - /* > - * we haven't filled the empty size and the window is > - * very large. reset and try again > - */ > - if (entry->offset - (prev->offset + prev->bytes) > max_gap || > - entry->offset - window_start > (min_bytes * 2)) { > - first =3D entry; > - window_start =3D entry->offset; > - window_free =3D entry->bytes; > - last =3D entry; > - max_extent =3D entry->bytes; > - } else { > - last =3D entry; > - window_free +=3D entry->bytes; > - if (entry->bytes > max_extent) > - max_extent =3D entry->bytes; > - } > - prev =3D entry; > - } > - > - cluster->window_start =3D first->offset; > - > - node =3D &first->offset_index; > - > - /* > - * now we've found our entries, pull them out of the free space > - * cache and put them into the cluster rbtree > - */ > - do { > - int ret; > - > - entry =3D rb_entry(node, struct btrfs_free_space, offset_index); > - node =3D rb_next(&entry->offset_index); > - if (entry->bitmap) > - continue; > - > - rb_erase(&entry->offset_index, &ctl->free_space_offset); > - ret =3D tree_insert_offset(&cluster->root, entry->offset, > - &entry->offset_index, 0); > - BUG_ON(ret); > - } while (node && entry !=3D last); > - > - cluster->max_size =3D max_extent; > - > - return 0; > -} > - > -/* > - * This specifically looks for bitmaps that may work in the cluster,= we assume > - * that we have already failed to find extents that will work. > - */ > -static noinline int > -setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, > - struct btrfs_free_cluster *cluster, > - struct list_head *bitmaps, u64 offset, u64 bytes, > - u64 min_bytes) > -{ > - struct btrfs_free_space_ctl *ctl =3D block_group->free_space_ctl; > - struct btrfs_free_space *entry; > - struct rb_node *node; > - int ret =3D -ENOSPC; > - > - if (ctl->total_bitmaps =3D=3D 0) > - return -ENOSPC; > - > - /* > - * First check our cached list of bitmaps and see if there is an en= try > - * here that will work. > - */ > - list_for_each_entry(entry, bitmaps, list) { > - if (entry->bytes < min_bytes) > - continue; > - ret =3D btrfs_bitmap_cluster(block_group, entry, cluster, offset, > - bytes, min_bytes); > - if (!ret) > - return 0; > - } > - > - /* > - * If we do have entries on our list and we are here then we didn't= find > - * anything, so go ahead and get the next entry after the last entr= y in > - * this list and start the search from there. > - */ > - if (!list_empty(bitmaps)) { > - entry =3D list_entry(bitmaps->prev, struct btrfs_free_space, > - list); > - node =3D rb_next(&entry->offset_index); > - if (!node) > - return -ENOSPC; > - entry =3D rb_entry(node, struct btrfs_free_space, offset_index); > - goto search; > - } > - > - entry =3D tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 0,= 1); > - if (!entry) > - return -ENOSPC; > - > -search: > - node =3D &entry->offset_index; > - do { > - entry =3D rb_entry(node, struct btrfs_free_space, offset_index); > - node =3D rb_next(&entry->offset_index); > - if (!entry->bitmap) > - continue; > - if (entry->bytes < min_bytes) > - continue; > - ret =3D btrfs_bitmap_cluster(block_group, entry, cluster, offset, > - bytes, min_bytes); > - } while (ret && node); > - > - return ret; > -} > - > /* > * here we try to find a cluster of blocks in a block group. The go= al > * is to find at least bytes free and up to empty_size + bytes free. > @@ -2351,11 +2138,12 @@ int btrfs_find_space_cluster(struct btrfs_tra= ns_handle *trans, > u64 offset, u64 bytes, u64 empty_size) > { > struct btrfs_free_space_ctl *ctl =3D block_group->free_space_ctl; > - struct list_head bitmaps; > - struct btrfs_free_space *entry, *tmp; > + struct btrfs_free_space *entry, *info; > u64 min_bytes; > int ret; > =20 > + BUG_ON(cluster->info); > + > /* for metadata, allow allocates with more holes */ > if (btrfs_test_opt(root, SSD_SPREAD)) { > min_bytes =3D bytes + empty_size; > @@ -2369,9 +2157,16 @@ int btrfs_find_space_cluster(struct btrfs_tran= s_handle *trans, > min_bytes =3D max(bytes, (bytes + empty_size) >> 1); > else > min_bytes =3D max(bytes, (bytes + empty_size) >> 4); > + min_bytes =3D max(min_bytes, (u64)256 * 1024); > } else > min_bytes =3D max(bytes, (bytes + empty_size) >> 2); > =20 > + min_bytes =3D round_up(min_bytes, root->sectorsize); > + > + info =3D kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); > + if (!info) > + return -ENOMEM; > + > spin_lock(&ctl->tree_lock); > =20 > /* > @@ -2380,6 +2175,7 @@ int btrfs_find_space_cluster(struct btrfs_trans= _handle *trans, > */ > if (ctl->free_space < min_bytes) { > spin_unlock(&ctl->tree_lock); > + kmem_cache_free(btrfs_free_space_cachep, info); > return -ENOSPC; > } > =20 > @@ -2388,26 +2184,44 @@ int btrfs_find_space_cluster(struct btrfs_tra= ns_handle *trans, > /* someone already found a cluster, hooray */ > if (cluster->block_group) { > ret =3D 0; > + kmem_cache_free(btrfs_free_space_cachep, info); > goto out; > } > =20 > - INIT_LIST_HEAD(&bitmaps); > - ret =3D setup_cluster_no_bitmap(block_group, cluster, &bitmaps, off= set, > - bytes, min_bytes); > - if (ret) > - ret =3D setup_cluster_bitmap(block_group, cluster, &bitmaps, > - offset, bytes, min_bytes); > + bytes =3D min_bytes; > + entry =3D find_free_space(ctl, &offset, &min_bytes); > + if (!entry) { > + ret =3D -ENOSPC; > + kmem_cache_free(btrfs_free_space_cachep, info); > + goto out; > + } > =20 > - /* Clear our temporary list */ > - list_for_each_entry_safe(entry, tmp, &bitmaps, list) > - list_del_init(&entry->list); > + BUG_ON(min_bytes < bytes); > =20 > - if (!ret) { > - atomic_inc(&block_group->count); > - list_add_tail(&cluster->block_group_list, > - &block_group->cluster_list); > - cluster->block_group =3D block_group; > + ret =3D 0; > + if (entry->bitmap) { > + __bitmap_clear_bits(ctl, entry, offset, bytes); > + if (!entry->bytes) > + free_bitmap(ctl, entry); > + } else { > + __unlink_free_space(ctl, entry); > + entry->offset +=3D bytes; > + entry->bytes -=3D bytes; > + if (!entry->bytes) > + kmem_cache_free(btrfs_free_space_cachep, entry); > + else > + __link_free_space(ctl, entry); > } > + > + info->offset =3D offset; > + info->bytes =3D bytes; > + > + cluster->info =3D info; > + > + atomic_inc(&block_group->count); > + list_add_tail(&cluster->block_group_list, > + &block_group->cluster_list); > + cluster->block_group =3D block_group; > out: > spin_unlock(&cluster->lock); > spin_unlock(&ctl->tree_lock); > @@ -2421,11 +2235,10 @@ out: > void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) > { > spin_lock_init(&cluster->lock); > - spin_lock_init(&cluster->refill_lock); > - cluster->root =3D RB_ROOT; > - cluster->max_size =3D 0; > + mutex_init(&cluster->refill_lock); > INIT_LIST_HEAD(&cluster->block_group_list); > cluster->block_group =3D NULL; > + cluster->info =3D NULL; > } > =20 > int btrfs_trim_block_group(struct btrfs_block_group_cache *block_gro= up, > @@ -2665,3 +2478,27 @@ int btrfs_write_out_ino_cache(struct btrfs_roo= t *root, > iput(inode); > return ret; > } > + > +struct btrfs_free_cluster *btrfs_alloc_free_cluster(void) > +{ > + struct btrfs_free_cluster *cluster; > + > + cluster =3D kmem_cache_zalloc(btrfs_free_cluster_cachep, GFP_NOFS); > + if (!cluster) > + return NULL; > + > + btrfs_init_free_cluster(cluster); > + return cluster; > +} > + > +void btrfs_release_free_cluster(struct btrfs_free_cluster *cluster) > +{ > + /* return the free space in the cluster to the space cache. */ > + mutex_lock(&cluster->refill_lock); > + btrfs_return_cluster_to_free_space(NULL, cluster); > + mutex_unlock(&cluster->refill_lock); > + > + /* free the cluster. */ > + kmem_cache_free(btrfs_free_cluster_cachep, cluster); > +} > + > diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.= h > index c27ccba..1f7e75c 100644 > --- a/fs/btrfs/free-space-cache.h > +++ b/fs/btrfs/free-space-cache.h > @@ -34,6 +34,7 @@ struct btrfs_free_space_ctl { > int extents_thresh; > int free_extents; > int total_bitmaps; > + int total_clusters; > int unit; > u64 start; > struct btrfs_free_space_op *op; > @@ -113,4 +114,16 @@ int btrfs_return_cluster_to_free_space( > struct btrfs_free_cluster *cluster); > int btrfs_trim_block_group(struct btrfs_block_group_cache *block_gro= up, > u64 *trimmed, u64 start, u64 end, u64 minlen); > +void btrfs_reclaim_extent_clusters(struct btrfs_block_group_cache *b= lock_group, > + u64 to_reclaim); > +struct btrfs_free_cluster *btrfs_alloc_free_cluster(void); > +void btrfs_release_free_cluster(struct btrfs_free_cluster *cluster); > +static inline void btrfs_free_extent_cluster(struct extent_buffer *e= b) > +{ > + if (!eb->cluster) > + return; > + > + btrfs_release_free_cluster(eb->cluster); > + eb->cluster =3D NULL; > +} > #endif > diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c > index b4087e0..d501c1f 100644 > --- a/fs/btrfs/inode-map.c > +++ b/fs/btrfs/inode-map.c > @@ -457,6 +457,7 @@ again: > spin_unlock(&root->cache_lock); > =20 > spin_lock(&ctl->tree_lock); > + WARN_ON(ctl->total_clusters); > prealloc =3D sizeof(struct btrfs_free_space) * ctl->free_extents; > prealloc =3D ALIGN(prealloc, PAGE_CACHE_SIZE); > prealloc +=3D ctl->total_bitmaps * PAGE_CACHE_SIZE; > diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c > index 7a9e01f..38a5da9 100644 > --- a/fs/btrfs/inode.c > +++ b/fs/btrfs/inode.c > @@ -51,7 +51,6 @@ > #include "tree-log.h" > #include "compression.h" > #include "locking.h" > -#include "free-space-cache.h" > #include "inode-map.h" > =20 > struct btrfs_iget_args { > @@ -623,11 +622,10 @@ retry: > trans =3D btrfs_join_transaction(root); > BUG_ON(IS_ERR(trans)); > trans->block_rsv =3D &root->fs_info->delalloc_block_rsv; > - ret =3D btrfs_reserve_extent(trans, root, > - async_extent->compressed_size, > - async_extent->compressed_size, > - 0, alloc_hint, > - (u64)-1, &ins, 1); > + ret =3D btrfs_reserve_extent_data(trans, root, > + async_extent->compressed_size, > + async_extent->compressed_size, > + 0, alloc_hint, (u64)-1, &ins); > btrfs_end_transaction(trans, root); > =20 > if (ret) { > @@ -828,9 +826,9 @@ static noinline int cow_file_range(struct inode *= inode, > unsigned long op; > =20 > cur_alloc_size =3D disk_num_bytes; > - ret =3D btrfs_reserve_extent(trans, root, cur_alloc_size, > - root->sectorsize, 0, alloc_hint, > - (u64)-1, &ins, 1); > + ret =3D btrfs_reserve_extent_data(trans, root, cur_alloc_size, > + root->sectorsize, 0, alloc_hint, > + (u64)-1, &ins); > BUG_ON(ret); > =20 > em =3D alloc_extent_map(); > @@ -5409,8 +5407,8 @@ static struct extent_map *btrfs_new_extent_dire= ct(struct inode *inode, > trans->block_rsv =3D &root->fs_info->delalloc_block_rsv; > =20 > alloc_hint =3D get_extent_allocation_hint(inode, start, len); > - ret =3D btrfs_reserve_extent(trans, root, len, root->sectorsize, 0, > - alloc_hint, (u64)-1, &ins, 1); > + ret =3D btrfs_reserve_extent_data(trans, root, len, root->sectorsiz= e, 0, > + alloc_hint, (u64)-1, &ins); > if (ret) { > em =3D ERR_PTR(ret); > goto out; > @@ -7276,8 +7274,9 @@ static int __btrfs_prealloc_file_range(struct i= node *inode, int mode, > } > } > =20 > - ret =3D btrfs_reserve_extent(trans, root, num_bytes, min_size, > - 0, *alloc_hint, (u64)-1, &ins, 1); > + ret =3D btrfs_reserve_extent_data(trans, root, num_bytes, > + min_size, 0, *alloc_hint, > + (u64)-1, &ins); > if (ret) { > if (own_trans) > btrfs_end_transaction(trans, root); > diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c > index 970977a..808203c 100644 > --- a/fs/btrfs/ioctl.c > +++ b/fs/btrfs/ioctl.c > @@ -347,7 +347,7 @@ static noinline int create_subvol(struct btrfs_ro= ot *root, > if (IS_ERR(trans)) > return PTR_ERR(trans); > =20 > - leaf =3D btrfs_alloc_free_block(trans, root, root->leafsize, > + leaf =3D btrfs_alloc_free_block(trans, root, root->leafsize, NULL, > 0, objectid, NULL, 0, 0, 0); > if (IS_ERR(leaf)) { > ret =3D PTR_ERR(leaf); > diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c > index 786639f..21d08e2 100644 > --- a/fs/btrfs/tree-log.c > +++ b/fs/btrfs/tree-log.c > @@ -25,6 +25,7 @@ > #include "print-tree.h" > #include "compat.h" > #include "tree-log.h" > +#include "free-space-cache.h" > =20 > /* magic values for the inode_only field in btrfs_log_inode: > * > @@ -1763,6 +1764,8 @@ static noinline int walk_down_log_tree(struct b= trfs_trans_handle *trans, > ret =3D btrfs_free_reserved_extent(root, > bytenr, blocksize); > BUG_ON(ret); > + > + btrfs_free_extent_cluster(next); > } > free_extent_buffer(next); > continue; > @@ -1832,6 +1835,8 @@ static noinline int walk_up_log_tree(struct btr= fs_trans_handle *trans, > path->nodes[*level]->start, > path->nodes[*level]->len); > BUG_ON(ret); > + > + btrfs_free_extent_cluster(next); > } > free_extent_buffer(path->nodes[*level]); > path->nodes[*level] =3D NULL; > @@ -1900,6 +1905,8 @@ static int walk_log_tree(struct btrfs_trans_han= dle *trans, > ret =3D btrfs_free_reserved_extent(log, next->start, > next->len); > BUG_ON(ret); > + > + btrfs_free_extent_cluster(next); > } > } > =20 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" = in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html