* [PATCH] ext4: Don't allow lg prealloc list to grow large.
@ 2008-07-21 9:40 Aneesh Kumar K.V
2008-07-21 19:37 ` Eric Sandeen
0 siblings, 1 reply; 7+ messages in thread
From: Aneesh Kumar K.V @ 2008-07-21 9:40 UTC (permalink / raw)
To: cmm, tytso, sandeen; +Cc: linux-ext4, Aneesh Kumar K.V
The locality group prealloc list is freed only when there is a block allocation
failure. This can result in a large number of per-CPU locality group prealloc spaces
and also makes ext4_mb_use_preallocated() expensive. Add a tunable, max_lg_prealloc,
which defaults to 1000. If we have more than 1000 per-CPU prealloc spaces and we
fail to find a suitable prealloc space during allocation, we now free all
the prealloc space in the locality group.
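In rough terms, the check this adds to ext4_mb_use_preallocated() behaves like the
sketch below (a simplified, standalone illustration with made-up types and names; the
patch itself walks lg->lg_prealloc_list under RCU and returns the blocks through
ext4_mb_discard_lg_preallocations()):

/* Simplified sketch of the new check -- illustrative types and names only. */
struct pa_sketch {
	unsigned int free_blocks;	/* pa_free in the real structure */
	int deleted;
	struct pa_sketch *next;
};

/*
 * Return 1 if a usable prealloc space exists.  Otherwise, if the list
 * has grown past max_lg_prealloc, mark every entry for discard.
 */
static int use_lg_prealloc_sketch(struct pa_sketch *list, unsigned int need,
				  unsigned long max_lg_prealloc)
{
	struct pa_sketch *pa;
	unsigned long count = 0;

	for (pa = list; pa; pa = pa->next) {
		if (!pa->deleted && pa->free_blocks >= need)
			return 1;		/* allocate from this space */
		count++;
	}
	if (count > max_lg_prealloc) {		/* tunable, defaults to 1000 */
		for (pa = list; pa; pa = pa->next)
			pa->deleted = 1;	/* the real code also frees the blocks */
	}
	return 0;
}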
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
fs/ext4/ext4_sb.h | 1 +
fs/ext4/mballoc.c | 151 +++++++++++++++++++++++++++++++++++++++-------------
fs/ext4/mballoc.h | 6 ++
3 files changed, 120 insertions(+), 38 deletions(-)
diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h
index 6300226..f8bf8b0 100644
--- a/fs/ext4/ext4_sb.h
+++ b/fs/ext4/ext4_sb.h
@@ -115,6 +115,7 @@ struct ext4_sb_info {
/* where last allocation was done - for stream allocation */
unsigned long s_mb_last_group;
unsigned long s_mb_last_start;
+ unsigned long s_mb_max_lg_prealloc;
/* history to debug policy */
struct ext4_mb_history *s_mb_history;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 9db0f4d..4139da0 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2540,6 +2540,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT;
sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC;
+ sbi->s_mb_max_lg_prealloc = MB_DEFAULT_LG_PREALLOC;
i = sizeof(struct ext4_locality_group) * NR_CPUS;
sbi->s_locality_groups = kmalloc(i, GFP_KERNEL);
@@ -2720,6 +2721,7 @@ ext4_mb_free_committed_blocks(struct super_block *sb)
#define EXT4_MB_ORDER2_REQ "order2_req"
#define EXT4_MB_STREAM_REQ "stream_req"
#define EXT4_MB_GROUP_PREALLOC "group_prealloc"
+#define EXT4_MB_MAX_LG_PREALLOC "max_lg_prealloc"
@@ -2769,6 +2771,7 @@ MB_PROC_FOPS(min_to_scan);
MB_PROC_FOPS(order2_reqs);
MB_PROC_FOPS(stream_request);
MB_PROC_FOPS(group_prealloc);
+MB_PROC_FOPS(max_lg_prealloc);
#define MB_PROC_HANDLER(name, var) \
do { \
@@ -2800,11 +2803,13 @@ static int ext4_mb_init_per_dev_proc(struct super_block *sb)
MB_PROC_HANDLER(EXT4_MB_ORDER2_REQ, order2_reqs);
MB_PROC_HANDLER(EXT4_MB_STREAM_REQ, stream_request);
MB_PROC_HANDLER(EXT4_MB_GROUP_PREALLOC, group_prealloc);
+ MB_PROC_HANDLER(EXT4_MB_MAX_LG_PREALLOC, max_lg_prealloc);
return 0;
err_out:
printk(KERN_ERR "EXT4-fs: Unable to create %s\n", devname);
+ remove_proc_entry(EXT4_MB_MAX_LG_PREALLOC, sbi->s_mb_proc);
remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc);
remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc);
remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc);
@@ -2826,6 +2831,7 @@ static int ext4_mb_destroy_per_dev_proc(struct super_block *sb)
return -EINVAL;
bdevname(sb->s_bdev, devname);
+ remove_proc_entry(EXT4_MB_MAX_LG_PREALLOC, sbi->s_mb_proc);
remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc);
remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc);
remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc);
@@ -3280,6 +3286,107 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
mb_debug("use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa);
}
+static noinline_for_stack int
+ext4_mb_release_group_pa(struct ext4_buddy *e4b,
+ struct ext4_prealloc_space *pa,
+ struct ext4_allocation_context *ac)
+{
+ struct super_block *sb = e4b->bd_sb;
+ ext4_group_t group;
+ ext4_grpblk_t bit;
+
+ if (ac)
+ ac->ac_op = EXT4_MB_HISTORY_DISCARD;
+
+ BUG_ON(pa->pa_deleted == 0);
+ ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
+ BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
+ mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
+ atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
+
+ if (ac) {
+ ac->ac_sb = sb;
+ ac->ac_inode = NULL;
+ ac->ac_b_ex.fe_group = group;
+ ac->ac_b_ex.fe_start = bit;
+ ac->ac_b_ex.fe_len = pa->pa_len;
+ ac->ac_b_ex.fe_logical = 0;
+ ext4_mb_store_history(ac);
+ }
+
+ return 0;
+}
+
+static void ext4_mb_pa_callback(struct rcu_head *head)
+{
+ struct ext4_prealloc_space *pa;
+ pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu);
+ kmem_cache_free(ext4_pspace_cachep, pa);
+}
+
+/*
+ * release the locality group prealloc space.
+ * called with lg_mutex held
+ */
+static noinline_for_stack void
+ext4_mb_discard_lg_preallocations(struct super_block *sb,
+ struct ext4_locality_group *lg)
+{
+ ext4_group_t group = 0;
+ struct list_head list;
+ struct ext4_buddy e4b;
+ struct ext4_allocation_context *ac;
+ struct ext4_prealloc_space *pa, *tmp;
+
+ INIT_LIST_HEAD(&list);
+ ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
+
+ list_for_each_entry_rcu(pa, &lg->lg_prealloc_list, pa_inode_list) {
+ spin_lock(&pa->pa_lock);
+ if (atomic_read(&pa->pa_count)) {
+ /* This should not happen */
+ spin_unlock(&pa->pa_lock);
+ printk(KERN_ERR "uh-oh! used pa while discarding\n");
+ WARN_ON(1);
+ continue;
+ }
+ if (pa->pa_deleted) {
+ spin_unlock(&pa->pa_lock);
+ continue;
+ }
+ /* only lg prealloc space */
+ BUG_ON(!pa->pa_linear);
+
+ /* seems this one can be freed ... */
+ pa->pa_deleted = 1;
+ spin_unlock(&pa->pa_lock);
+
+ list_del_rcu(&pa->pa_inode_list);
+ list_add(&pa->u.pa_tmp_list, &list);
+ }
+
+ list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
+
+ ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
+ if (ext4_mb_load_buddy(sb, group, &e4b)) {
+ ext4_error(sb, __func__, "Error in loading buddy "
+ "information for %lu\n", group);
+ continue;
+ }
+ ext4_lock_group(sb, group);
+ list_del(&pa->pa_group_list);
+ ext4_mb_release_group_pa(&e4b, pa, ac);
+ ext4_unlock_group(sb, group);
+
+ ext4_mb_release_desc(&e4b);
+ list_del(&pa->u.pa_tmp_list);
+ call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
+ }
+ if (ac)
+ kmem_cache_free(ext4_ac_cachep, ac);
+ return;
+}
+
/*
* search goal blocks in preallocated space
*/
@@ -3287,8 +3394,10 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
{
struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
+ struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
struct ext4_locality_group *lg;
struct ext4_prealloc_space *pa;
+ unsigned long lg_prealloc_count = 0;
/* only data can be preallocated */
if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
@@ -3339,9 +3448,13 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
return 1;
}
spin_unlock(&pa->pa_lock);
+ lg_prealloc_count++;
}
rcu_read_unlock();
+ if (lg_prealloc_count > sbi->s_mb_max_lg_prealloc)
+ ext4_mb_discard_lg_preallocations(ac->ac_sb, lg);
+
return 0;
}
@@ -3388,13 +3501,6 @@ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
mb_debug("prellocated %u for group %lu\n", preallocated, group);
}
-static void ext4_mb_pa_callback(struct rcu_head *head)
-{
- struct ext4_prealloc_space *pa;
- pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu);
- kmem_cache_free(ext4_pspace_cachep, pa);
-}
-
/*
* drops a reference to preallocated space descriptor
* if this was the last reference and the space is consumed
@@ -3676,37 +3782,6 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
return err;
}
-static noinline_for_stack int
-ext4_mb_release_group_pa(struct ext4_buddy *e4b,
- struct ext4_prealloc_space *pa,
- struct ext4_allocation_context *ac)
-{
- struct super_block *sb = e4b->bd_sb;
- ext4_group_t group;
- ext4_grpblk_t bit;
-
- if (ac)
- ac->ac_op = EXT4_MB_HISTORY_DISCARD;
-
- BUG_ON(pa->pa_deleted == 0);
- ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
- BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
- mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
- atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
-
- if (ac) {
- ac->ac_sb = sb;
- ac->ac_inode = NULL;
- ac->ac_b_ex.fe_group = group;
- ac->ac_b_ex.fe_start = bit;
- ac->ac_b_ex.fe_len = pa->pa_len;
- ac->ac_b_ex.fe_logical = 0;
- ext4_mb_store_history(ac);
- }
-
- return 0;
-}
* Re: [PATCH] ext4: Don't allow lg prealloc list to grow large.
2008-07-21 9:40 Aneesh Kumar K.V
@ 2008-07-21 19:37 ` Eric Sandeen
0 siblings, 0 replies; 7+ messages in thread
From: Eric Sandeen @ 2008-07-21 19:37 UTC (permalink / raw)
To: Aneesh Kumar K.V; +Cc: cmm, tytso, linux-ext4
Aneesh Kumar K.V wrote:
> The locality group prealloc list is freed only when there is a block allocation
> failure. This can result in a large number of per-CPU locality group prealloc spaces
> and also makes ext4_mb_use_preallocated() expensive. Add a tunable, max_lg_prealloc,
> which defaults to 1000. If we have more than 1000 per-CPU prealloc spaces and we
> fail to find a suitable prealloc space during allocation, we now free all
> the prealloc space in the locality group.
It looks like this helps, but it does not fare as well as the "perfectly
tuned" default (where the prealloc size is a multiple of the 20k/5-block
file size used in the test).
I've added a plot of a delalloc run with your patch to the graph at:
http://people.redhat.com/esandeen/ext4/fs_mark.png
-Eric
* [PATCH] ext4: Don't allow lg prealloc list to grow large.
@ 2008-07-22 11:10 Aneesh Kumar K.V
2008-07-22 12:36 ` Aneesh Kumar K.V
2008-07-22 17:42 ` Eric Sandeen
0 siblings, 2 replies; 7+ messages in thread
From: Aneesh Kumar K.V @ 2008-07-22 11:10 UTC (permalink / raw)
To: cmm, tytso, sandeen; +Cc: linux-ext4, Aneesh Kumar K.V
Currently locality group prealloc list is freed only when there is a block allocation
failure. This can result in large number of per cpu locality group prealloc space
and also make the ext4_mb_use_preallocated expensive. Convert the locality group
prealloc list to a hash list. The hash index is the order of number of blocks
in the prealloc space with a max order of 9. When adding prealloc space to the
list we make sure total entries for each order does not exceed 8. If it is more
than 8 we discard few entries and make sure the we have only <= 5 entries.
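Concretely, the bucket selection and the trim thresholds described above boil down to
the sketch below (the constants match the patch; the helper names and the counter
model are illustrative only -- the patch keeps an array of list_heads per locality
group and trims under lg_prealloc_lock):

/* Standalone sketch of the hash/trim policy -- illustrative helpers only. */
#define PREALLOC_TB_SIZE 10

static int fls_sketch(unsigned int x)		/* most significant set bit, 1-based */
{
	return x ? 32 - __builtin_clz(x) : 0;
}

static int lg_bucket(unsigned int pa_free)	/* which hash list a pa belongs on */
{
	int order = fls_sketch(pa_free) - 1;	/* pa_free is non-zero in practice */

	if (order > PREALLOC_TB_SIZE - 1)
		order = PREALLOC_TB_SIZE - 1;	/* clamp: maximum order is 9 */
	return order;
}

static int entries_to_discard(int bucket_entries)
{
	/* let a bucket grow to 8 entries; past that, trim it back to 5 */
	return bucket_entries > 8 ? bucket_entries - 5 : 0;
}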
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
fs/ext4/mballoc.c | 266 +++++++++++++++++++++++++++++++++++++++++------------
fs/ext4/mballoc.h | 10 ++-
2 files changed, 215 insertions(+), 61 deletions(-)
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 5b854b7..e058509 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2481,7 +2481,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
int ext4_mb_init(struct super_block *sb, int needs_recovery)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
- unsigned i;
+ unsigned i, j;
unsigned offset;
unsigned max;
int ret;
@@ -2553,7 +2553,8 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
struct ext4_locality_group *lg;
lg = &sbi->s_locality_groups[i];
mutex_init(&lg->lg_mutex);
- INIT_LIST_HEAD(&lg->lg_prealloc_list);
+ for (j = 0; j < PREALLOC_TB_SIZE; j++)
+ INIT_LIST_HEAD(&lg->lg_prealloc_list[j]);
spin_lock_init(&lg->lg_prealloc_lock);
}
@@ -3258,12 +3259,68 @@ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
}
/*
+ * release the locality group prealloc space.
+ * called with lg_mutex held
+ * called with lg->lg_prealloc_lock held
+ */
+static noinline_for_stack void
+ext4_mb_discard_lg_preallocations_prep(struct list_head *discard_list,
+ struct list_head *lg_prealloc_list,
+ int total_entries)
+{
+ struct ext4_prealloc_space *pa;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(pa, lg_prealloc_list, pa_inode_list) {
+ spin_lock(&pa->pa_lock);
+ if (atomic_read(&pa->pa_count)) {
+ spin_unlock(&pa->pa_lock);
+ continue;
+ }
+ if (pa->pa_deleted) {
+ spin_unlock(&pa->pa_lock);
+ continue;
+ }
+ /* only lg prealloc space */
+ BUG_ON(!pa->pa_linear);
+
+ /* seems this one can be freed ... */
+ pa->pa_deleted = 1;
+ spin_unlock(&pa->pa_lock);
+
+
+ list_del_rcu(&pa->pa_inode_list);
+ list_add(&pa->u.pa_tmp_list, discard_list);
+
+ total_entries--;
+ if (total_entries <= 5) {
+ /*
+ * we want to keep only 5 entries
+ * allowing it to grow to 8. This
+ * makes sure we don't call discard
+ * soon for this list.
+ */
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ return;
+}
+
+/*
* use blocks preallocated to locality group
+ * called with lg->lg_prealloc_lock held
*/
static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
- struct ext4_prealloc_space *pa)
+ struct ext4_prealloc_space *pa,
+ struct list_head *discard_list)
{
+ int order, added = 0, lg_prealloc_count = 1;
unsigned int len = ac->ac_o_ex.fe_len;
+ struct ext4_prealloc_space *tmp_pa;
+ struct ext4_locality_group *lg = ac->ac_lg;
+
ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart,
&ac->ac_b_ex.fe_group,
&ac->ac_b_ex.fe_start);
@@ -3278,6 +3335,112 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
* Other CPUs are prevented from allocating from this pa by lg_mutex
*/
mb_debug("use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa);
+
+ /* remove the pa from the current list and add it to the new list */
+ pa->pa_free -= len;
+ order = fls(pa->pa_free) - 1;
+
+ /* remove from the old list */
+ list_del_rcu(&pa->pa_inode_list);
+
+ list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order],
+ pa_inode_list) {
+ if (!added && pa->pa_free < tmp_pa->pa_free) {
+ /* Add to the tail of the previous entry */
+ list_add_tail_rcu(&pa->pa_inode_list,
+ tmp_pa->pa_inode_list.prev);
+ added = 1;
+ /* we want to count the total
+ * number of entries in the list
+ */
+ }
+ lg_prealloc_count++;
+ }
+ if (!added)
+ list_add_tail_rcu(&pa->pa_inode_list, &tmp_pa->pa_inode_list);
+
+ /* Now trim the list to be not more than 8 elements */
+ if (lg_prealloc_count > 8) {
+ /*
+ * We can remove the prealloc space from grp->bb_prealloc_list
+ * here because we are holding lg_prealloc_lock and can't take
+ * group lock.
+ */
+ ext4_mb_discard_lg_preallocations_prep(discard_list,
+ &lg->lg_prealloc_list[order],
+ lg_prealloc_count);
+ }
+}
+
+static noinline_for_stack int
+ext4_mb_release_group_pa(struct ext4_buddy *e4b,
+ struct ext4_prealloc_space *pa,
+ struct ext4_allocation_context *ac)
+{
+ struct super_block *sb = e4b->bd_sb;
+ ext4_group_t group;
+ ext4_grpblk_t bit;
+
+ if (ac)
+ ac->ac_op = EXT4_MB_HISTORY_DISCARD;
+
+ BUG_ON(pa->pa_deleted == 0);
+ ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
+ BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
+ mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
+ atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
+
+ if (ac) {
+ ac->ac_sb = sb;
+ ac->ac_inode = NULL;
+ ac->ac_b_ex.fe_group = group;
+ ac->ac_b_ex.fe_start = bit;
+ ac->ac_b_ex.fe_len = pa->pa_len;
+ ac->ac_b_ex.fe_logical = 0;
+ ext4_mb_store_history(ac);
+ }
+
+ return 0;
+}
+
+static void ext4_mb_pa_callback(struct rcu_head *head)
+{
+ struct ext4_prealloc_space *pa;
+ pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu);
+ kmem_cache_free(ext4_pspace_cachep, pa);
+}
+
+static noinline_for_stack void
+ext4_mb_discard_lg_preallocations_commit(struct super_block *sb,
+ struct list_head *discard_list)
+{
+ ext4_group_t group = 0;
+ struct ext4_buddy e4b;
+ struct ext4_allocation_context *ac;
+ struct ext4_prealloc_space *pa, *tmp;
+
+ ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
+
+ list_for_each_entry_safe(pa, tmp, discard_list, u.pa_tmp_list) {
+
+ ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
+ if (ext4_mb_load_buddy(sb, group, &e4b)) {
+ ext4_error(sb, __func__, "Error in loading buddy "
+ "information for %lu\n", group);
+ continue;
+ }
+ ext4_lock_group(sb, group);
+ list_del(&pa->pa_group_list);
+ ext4_mb_release_group_pa(&e4b, pa, ac);
+ ext4_unlock_group(sb, group);
+
+ ext4_mb_release_desc(&e4b);
+ list_del(&pa->u.pa_tmp_list);
+ call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
+ }
+ if (ac)
+ kmem_cache_free(ext4_ac_cachep, ac);
+ return;
}
/*
@@ -3286,14 +3449,17 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
static noinline_for_stack int
ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
{
+ int order, lg_prealloc_count = 0, i;
struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
struct ext4_locality_group *lg;
struct ext4_prealloc_space *pa;
+ struct list_head discard_list;
/* only data can be preallocated */
if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
return 0;
+ INIT_LIST_HEAD(&discard_list);
/* first, try per-file preallocation */
rcu_read_lock();
list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
@@ -3326,22 +3492,39 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
lg = ac->ac_lg;
if (lg == NULL)
return 0;
-
- rcu_read_lock();
- list_for_each_entry_rcu(pa, &lg->lg_prealloc_list, pa_inode_list) {
- spin_lock(&pa->pa_lock);
- if (pa->pa_deleted == 0 && pa->pa_free >= ac->ac_o_ex.fe_len) {
- atomic_inc(&pa->pa_count);
- ext4_mb_use_group_pa(ac, pa);
+ order = fls(ac->ac_o_ex.fe_len) - 1;
+ if (order > PREALLOC_TB_SIZE - 1)
+ /* The max size of hash table is PREALLOC_TB_SIZE */
+ order = PREALLOC_TB_SIZE - 1;
+ /*
+ * We take the lock on the locality object to prevent a
+ * discard via ext4_mb_discard_group_preallocations
+ */
+ spin_lock(&lg->lg_prealloc_lock);
+ for (i = order; i < PREALLOC_TB_SIZE; i++) {
+ lg_prealloc_count = 0;
+ rcu_read_lock();
+ list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i],
+ pa_inode_list) {
+ spin_lock(&pa->pa_lock);
+ if (pa->pa_deleted == 0 &&
+ pa->pa_free >= ac->ac_o_ex.fe_len) {
+ atomic_inc(&pa->pa_count);
+ ext4_mb_use_group_pa(ac, pa, &discard_list);
+ spin_unlock(&pa->pa_lock);
+ ac->ac_criteria = 20;
+ rcu_read_unlock();
+ spin_unlock(&lg->lg_prealloc_lock);
+ return 1;
+ }
spin_unlock(&pa->pa_lock);
- ac->ac_criteria = 20;
- rcu_read_unlock();
- return 1;
+ lg_prealloc_count++;
}
- spin_unlock(&pa->pa_lock);
+ rcu_read_unlock();
}
- rcu_read_unlock();
-
+ spin_unlock(&lg->lg_prealloc_lock);
+ ext4_mb_discard_lg_preallocations_commit(ac->ac_sb,
+ &discard_list);
return 0;
}
@@ -3388,13 +3571,6 @@ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
mb_debug("prellocated %u for group %lu\n", preallocated, group);
}
-static void ext4_mb_pa_callback(struct rcu_head *head)
-{
- struct ext4_prealloc_space *pa;
- pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu);
- kmem_cache_free(ext4_pspace_cachep, pa);
-}
-
/*
* drops a reference to preallocated space descriptor
* if this was the last reference and the space is consumed
@@ -3543,6 +3719,7 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
struct ext4_locality_group *lg;
struct ext4_prealloc_space *pa;
struct ext4_group_info *grp;
+ struct list_head discard_list;
/* preallocate only when found space is larger then requested */
BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
@@ -3554,6 +3731,7 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
if (pa == NULL)
return -ENOMEM;
+ INIT_LIST_HEAD(&discard_list);
/* preallocation can change ac_b_ex, thus we store actually
* allocated blocks for history */
ac->ac_f_ex = ac->ac_b_ex;
@@ -3564,13 +3742,13 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
pa->pa_free = pa->pa_len;
atomic_set(&pa->pa_count, 1);
spin_lock_init(&pa->pa_lock);
+ INIT_LIST_HEAD(&pa->pa_inode_list);
pa->pa_deleted = 0;
pa->pa_linear = 1;
mb_debug("new group pa %p: %llu/%u for %u\n", pa,
pa->pa_pstart, pa->pa_len, pa->pa_lstart);
- ext4_mb_use_group_pa(ac, pa);
atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
@@ -3584,10 +3762,12 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
- spin_lock(pa->pa_obj_lock);
- list_add_tail_rcu(&pa->pa_inode_list, &lg->lg_prealloc_list);
- spin_unlock(pa->pa_obj_lock);
+ /* ext4_mb_use_group_pa will also add the pa to the lg list */
+ spin_lock(&lg->lg_prealloc_lock);
+ ext4_mb_use_group_pa(ac, pa, &discard_list);
+ spin_unlock(&lg->lg_prealloc_lock);
+ ext4_mb_discard_lg_preallocations_commit(sb, &discard_list);
return 0;
}
@@ -3676,37 +3856,6 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
return err;
}
-static noinline_for_stack int
-ext4_mb_release_group_pa(struct ext4_buddy *e4b,
- struct ext4_prealloc_space *pa,
- struct ext4_allocation_context *ac)
-{
- struct super_block *sb = e4b->bd_sb;
- ext4_group_t group;
- ext4_grpblk_t bit;
-
- if (ac)
- ac->ac_op = EXT4_MB_HISTORY_DISCARD;
-
- BUG_ON(pa->pa_deleted == 0);
- ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
- BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
- mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
- atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
-
- if (ac) {
- ac->ac_sb = sb;
- ac->ac_inode = NULL;
- ac->ac_b_ex.fe_group = group;
- ac->ac_b_ex.fe_start = bit;
- ac->ac_b_ex.fe_len = pa->pa_len;
- ac->ac_b_ex.fe_logical = 0;
- ext4_mb_store_history(ac);
- }
-
- return 0;
-}
-
/*
* releases all preallocations in given group
*
@@ -4136,7 +4285,6 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac)
spin_lock(&ac->ac_pa->pa_lock);
ac->ac_pa->pa_pstart += ac->ac_b_ex.fe_len;
ac->ac_pa->pa_lstart += ac->ac_b_ex.fe_len;
- ac->ac_pa->pa_free -= ac->ac_b_ex.fe_len;
ac->ac_pa->pa_len -= ac->ac_b_ex.fe_len;
spin_unlock(&ac->ac_pa->pa_lock);
}
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 1141ad5..6b46c86 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -164,11 +164,17 @@ struct ext4_free_extent {
* Locality group:
* we try to group all related changes together
* so that writeback can flush/allocate them together as well
+ * Size of lg_prealloc_list hash is determined by MB_DEFAULT_GROUP_PREALLOC
+ * (512). We store prealloc space into the hash based on the pa_free blocks
+ * order value, i.e. fls(pa_free) - 1.
*/
+#define PREALLOC_TB_SIZE 10
struct ext4_locality_group {
/* for allocator */
- struct mutex lg_mutex; /* to serialize allocates */
- struct list_head lg_prealloc_list;/* list of preallocations */
+ /* to serialize allocates */
+ struct mutex lg_mutex;
+ /* list of preallocations */
+ struct list_head lg_prealloc_list[PREALLOC_TB_SIZE];
spinlock_t lg_prealloc_lock;
};
--
1.5.6.3.439.g1e10.dirty
* Re: [PATCH] ext4: Don't allow lg prealloc list to grow large.
2008-07-22 11:10 [PATCH] ext4: Don't allow lg prealloc list to grow large Aneesh Kumar K.V
@ 2008-07-22 12:36 ` Aneesh Kumar K.V
2008-07-22 17:42 ` Eric Sandeen
1 sibling, 0 replies; 7+ messages in thread
From: Aneesh Kumar K.V @ 2008-07-22 12:36 UTC (permalink / raw)
To: cmm, tytso, sandeen; +Cc: linux-ext4
On Tue, Jul 22, 2008 at 04:40:16PM +0530, Aneesh Kumar K.V wrote:
> Currently locality group prealloc list is freed only when there is a block allocation
> failure. This can result in large number of per cpu locality group prealloc space
> and also make the ext4_mb_use_preallocated expensive. Convert the locality group
> prealloc list to a hash list. The hash index is the order of number of blocks
> in the prealloc space with a max order of 9. When adding prealloc space to the
> list we make sure total entries for each order does not exceed 8. If it is more
> than 8 we discard few entries and make sure the we have only <= 5 entries.
>
>
A small update to fix incorrect usage of the list APIs:
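The subtlety is that list_add_tail_rcu(new, head) links new immediately before head,
so the head argument must be the element (or list head) that new should precede;
passing tmp_pa->pa_inode_list.prev inserted the entry one position too early. A
minimal sketch of that behaviour, modeled loosely on the kernel list implementation
(illustrative names, not part of the patch):

/* list_add_tail(new, head): "new" ends up immediately before "head". */
struct node_sketch { struct node_sketch *prev, *next; };

static void list_add_tail_sketch(struct node_sketch *new, struct node_sketch *head)
{
	new->prev = head->prev;		/* after the current last element */
	new->next = head;		/* ...and immediately before head  */
	head->prev->next = new;
	head->prev = new;
}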
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index e058509..f8da1a2 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3348,7 +3348,7 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
if (!added && pa->pa_free < tmp_pa->pa_free) {
/* Add to the tail of the previous entry */
list_add_tail_rcu(&pa->pa_inode_list,
- tmp_pa->pa_inode_list.prev);
+ &tmp_pa->pa_inode_list);
added = 1;
/* we want to count the total
* number of entries in the list
@@ -3357,7 +3357,7 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
lg_prealloc_count++;
}
if (!added)
- list_add_tail_rcu(&pa->pa_inode_list, &tmp_pa->pa_inode_list);
+ list_add_tail_rcu(&pa->pa_inode_list, &lg->lg_prealloc_list[order]);
/* Now trim the list to be not more than 8 elements */
if (lg_prealloc_count > 8) {
* Re: [PATCH] ext4: Don't allow lg prealloc list to grow large.
2008-07-22 11:10 [PATCH] ext4: Don't allow lg prealloc list to grow large Aneesh Kumar K.V
2008-07-22 12:36 ` Aneesh Kumar K.V
@ 2008-07-22 17:42 ` Eric Sandeen
1 sibling, 0 replies; 7+ messages in thread
From: Eric Sandeen @ 2008-07-22 17:42 UTC (permalink / raw)
To: Aneesh Kumar K.V; +Cc: cmm, tytso, linux-ext4
Aneesh Kumar K.V wrote:
> Currently locality group prealloc list is freed only when there is a block allocation
> failure. This can result in large number of per cpu locality group prealloc space
> and also make the ext4_mb_use_preallocated expensive. Convert the locality group
> prealloc list to a hash list. The hash index is the order of number of blocks
> in the prealloc space with a max order of 9. When adding prealloc space to the
> list we make sure total entries for each order does not exceed 8. If it is more
> than 8 we discard few entries and make sure the we have only <= 5 entries.
>
This looks better on the particular benchmark:
http://people.redhat.com/esandeen/ext4/fs_mark.png
contains a run with this patch.
I must admit to not actually reading the patch yet, though :) Just ran
it in the background while working on some other things today.
-Eric
* [PATCH] ext4: Don't allow lg prealloc list to grow large.
2008-07-23 17:10 ` [PATCH] ext4: Convert the usage of NR_CPUS to nr_cpu_ids Aneesh Kumar K.V
@ 2008-07-23 17:10 ` Aneesh Kumar K.V
2008-07-23 18:13 ` Theodore Tso
0 siblings, 1 reply; 7+ messages in thread
From: Aneesh Kumar K.V @ 2008-07-23 17:10 UTC (permalink / raw)
To: cmm, tytso, sandeen; +Cc: linux-ext4, Aneesh Kumar K.V
Currently locality group prealloc list is freed only when there is a block allocation
failure. This can result in large number of per cpu locality group prealloc space
and also make the ext4_mb_use_preallocated expensive. Convert the locality group
prealloc list to a hash list. The hash index is the order of number of blocks
in the prealloc space with a max order of 9. When adding prealloc space to the
list we make sure total entries for each order does not exceed 8. If it is more
than 8 we discard few entries and make sure the we have only <= 5 entries.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
fs/ext4/mballoc.c | 213 ++++++++++++++++++++++++++++++++++++++++++++++-------
fs/ext4/mballoc.h | 10 ++-
2 files changed, 193 insertions(+), 30 deletions(-)
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 5b854b7..fc3d4fc 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2481,7 +2481,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
int ext4_mb_init(struct super_block *sb, int needs_recovery)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
- unsigned i;
+ unsigned i, j;
unsigned offset;
unsigned max;
int ret;
@@ -2553,7 +2553,8 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
struct ext4_locality_group *lg;
lg = &sbi->s_locality_groups[i];
mutex_init(&lg->lg_mutex);
- INIT_LIST_HEAD(&lg->lg_prealloc_list);
+ for (j = 0; j < PREALLOC_TB_SIZE; j++)
+ INIT_LIST_HEAD(&lg->lg_prealloc_list[j]);
spin_lock_init(&lg->lg_prealloc_lock);
}
@@ -3264,6 +3265,7 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
struct ext4_prealloc_space *pa)
{
unsigned int len = ac->ac_o_ex.fe_len;
+
ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart,
&ac->ac_b_ex.fe_group,
&ac->ac_b_ex.fe_start);
@@ -3286,6 +3288,7 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
static noinline_for_stack int
ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
{
+ int order, i;
struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
struct ext4_locality_group *lg;
struct ext4_prealloc_space *pa;
@@ -3326,22 +3329,29 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
lg = ac->ac_lg;
if (lg == NULL)
return 0;
-
- rcu_read_lock();
- list_for_each_entry_rcu(pa, &lg->lg_prealloc_list, pa_inode_list) {
- spin_lock(&pa->pa_lock);
- if (pa->pa_deleted == 0 && pa->pa_free >= ac->ac_o_ex.fe_len) {
- atomic_inc(&pa->pa_count);
- ext4_mb_use_group_pa(ac, pa);
+ order = fls(ac->ac_o_ex.fe_len) - 1;
+ if (order > PREALLOC_TB_SIZE - 1)
+ /* The max size of hash table is PREALLOC_TB_SIZE */
+ order = PREALLOC_TB_SIZE - 1;
+
+ for (i = order; i < PREALLOC_TB_SIZE; i++) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i],
+ pa_inode_list) {
+ spin_lock(&pa->pa_lock);
+ if (pa->pa_deleted == 0 &&
+ pa->pa_free >= ac->ac_o_ex.fe_len) {
+ atomic_inc(&pa->pa_count);
+ ext4_mb_use_group_pa(ac, pa);
+ spin_unlock(&pa->pa_lock);
+ ac->ac_criteria = 20;
+ rcu_read_unlock();
+ return 1;
+ }
spin_unlock(&pa->pa_lock);
- ac->ac_criteria = 20;
- rcu_read_unlock();
- return 1;
}
- spin_unlock(&pa->pa_lock);
+ rcu_read_unlock();
}
- rcu_read_unlock();
-
return 0;
}
@@ -3564,6 +3574,7 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
pa->pa_free = pa->pa_len;
atomic_set(&pa->pa_count, 1);
spin_lock_init(&pa->pa_lock);
+ INIT_LIST_HEAD(&pa->pa_inode_list);
pa->pa_deleted = 0;
pa->pa_linear = 1;
@@ -3584,10 +3595,10 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
- spin_lock(pa->pa_obj_lock);
- list_add_tail_rcu(&pa->pa_inode_list, &lg->lg_prealloc_list);
- spin_unlock(pa->pa_obj_lock);
-
+ /*
+ * We will later add the new pa to the right bucket
+ * after updating the pa_free in ext4_mb_release_context
+ */
return 0;
}
@@ -4125,22 +4136,168 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
}
+static noinline_for_stack void
+ext4_mb_discard_lg_preallocations(struct super_block *sb,
+ struct ext4_locality_group *lg,
+ int order, int total_entries)
+{
+ ext4_group_t group = 0;
+ struct ext4_buddy e4b;
+ struct list_head discard_list;
+ struct ext4_prealloc_space *pa, *tmp;
+ struct ext4_allocation_context *ac;
+
+ mb_debug("discard locality group preallocation\n");
+
+ INIT_LIST_HEAD(&discard_list);
+ ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
+
+ spin_lock(&lg->lg_prealloc_lock);
+ list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
+ pa_inode_list) {
+ spin_lock(&pa->pa_lock);
+ if (atomic_read(&pa->pa_count)) {
+ /*
+ * This is the pa that we just used
+ * for block allocation. So don't
+ * free that
+ */
+ spin_unlock(&pa->pa_lock);
+ continue;
+ }
+ if (pa->pa_deleted) {
+ spin_unlock(&pa->pa_lock);
+ continue;
+ }
+ /* only lg prealloc space */
+ BUG_ON(!pa->pa_linear);
+
+ /* seems this one can be freed ... */
+ pa->pa_deleted = 1;
+ spin_unlock(&pa->pa_lock);
+
+ list_del_rcu(&pa->pa_inode_list);
+ list_add(&pa->u.pa_tmp_list, &discard_list);
+
+ total_entries--;
+ if (total_entries <= 5) {
+ /*
+ * we want to keep only 5 entries
+ * allowing it to grow to 8. This
+ * makes sure we don't call discard
+ * soon for this list.
+ */
+ break;
+ }
+ }
+ spin_unlock(&lg->lg_prealloc_lock);
+
+ list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) {
+
+ ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
+ if (ext4_mb_load_buddy(sb, group, &e4b)) {
+ ext4_error(sb, __func__, "Error in loading buddy "
+ "information for %lu\n", group);
+ continue;
+ }
+ ext4_lock_group(sb, group);
+ list_del(&pa->pa_group_list);
+ ext4_mb_release_group_pa(&e4b, pa, ac);
+ ext4_unlock_group(sb, group);
+
+ ext4_mb_release_desc(&e4b);
+ list_del(&pa->u.pa_tmp_list);
+ call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
+ }
+ if (ac)
+ kmem_cache_free(ext4_ac_cachep, ac);
+}
+
+/*
+ * We have incremented pa_count. So it cannot be freed at this
+ * point. Also we hold lg_mutex. So no parallel allocation is
+ * possible from this lg. That means pa_free cannot be updated.
+ *
+ * A parallel ext4_mb_discard_group_preallocations is possible,
+ * which can cause the lg_prealloc_list to be updated.
+ */
+
+static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
+{
+ int order, added = 0, lg_prealloc_count = 1;
+ struct super_block *sb = ac->ac_sb;
+ struct ext4_locality_group *lg = ac->ac_lg;
+ struct ext4_prealloc_space *tmp_pa, *pa = ac->ac_pa;
+
+ order = fls(pa->pa_free) - 1;
+ if (order > PREALLOC_TB_SIZE - 1)
+ /* The max size of hash table is PREALLOC_TB_SIZE */
+ order = PREALLOC_TB_SIZE - 1;
+ /* Add the prealloc space to lg */
+ rcu_read_lock();
+ list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order],
+ pa_inode_list) {
+ spin_lock(&tmp_pa->pa_lock);
+ if (tmp_pa->pa_deleted) {
+ spin_unlock(&pa->pa_lock);
+ continue;
+ }
+ if (!added && pa->pa_free < tmp_pa->pa_free) {
+ /* Add to the tail of the previous entry */
+ list_add_tail_rcu(&pa->pa_inode_list,
+ &tmp_pa->pa_inode_list);
+ added = 1;
+ /*
+ * we want to count the total
+ * number of entries in the list
+ */
+ }
+ spin_unlock(&tmp_pa->pa_lock);
+ lg_prealloc_count++;
+ }
+ if (!added)
+ list_add_tail_rcu(&pa->pa_inode_list,
+ &lg->lg_prealloc_list[order]);
+ rcu_read_unlock();
+
+ /* Now trim the list to be not more than 8 elements */
+ if (lg_prealloc_count > 8) {
+ ext4_mb_discard_lg_preallocations(sb, lg,
+ order, lg_prealloc_count);
+ return;
+ }
+ return ;
+}
+
/*
* release all resource we used in allocation
*/
static int ext4_mb_release_context(struct ext4_allocation_context *ac)
{
- if (ac->ac_pa) {
- if (ac->ac_pa->pa_linear) {
+ struct ext4_prealloc_space *pa = ac->ac_pa;
+ if (pa) {
+ if (pa->pa_linear) {
/* see comment in ext4_mb_use_group_pa() */
- spin_lock(&ac->ac_pa->pa_lock);
- ac->ac_pa->pa_pstart += ac->ac_b_ex.fe_len;
- ac->ac_pa->pa_lstart += ac->ac_b_ex.fe_len;
- ac->ac_pa->pa_free -= ac->ac_b_ex.fe_len;
- ac->ac_pa->pa_len -= ac->ac_b_ex.fe_len;
- spin_unlock(&ac->ac_pa->pa_lock);
+ spin_lock(&pa->pa_lock);
+ pa->pa_pstart += ac->ac_b_ex.fe_len;
+ pa->pa_lstart += ac->ac_b_ex.fe_len;
+ pa->pa_free -= ac->ac_b_ex.fe_len;
+ pa->pa_len -= ac->ac_b_ex.fe_len;
+ spin_unlock(&pa->pa_lock);
+ /*
+ * We want to add the pa to the right bucket.
+ * Remove it from the list and while adding
+ * make sure the list to which we are adding
+ * doesn't grow big.
+ */
+ if (likely(pa->pa_free)) {
+ spin_lock(pa->pa_obj_lock);
+ list_del_rcu(&pa->pa_inode_list);
+ spin_unlock(pa->pa_obj_lock);
+ ext4_mb_add_n_trim(ac);
+ }
}
- ext4_mb_put_pa(ac, ac->ac_sb, ac->ac_pa);
+ ext4_mb_put_pa(ac, ac->ac_sb, pa);
}
if (ac->ac_bitmap_page)
page_cache_release(ac->ac_bitmap_page);
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 1141ad5..6b46c86 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -164,11 +164,17 @@ struct ext4_free_extent {
* Locality group:
* we try to group all related changes together
* so that writeback can flush/allocate them together as well
+ * Size of lg_prealloc_list hash is determined by MB_DEFAULT_GROUP_PREALLOC
+ * (512). We store prealloc space into the hash based on the pa_free blocks
+ * order value, i.e. fls(pa_free) - 1.
*/
+#define PREALLOC_TB_SIZE 10
struct ext4_locality_group {
/* for allocator */
- struct mutex lg_mutex; /* to serialize allocates */
- struct list_head lg_prealloc_list;/* list of preallocations */
+ /* to serialize allocates */
+ struct mutex lg_mutex;
+ /* list of preallocations */
+ struct list_head lg_prealloc_list[PREALLOC_TB_SIZE];
spinlock_t lg_prealloc_lock;
};
--
1.5.6.3.439.g1e10.dirty
* Re: [PATCH] ext4: Don't allow lg prealloc list to grow large.
2008-07-23 17:10 ` [PATCH] ext4: Don't allow lg prealloc list to grow large Aneesh Kumar K.V
@ 2008-07-23 18:13 ` Theodore Tso
0 siblings, 0 replies; 7+ messages in thread
From: Theodore Tso @ 2008-07-23 18:13 UTC (permalink / raw)
To: Aneesh Kumar K.V; +Cc: cmm, sandeen, linux-ext4
On Wed, Jul 23, 2008 at 10:40:47PM +0530, Aneesh Kumar K.V wrote:
> Currently locality group prealloc list is freed only when there is a block allocation
> failure. This can result in large number of per cpu locality group prealloc space
> and also make the ext4_mb_use_preallocated expensive. Convert the locality group
> prealloc list to a hash list. The hash index is the order of number of blocks
> in the prealloc space with a max order of 9. When adding prealloc space to the
> list we make sure total entries for each order does not exceed 8. If it is more
> than 8 we discard few entries and make sure the we have only <= 5 entries.
So the second sentence made my english parser core dump. :-)
I rewrote the patch comments as follows; is it still a fair summary?
Currently, the locality group prealloc list is freed only when there
is a block allocation failure. This can result in large number of
entries in the preallocation list making ext4_mb_use_preallocated()
expensive.
To fix this, we convert the locality group prealloc list to a hash
list. The hash index is the order of number of blocks in the prealloc
space with a max order of 9. When adding prealloc space to the list we
make sure total entries for each order does not exceed 8. If it is
more than 8 we discard few entries and make sure the we have only <= 5
entries.
- Ted