diff -ur linux.beta3.1/fs/reiserfs/bitmap.c linux.beta3/fs/reiserfs/bitmap.c
--- linux.beta3.1/fs/reiserfs/bitmap.c 2003-03-10 20:36:37.000000000 -0500
+++ linux.beta3/fs/reiserfs/bitmap.c 2003-03-11 09:50:26.000000000 -0500
@@ -34,6 +34,8 @@
 #define _ALLOC_hashed_formatted_nodes 7
 #define _ALLOC_old_way 8
 #define _ALLOC_hundredth_slices 9
+#define _ALLOC_dirid_groups 10
+#define _ALLOC_oid_groups 11
 
 #define concentrating_formatted_nodes(s) test_bit(_ALLOC_concentrating_formatted_nodes, &SB_ALLOC_OPTS(s))
 #define displacing_large_files(s) test_bit(_ALLOC_displacing_large_files, &SB_ALLOC_OPTS(s))
@@ -261,8 +263,18 @@
     get_bit_address (s, *start, &bm, &off);
     get_bit_address (s, finish, &end_bm, &end_off);
 
-    // With this option set first we try to find a bitmap that is at least 10%
-    // free, and if that fails, then we fall back to old whole bitmap scanning
+    /* When the bitmap is more than 10% free, anyone can allocate.
+     * When it's less than 10% free, only files that already use the
+     * bitmap are allowed.  Once fewer than 1/20 (5%) of all blocks on
+     * the filesystem are free, this restriction is lifted.
+     *
+     * We do this so that files that grow later still have space close to
+     * their original allocation. This improves locality, and presumably
+     * performance as a result.
+     *
+     * This is only an allocation policy and does not make up for getting a
+     * bad hint. Decent hinting must be implemented for this to work well.
+     */
     if ( TEST_OPTION(skip_busy, s) && SB_FREE_BLOCKS(s) > SB_BLOCK_COUNT(s)/20 ) {
         for (;bm < end_bm; bm++, off = 0) {
             if ( ( off && (!unfm || (file_block != 0))) || SB_AP_BITMAP(s)[bm].free_count > (s->s_blocksize << 3) / 10 )
@@ -408,6 +420,20 @@
     }
 }
 
+/*
+ * Set the default block allocator options: concentrating_formatted_nodes,
+ * skip_busy and dirid_groups, with the border set to 5; then log the mask.
+ */
+void reiserfs_init_alloc_options (struct super_block *s)
+{
+    set_bit (_ALLOC_concentrating_formatted_nodes, &SB_ALLOC_OPTS(s));
+    set_bit (_ALLOC_skip_busy, &SB_ALLOC_OPTS(s));
+    set_bit (_ALLOC_dirid_groups, &SB_ALLOC_OPTS(s));
+    s->u.reiserfs_sb.s_alloc_options.border = 5;
+
+    reiserfs_warning ("allocator defaults = [%08x]\n", SB_ALLOC_OPTS(s));
+}
+
 /* block allocator related options are parsed here */
 int reiserfs_parse_alloc_options(struct super_block * s, char * options)
 {
@@ -451,6 +477,15 @@
             continue;
         }
 
+        if (!strcmp(this_char, "dirid_groups")) {
+            SET_OPTION(dirid_groups);
+            continue;
+        }
+        if (!strcmp(this_char, "oid_groups")) {
+            SET_OPTION(oid_groups);
+            continue;
+        }
+
         if (!strcmp(this_char, "hashed_formatted_nodes")) {
             SET_OPTION(hashed_formatted_nodes);
             continue;
@@ -492,6 +527,7 @@
         return 1;
     }
 
+    reiserfs_warning ("allocator options = [%08x]\n", SB_ALLOC_OPTS(s));
     return 0;
 }
 
@@ -514,17 +550,81 @@
     hint->search_start = hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg);
 }
 
-static void inline get_left_neighbor(reiserfs_blocknr_hint_t *hint)
+/*
+ * Relocation based on dirid: hash the directory id into an aligned group of
+ * blocks.  Formatted nodes are unaffected, a separate policy covers them
+ */
+static void
+dirid_groups (reiserfs_blocknr_hint_t *hint)
+{
+    if (hint->inode) {
+        char * hash_in = NULL;
+        unsigned long hash;
+        unsigned long mask;
+        __u32 dirid;
+
+        dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id);
+        hash_in = (char *)(&dirid);
+
+        /* effectively turns the disk into 64MB groups (4k blocksize),
+         * but the bigger your disk is the less likely hash collisions
+         * are, leading to dynamically bigger groups based on
+         * your disk size
+         */
+        mask = (hint->inode->i_sb->s_blocksize << 2) - 1;
+        hash = keyed_hash(hash_in, 4);
+        hash = hint->beg + hash % (hint->end - hint->beg);
+        hash &= ~mask;
+
+        hint->search_start = hash;
+    }
+}
+
+/*
+ * Relocation based on oid: hash the object id into an aligned group of
+ * blocks.  Formatted nodes are unaffected, a separate policy covers them
+ */
+static void
+oid_groups (reiserfs_blocknr_hint_t *hint)
+{
+    if (hint->inode) {
+        char * hash_in = NULL;
+        unsigned long hash;
+        unsigned long mask;
+        __u32 oid;
+
+        oid = le32_to_cpu(INODE_PKEY(hint->inode)->k_objectid);
+        hash_in = (char *)(&oid);
+
+        /* effectively turns the disk into 64MB groups (4k blocksize),
+         * but the bigger your disk is the less likely hash collisions
+         * are, leading to dynamically bigger groups based on
+         * your disk size
+         */
+        mask = (hint->inode->i_sb->s_blocksize << 2) - 1;
+        hash = keyed_hash(hash_in, 4);
+        hash = hint->beg + hash % (hint->end - hint->beg);
+        hash &= ~mask;
+
+        hint->search_start = hash;
+    }
+}
+
+/* returns 1 if it finds an indirect item and gets valid hint info
+ * from it, otherwise 0
+ */
+static int get_left_neighbor(reiserfs_blocknr_hint_t *hint)
 {
     struct path * path;
     struct buffer_head * bh;
     struct item_head * ih;
     int pos_in_item;
     __u32 * item;
+    int ret = 0;
 
     if (!hint->path) /* reiserfs code can call this function w/o pointer to path
                       * structure supplied; then we rely on supplied search_start */
-        return;
+        return 0;
 
     path = hint->path;
     bh = get_last_bh(path);
@@ -545,6 +645,7 @@
             int t=get_block_num(item,pos_in_item);
             if (t) {
                 hint->search_start = t;
+                ret = 1;
                 break;
             }
             pos_in_item --;
@@ -553,7 +654,7 @@
     }
 
     /* does result value fit into specified region? */
-    return;
+    return ret;
 }
 
 /* should be, if formatted node, then try to put on first part of the device
@@ -655,6 +756,7 @@
     struct super_block *s = hint->th->t_super;
+    int unfm_hint; /* result of get_left_neighbor(): 1 if it supplied the hint */
     hint->beg = 0;
     hint->end = SB_BLOCK_COUNT(s) - 1;
 
     /* This is former border algorithm. Now with tunable border offset */
     if (concentrating_formatted_nodes(s))
@@ -683,19 +785,14 @@
         return;
     }
 
-    /* attempt to copy a feature from old block allocator code */
-    if (TEST_OPTION(old_hashed_relocation, s) && !hint->formatted_node) {
-        old_hashed_relocation(hint);
-    }
-
     /* if none of our special cases is relevant, use the left neighbor in the
        tree order of the new node we are allocating for */
     if (hint->formatted_node && TEST_OPTION(hashed_formatted_nodes,s)) {
-      hash_formatted_node(hint);
+        hash_formatted_node(hint);
         return;
-      }
+    }
 
-    get_left_neighbor(hint);
+    unfm_hint = get_left_neighbor(hint);
 
     /* Mimic old block allocator behaviour, that is if VFS allowed for preallocation,
        new blocks are displaced based on directory ID. Also, if suggested search_start
@@ -720,10 +817,29 @@
         return;
     }
 
-    if (TEST_OPTION(old_hashed_relocation, s))
+    /* old_hashed_relocation only works on unformatted */
+    if (!unfm_hint && !hint->formatted_node &&
+        TEST_OPTION(old_hashed_relocation, s))
+    {
         old_hashed_relocation(hint);
-    if (TEST_OPTION(new_hashed_relocation, s))
+    }
+    /* new_hashed_relocation works with both formatted/unformatted nodes */
+    if ((!unfm_hint || hint->formatted_node) &&
+        TEST_OPTION(new_hashed_relocation, s))
+    {
         new_hashed_relocation(hint);
+    }
+    /* dirid grouping works only on unformatted nodes */
+    if (!unfm_hint && !hint->formatted_node && TEST_OPTION(dirid_groups,s))
+    {
+        dirid_groups(hint);
+    }
+
+    /* oid grouping works only on unformatted nodes */
+    if (!unfm_hint && !hint->formatted_node && TEST_OPTION(oid_groups,s))
+    {
+        oid_groups(hint);
+    }
     return;
 }
 
@@ -787,7 +903,7 @@
     struct super_block *s = hint->th->t_super;
     b_blocknr_t start = hint->search_start;
     b_blocknr_t finish = SB_BLOCK_COUNT(s) - 1;
-    int second_pass = 0;
+    int passno = 0;
     int nr_allocated = 0;
 
     determine_prealloc_size(hint);
@@ -809,30 +925,44 @@
         }
     }
 
-    while((nr_allocated
-          += allocate_without_wrapping_disk(hint, new_blocknrs + nr_allocated, start, finish,
-                                            amount_needed - nr_allocated, hint->prealloc_size))
-          < amount_needed) {
-
-        /* not all blocks were successfully allocated yet*/
-        if (second_pass) { /* it was a second pass; we must free all blocks */
+    do {
+        switch (passno++) {
+        case 0: /* Search from hint->search_start to end of disk */
+            start = hint->search_start;
+            finish = SB_BLOCK_COUNT(s) - 1;
+            break;
+        case 1: /* Search from hint->beg to hint->search_start */
+            start = hint->beg;
+            finish = hint->search_start;
+            break;
+        case 2: /* Last chance: Search from 0 to hint->beg */
+            start = 0;
+            finish = hint->beg;
+            break;
+        default: /* We've tried searching everywhere, not enough space */
             if (!hint->formatted_node) {
 #ifdef REISERQUOTA_DEBUG
-                printk(KERN_DEBUG "reiserquota: freeing (nospace) %d blocks id=%u\n", amount_needed + hint->prealloc_size - nr_allocated, hint->inode->i_uid);
+                printk(KERN_DEBUG "reiserquota: freeing (nospace) %d blocks id=%u\n",
+                       amount_needed + hint->prealloc_size - nr_allocated,
+                       hint->inode->i_uid);
 #endif
-                DQUOT_FREE_BLOCK_NODIRTY(hint->inode, amount_needed + hint->prealloc_size - nr_allocated); /* Free not allocated blocks */
+                /* Free not allocated blocks */
+                DQUOT_FREE_BLOCK_NODIRTY(hint->inode,
+                    amount_needed + hint->prealloc_size - nr_allocated);
             }
+            /* Free the blocks */
             while (nr_allocated --)
-                reiserfs_free_block(hint->th, hint->inode, new_blocknrs[nr_allocated], !hint->formatted_node);
+                reiserfs_free_block(hint->th, hint->inode,
+                                    new_blocknrs[nr_allocated],
+                                    !hint->formatted_node);
+
+            return NO_DISK_SPACE;
+        }
+    } while ((nr_allocated += allocate_without_wrapping_disk (hint,
+                    new_blocknrs + nr_allocated, start, finish,
+                    amount_needed - nr_allocated, hint->prealloc_size))
+                    < amount_needed);
 
-            return NO_DISK_SPACE;
-        } else { /* refine search parameters for next pass */
-            second_pass = 1;
-            finish = start;
-            start = 0;
-            continue;
-        }
-    }
     if ( !hint->formatted_node && amount_needed + hint->prealloc_size > nr_allocated + INODE_INFO(hint->inode)->i_prealloc_count) {
     /* Some of preallocation blocks were not allocated */
 #ifdef REISERQUOTA_DEBUG
diff -ur linux.beta3.1/fs/reiserfs/super.c linux.beta3/fs/reiserfs/super.c
--- linux.beta3.1/fs/reiserfs/super.c 2003-03-10 20:36:37.000000000 -0500
+++ linux.beta3/fs/reiserfs/super.c 2003-03-10 20:37:31.000000000 -0500
@@ -1288,18 +1288,17 @@
     char *jdev_name;
     struct reiserfs_super_block * rs;
 
-    memset (&s->u.reiserfs_sb, 0, sizeof (struct reiserfs_sb_info));
     /* Set default values for options: non-aggressive tails */
     s->u.reiserfs_sb.s_mount_opt = ( 1 << REISERFS_SMALLTAIL );
-    /* default block allocator option: skip_busy */
-    s->u.reiserfs_sb.s_alloc_options.bits = ( 1 << 5);
     /* If file grew past 4 blocks, start preallocation blocks for it. */
     s->u.reiserfs_sb.s_alloc_options.preallocmin = 4;
     /* Preallocate by 8 blocks (9-1) at once */
     s->u.reiserfs_sb.s_alloc_options.preallocsize = 9;
     /* Initialize the rwsem for xattr dir */
     init_rwsem(&s->u.reiserfs_sb.xattr_dir_sem);
 
+    /* Setup default block allocator options */
+    reiserfs_init_alloc_options (s);
 
     if (reiserfs_parse_options (s, (char *) data, &(s->u.reiserfs_sb.s_mount_opt), &blocks) == 0) {
         return NULL;
diff -ur linux.beta3.1/include/linux/reiserfs_fs.h linux.beta3/include/linux/reiserfs_fs.h
--- linux.beta3.1/include/linux/reiserfs_fs.h 2003-03-10 20:36:37.000000000 -0500
+++ linux.beta3/include/linux/reiserfs_fs.h 2003-03-10 20:41:55.000000000 -0500
@@ -2228,6 +2228,7 @@
 typedef struct __reiserfs_blocknr_hint reiserfs_blocknr_hint_t;
 
 int reiserfs_parse_alloc_options (struct super_block *, char *);
+void reiserfs_init_alloc_options (struct super_block *s);
 int is_reusable (struct super_block * s, unsigned long block, int bit_value);
 void reiserfs_free_block (struct reiserfs_transaction_handle *th, struct inode *inode, unsigned long, int);
 int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t *, b_blocknr_t * , int, int);