All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
To: Theodore Tso <tytso@mit.edu>
Cc: linux-ext4@vger.kernel.org, Andreas Dilger <adilger@sun.com>,
	Eric Sandeen <sandeen@redhat.com>
Subject: Re: [PATCH, RFC] ext4: New inode/block allocation algorithms for flex_bg filesystems
Date: Fri, 27 Feb 2009 00:08:12 +0530	[thread overview]
Message-ID: <20090226183812.GA21612@skywalker> (raw)
In-Reply-To: <20090226182156.GL7227@mit.edu>

....

>  static int find_group_orlov(struct super_block *sb, struct inode *parent,
> -				ext4_group_t *group)
> +			    ext4_group_t *group, int mode)
>  {
>  	ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
>  	struct ext4_sb_info *sbi = EXT4_SB(sb);
> -	struct ext4_super_block *es = sbi->s_es;
>  	ext4_group_t ngroups = sbi->s_groups_count;
>  	int inodes_per_group = EXT4_INODES_PER_GROUP(sb);
>  	unsigned int freei, avefreei;
>  	ext4_fsblk_t freeb, avefreeb;
> -	ext4_fsblk_t blocks_per_dir;
>  	unsigned int ndirs;
> -	int max_debt, max_dirs, min_inodes;
> +	int max_dirs, min_inodes;
>  	ext4_grpblk_t min_blocks;
> -	ext4_group_t i;
> +	ext4_group_t i, grp, g;
>  	struct ext4_group_desc *desc;
> +	struct orlov_stats stats;
> +	int flex_size = ext4_flex_bg_size(sbi);
> +
> +	if (flex_size > 1) {
> +		ngroups = (ngroups + flex_size - 1) >>
> +			sbi->s_log_groups_per_flex;
> +		parent_group >>= sbi->s_log_groups_per_flex;
> +	}
> 
>  	freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter);
>  	avefreei = freei / ngroups;
> @@ -460,71 +496,97 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
>  	do_div(avefreeb, ngroups);
>  	ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter);
> 
> -	if ((parent == sb->s_root->d_inode) ||
> -	    (EXT4_I(parent)->i_flags & EXT4_TOPDIR_FL)) {
> +	if (S_ISDIR(mode) &&
> +	    ((parent == sb->s_root->d_inode) ||
> +	     (EXT4_I(parent)->i_flags & EXT4_TOPDIR_FL))) {
>  		int best_ndir = inodes_per_group;
> -		ext4_group_t grp;
>  		int ret = -1;
> 
>  		get_random_bytes(&grp, sizeof(grp));
>  		parent_group = (unsigned)grp % ngroups;
>  		for (i = 0; i < ngroups; i++) {
> -			grp = (parent_group + i) % ngroups;
> -			desc = ext4_get_group_desc(sb, grp, NULL);
> -			if (!desc || !ext4_free_inodes_count(sb, desc))
> +			g = (parent_group + i) % ngroups;
> +			get_orlov_stats(sb, g, flex_size, &stats);
> +			if (!stats.free_inodes)
>  				continue;
> -			if (ext4_used_dirs_count(sb, desc) >= best_ndir)
> +			if (stats.used_dirs >= best_ndir)
>  				continue;
> -			if (ext4_free_inodes_count(sb, desc) < avefreei)
> +			if (stats.free_inodes < avefreei)
>  				continue;
> -			if (ext4_free_blks_count(sb, desc) < avefreeb)
> +			if (stats.free_blocks < avefreeb)
>  				continue;
> -			*group = grp;
> +			grp = g;
>  			ret = 0;
> -			best_ndir = ext4_used_dirs_count(sb, desc);
> +			best_ndir = stats.used_dirs;
> +		}
> +		if (ret)
> +			goto fallback;
> +	found_flex_bg:
> +		if (flex_size == 1) {
> +			*group = grp;
> +			return 0;
> +		}
> +
> +		/*
> +		 * We pack inodes at the beginning of the flexgroup's
> +		 * inode tables.  Block allocation decisions will do
> +		 * something similar, although regular files will
> +		 * start at 2nd block group of the flexgroup.  See
> +		 * ext4_ext_find_goal() and ext4_find_near().
> +		 */
> +		grp *= flex_size;
> +		for (i = 0; i < flex_size; i++) {
> +			if (grp+i >= sbi->s_groups_count)
> +				break;
> +			desc = ext4_get_group_desc(sb, grp+i, NULL);
> +			if (desc && ext4_free_inodes_count(sb, desc)) {
> +				*group = grp+i;
> +				return 0;
> +			}
>  		}
> -		if (ret == 0)
> -			return ret;
>  		goto fallback;
>  	}
> 
> -	blocks_per_dir = ext4_blocks_count(es) - freeb;
> -	do_div(blocks_per_dir, ndirs);
> -
>  	max_dirs = ndirs / ngroups + inodes_per_group / 16;
> -	min_inodes = avefreei - inodes_per_group / 4;
> -	min_blocks = avefreeb - EXT4_BLOCKS_PER_GROUP(sb) / 4;
> -
> -	max_debt = EXT4_BLOCKS_PER_GROUP(sb);
> -	max_debt /= max_t(int, blocks_per_dir, BLOCK_COST);
> -	if (max_debt * INODE_COST > inodes_per_group)
> -		max_debt = inodes_per_group / INODE_COST;
> -	if (max_debt > 255)
> -		max_debt = 255;
> -	if (max_debt == 0)
> -		max_debt = 1;
> +	min_inodes = avefreei - inodes_per_group*flex_size / 4;
> +	if (min_inodes < 1)
> +		min_inodes = 1;
> +	min_blocks = avefreeb - EXT4_BLOCKS_PER_GROUP(sb)*flex_size / 4;
> +
> +	/*
> +	 * Start looking in the flex group where we last allocated an
> +	 * inode for this parent directory
> +	 */
> +	if (EXT4_I(parent)->i_last_alloc_group != ~0) {
> +		parent_group = EXT4_I(parent)->i_last_alloc_group;
> +		if (flex_size > 1)
> +			parent_group >>= sbi->s_log_groups_per_flex;
> +	}
> 
>  	for (i = 0; i < ngroups; i++) {
> -		*group = (parent_group + i) % ngroups;
> -		desc = ext4_get_group_desc(sb, *group, NULL);
> -		if (!desc || !ext4_free_inodes_count(sb, desc))
> -			continue;
> -		if (ext4_used_dirs_count(sb, desc) >= max_dirs)
> +		grp = (parent_group + i) % ngroups;
> +		get_orlov_stats(sb, grp, flex_size, &stats);
> +		if (stats.used_dirs >= max_dirs)
>  			continue;
> -		if (ext4_free_inodes_count(sb, desc) < min_inodes)
> +		if (stats.free_inodes < min_inodes)
>  			continue;
> -		if (ext4_free_blks_count(sb, desc) < min_blocks)
> +		if (stats.free_blocks < min_blocks)
>  			continue;
> -		return 0;
> +		goto found_flex_bg;
>  	}
> 
>  fallback:
> +	ngroups = sbi->s_groups_count;
> +	avefreei = freei / ngroups;
> +	parent_group = EXT4_I(parent)->i_block_group;
>  	for (i = 0; i < ngroups; i++) {
> -		*group = (parent_group + i) % ngroups;
> -		desc = ext4_get_group_desc(sb, *group, NULL);
> +		grp = (parent_group + i) % ngroups;
> +		desc = ext4_get_group_desc(sb, grp, NULL);
>  		if (desc && ext4_free_inodes_count(sb, desc) &&
> -			ext4_free_inodes_count(sb, desc) >= avefreei)
> +		    ext4_free_inodes_count(sb, desc) >= avefreei) {
> +			*group = grp;
>  			return 0;
> +		}
>  	}
> 
>  	if (avefreei) {
> @@ -540,12 +602,51 @@ fallback:
>  }
> 
>  static int find_group_other(struct super_block *sb, struct inode *parent,
> -				ext4_group_t *group)
> +			    ext4_group_t *group, int mode)
>  {
>  	ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
>  	ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
>  	struct ext4_group_desc *desc;
> -	ext4_group_t i;
> +	ext4_group_t i, last;
> +	int flex_size = ext4_flex_bg_size(EXT4_SB(sb));
> +
> +	/*
> +	 * Try to place the inode is the same flex group as its
> +	 * parent.  If we can't find space, use the Orlov algorithm to
> +	 * find another flex group, and store that information in the
> +	 * parent directory's inode information so that use that flex
> +	 * group for future allocations.
> +	 */
> +	if (flex_size > 1) {
> +		int retry = 0;
> +
> +	try_again:
> +		parent_group &= ~(flex_size-1);
> +		last = parent_group + flex_size;
> +		if (last > ngroups)
> +			last = ngroups;
> +		for  (i = parent_group; i < last; i++) {
> +			desc = ext4_get_group_desc(sb, i, NULL);
> +			if (desc && ext4_free_inodes_count(sb, desc)) {
> +				*group = i;
> +				return 0;
> +			}
> +		}
> +		if (!retry && EXT4_I(parent)->i_last_alloc_group != ~0) {
> +			retry = 1;
> +			parent_group = EXT4_I(parent)->i_last_alloc_group;
> +			goto try_again;
> +		}


For directories we try first with i_last_alloc_group and then with the
parent i_block_group. For file we are doing the other way round here.
Does it make sense to try with i_last_alloc_group first ?



> +		/*
> +		 * If this didn't work, use the Orlov search algorithm
> +		 * to find a new flex group; we pass in the mode to
> +		 * avoid the topdir algorithms.
> +		 */
> +		*group = parent_group + flex_size;
> +		if (*group > ngroups)
> +			*group = 0;
> +		return find_group_orlov(sb, parent, group, mode);
> +	}
> 
>  	/*

If you want you can add
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>

-aneesh

  reply	other threads:[~2009-02-26 18:38 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-02-18 15:43 [PATCH, RFC] ext4: New inode/block allocation algorithms for flex_bg filesystems Theodore Tso
2009-02-24  8:59 ` Aneesh Kumar K.V
2009-02-24 15:27   ` Theodore Tso
2009-02-24 19:04     ` Theodore Tso
2009-02-24 22:41   ` Andreas Dilger
2009-02-25  0:57     ` Eric Sandeen
2009-02-25  0:58       ` Eric Sandeen
2009-02-25  2:50     ` Theodore Tso
2009-02-26 18:21 ` Theodore Tso
2009-02-26 18:38   ` Aneesh Kumar K.V [this message]
2009-03-30  8:48     ` Aneesh Kumar K.V
2009-02-27  0:15   ` Andreas Dilger
2009-02-27  9:17   ` Andreas Dilger
2009-02-27 15:06     ` Theodore Tso

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20090226183812.GA21612@skywalker \
    --to=aneesh.kumar@linux.vnet.ibm.com \
    --cc=adilger@sun.com \
    --cc=linux-ext4@vger.kernel.org \
    --cc=sandeen@redhat.com \
    --cc=tytso@mit.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.