linux-btrfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: liubo <liubo2009@cn.fujitsu.com>
To: Josef Bacik <josef@redhat.com>
Cc: linux-btrfs@vger.kernel.org
Subject: Re: [PATCH] Btrfs-progs: add support for mixed data+metadata block groups
Date: Fri, 08 Apr 2011 15:31:37 +0800	[thread overview]
Message-ID: <4D9EB9D9.5060005@cn.fujitsu.com> (raw)
In-Reply-To: <1291919468-17802-1-git-send-email-josef@redhat.com>

On 12/10/2010 02:31 AM, Josef Bacik wrote:
> So a lot of crazy people (I'm looking at you Meego) want to use btrfs on phones
> and such with small devices.  Unfortunately, the way we split out metadata/data
> chunks makes space usage inefficient for volumes that are smaller than
> 1 gigabyte.  So add a -M option for mixing metadata+data, and default to this
> mixed mode if the filesystem is less than or equal to 1 gigabyte.  I've tested
> this with xfstests on a 100mb filesystem and everything is a-ok.
> 

Hi, Josef,

While using this mixed metadata+data option, I noticed the following in btrfs-debug-tree's output:

===
chunk tree
leaf 143360 items 4 free space 3557 generation 4 owner 3
fs uuid 77d78a87-a886-4bfa-be3b-0dd052213a17
chunk uuid e64148d6-8267-4ff1-aafd-4266f74afbb2
        item 0 key (DEV_ITEMS DEV_ITEM 1) itemoff 3897 itemsize 98
                dev item devid 1 total_bytes 4999610368 bytes used 20971520
        item 1 key (FIRST_CHUNK_TREE CHUNK_ITEM 0) itemoff 3817 itemsize 80
                chunk length 4194304 owner 2 type 2 num_stripes 1
                        stripe 0 devid 1 offset 0
        item 2 key (FIRST_CHUNK_TREE CHUNK_ITEM 4194304) itemoff 3737 itemsize 80
                chunk length 8388608 owner 2 type 5 num_stripes 1
                        stripe 0 devid 1 offset 4194304
        item 3 key (FIRST_CHUNK_TREE CHUNK_ITEM 12582912) itemoff 3657 itemsize 80   <== THIS ONE
                chunk length 8388608 owner 2 type 4 num_stripes 1		     <== 
                        stripe 0 devid 1 offset 12582912			     <== 
===

As you can see, there is another metadata chunk (type 4) after running "mkfs.btrfs -M /dev/xxx".
So I was wondering: _IS_ this chunk what we want, or is it a spare one?

thanks,
liubo

> Signed-off-by: Josef Bacik <josef@redhat.com>
> ---
>  btrfs-vol.c  |    4 +-
>  btrfs_cmds.c |   13 +++++-
>  ctree.h      |   10 +++--
>  mkfs.c       |  122 +++++++++++++++++++++++++++++++++++++++++-----------------
>  utils.c      |   10 ++--
>  utils.h      |    2 +-
>  6 files changed, 112 insertions(+), 49 deletions(-)
> 
> diff --git a/btrfs-vol.c b/btrfs-vol.c
> index 8069778..7200bbc 100644
> --- a/btrfs-vol.c
> +++ b/btrfs-vol.c
> @@ -129,7 +129,9 @@ int main(int ac, char **av)
>  		exit(1);
>  	}
>  	if (cmd == BTRFS_IOC_ADD_DEV) {
> -		ret = btrfs_prepare_device(devfd, device, 1, &dev_block_count);
> +		int mixed = 0;
> +
> +		ret = btrfs_prepare_device(devfd, device, 1, &dev_block_count, &mixed);
>  		if (ret) {
>  			fprintf(stderr, "Unable to init %s\n", device);
>  			exit(1);
> diff --git a/btrfs_cmds.c b/btrfs_cmds.c
> index 8031c58..683aec0 100644
> --- a/btrfs_cmds.c
> +++ b/btrfs_cmds.c
> @@ -705,6 +705,7 @@ int do_add_volume(int nargs, char **args)
>  		int	devfd, res;
>  		u64 dev_block_count = 0;
>  		struct stat st;
> +		int mixed = 0;
>  
>  		devfd = open(args[i], O_RDWR);
>  		if (!devfd) {
> @@ -727,7 +728,7 @@ int do_add_volume(int nargs, char **args)
>  			continue;
>  		}
>  
> -		res = btrfs_prepare_device(devfd, args[i], 1, &dev_block_count);
> +		res = btrfs_prepare_device(devfd, args[i], 1, &dev_block_count, &mixed);
>  		if (res) {
>  			fprintf(stderr, "ERROR: Unable to init '%s'\n", args[i]);
>  			close(devfd);
> @@ -889,8 +890,14 @@ int do_df_filesystem(int nargs, char **argv)
>  		memset(description, 0, 80);
>  
>  		if (flags & BTRFS_BLOCK_GROUP_DATA) {
> -			snprintf(description, 5, "%s", "Data");
> -			written += 4;
> +			if (flags & BTRFS_BLOCK_GROUP_METADATA) {
> +				snprintf(description, 15, "%s",
> +					 "Data+Metadata");
> +				written += 14;
> +			} else {
> +				snprintf(description, 5, "%s", "Data");
> +				written += 4;
> +			}
>  		} else if (flags & BTRFS_BLOCK_GROUP_SYSTEM) {
>  			snprintf(description, 7, "%s", "System");
>  			written += 6;
> diff --git a/ctree.h b/ctree.h
> index 962c510..ed83d02 100644
> --- a/ctree.h
> +++ b/ctree.h
> @@ -352,13 +352,15 @@ struct btrfs_super_block {
>   * ones specified below then we will fail to mount
>   */
>  #define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF	(1ULL << 0)
> -#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL	(2ULL << 0)
> +#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL	(1ULL << 1)
> +#define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS	(1ULL << 2)
>  
>  #define BTRFS_FEATURE_COMPAT_SUPP		0ULL
>  #define BTRFS_FEATURE_COMPAT_RO_SUPP		0ULL
> -#define BTRFS_FEATURE_INCOMPAT_SUPP		\
> -	(BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF |	\
> -	 BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL)
> +#define BTRFS_FEATURE_INCOMPAT_SUPP			\
> +	(BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF |		\
> +	 BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL |	\
> +	 BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
>  
>  /*
>   * A leaf is full of items. offset and size tell us where to find
> diff --git a/mkfs.c b/mkfs.c
> index 2e99b95..04de93a 100644
> --- a/mkfs.c
> +++ b/mkfs.c
> @@ -69,7 +69,7 @@ static u64 parse_size(char *s)
>  	return atol(s) * mult;
>  }
>  
> -static int make_root_dir(struct btrfs_root *root)
> +static int make_root_dir(struct btrfs_root *root, int mixed)
>  {
>  	struct btrfs_trans_handle *trans;
>  	struct btrfs_key location;
> @@ -88,30 +88,47 @@ static int make_root_dir(struct btrfs_root *root)
>  				     0, BTRFS_MKFS_SYSTEM_GROUP_SIZE);
>  	BUG_ON(ret);
>  
> -	ret = btrfs_alloc_chunk(trans, root->fs_info->extent_root,
> -				&chunk_start, &chunk_size,
> -				BTRFS_BLOCK_GROUP_METADATA);
> -	BUG_ON(ret);
> -	ret = btrfs_make_block_group(trans, root, 0,
> -				     BTRFS_BLOCK_GROUP_METADATA,
> -				     BTRFS_FIRST_CHUNK_TREE_OBJECTID,
> -				     chunk_start, chunk_size);
> -	BUG_ON(ret);
> +	if (mixed) {
> +		ret = btrfs_alloc_chunk(trans, root->fs_info->extent_root,
> +					&chunk_start, &chunk_size,
> +					BTRFS_BLOCK_GROUP_METADATA |
> +					BTRFS_BLOCK_GROUP_DATA);
> +		BUG_ON(ret);
> +		ret = btrfs_make_block_group(trans, root, 0,
> +					     BTRFS_BLOCK_GROUP_METADATA |
> +					     BTRFS_BLOCK_GROUP_DATA,
> +					     BTRFS_FIRST_CHUNK_TREE_OBJECTID,
> +					     chunk_start, chunk_size);
> +		BUG_ON(ret);
> +		printf("Created a data/metadata chunk of size %llu\n", chunk_size);
> +	} else {
> +		ret = btrfs_alloc_chunk(trans, root->fs_info->extent_root,
> +					&chunk_start, &chunk_size,
> +					BTRFS_BLOCK_GROUP_METADATA);
> +		BUG_ON(ret);
> +		ret = btrfs_make_block_group(trans, root, 0,
> +					     BTRFS_BLOCK_GROUP_METADATA,
> +					     BTRFS_FIRST_CHUNK_TREE_OBJECTID,
> +					     chunk_start, chunk_size);
> +		BUG_ON(ret);
> +	}
>  
>  	root->fs_info->system_allocs = 0;
>  	btrfs_commit_transaction(trans, root);
>  	trans = btrfs_start_transaction(root, 1);
>  	BUG_ON(!trans);
>  
> -	ret = btrfs_alloc_chunk(trans, root->fs_info->extent_root,
> -				&chunk_start, &chunk_size,
> -				BTRFS_BLOCK_GROUP_DATA);
> -	BUG_ON(ret);
> -	ret = btrfs_make_block_group(trans, root, 0,
> -				     BTRFS_BLOCK_GROUP_DATA,
> -				     BTRFS_FIRST_CHUNK_TREE_OBJECTID,
> -				     chunk_start, chunk_size);
> -	BUG_ON(ret);
> +	if (!mixed) {
> +		ret = btrfs_alloc_chunk(trans, root->fs_info->extent_root,
> +					&chunk_start, &chunk_size,
> +					BTRFS_BLOCK_GROUP_DATA);
> +		BUG_ON(ret);
> +		ret = btrfs_make_block_group(trans, root, 0,
> +					     BTRFS_BLOCK_GROUP_DATA,
> +					     BTRFS_FIRST_CHUNK_TREE_OBJECTID,
> +					     chunk_start, chunk_size);
> +		BUG_ON(ret);
> +	}
>  
>  	ret = btrfs_make_root_dir(trans, root->fs_info->tree_root,
>  			      BTRFS_ROOT_TREE_DIR_OBJECTID);
> @@ -200,7 +217,7 @@ static int create_one_raid_group(struct btrfs_trans_handle *trans,
>  
>  static int create_raid_groups(struct btrfs_trans_handle *trans,
>  			      struct btrfs_root *root, u64 data_profile,
> -			      u64 metadata_profile)
> +			      u64 metadata_profile, int mixed)
>  {
>  	u64 num_devices = btrfs_super_num_devices(&root->fs_info->super_copy);
>  	u64 allowed;
> @@ -215,20 +232,24 @@ static int create_raid_groups(struct btrfs_trans_handle *trans,
>  		allowed = BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1;
>  
>  	if (allowed & metadata_profile) {
> +		u64 meta_flags = BTRFS_BLOCK_GROUP_METADATA;
> +
>  		ret = create_one_raid_group(trans, root,
>  					    BTRFS_BLOCK_GROUP_SYSTEM |
>  					    (allowed & metadata_profile));
>  		BUG_ON(ret);
>  
> -		ret = create_one_raid_group(trans, root,
> -					    BTRFS_BLOCK_GROUP_METADATA |
> +		if (mixed)
> +			meta_flags |= BTRFS_BLOCK_GROUP_DATA;
> +
> +		ret = create_one_raid_group(trans, root, meta_flags |
>  					    (allowed & metadata_profile));
>  		BUG_ON(ret);
>  
>  		ret = recow_roots(trans, root);
>  		BUG_ON(ret);
>  	}
> -	if (num_devices > 1 && (allowed & data_profile)) {
> +	if (!mixed && num_devices > 1 && (allowed & data_profile)) {
>  		ret = create_one_raid_group(trans, root,
>  					    BTRFS_BLOCK_GROUP_DATA |
>  					    (allowed & data_profile));
> @@ -274,6 +295,7 @@ static void print_usage(void)
>  	fprintf(stderr, "\t -l --leafsize size of btree leaves\n");
>  	fprintf(stderr, "\t -L --label set a label\n");
>  	fprintf(stderr, "\t -m --metadata metadata profile, values like data profile\n");
> +	fprintf(stderr, "\t -M --mixed mix metadata and data together\n");
>  	fprintf(stderr, "\t -n --nodesize size of btree nodes\n");
>  	fprintf(stderr, "\t -s --sectorsize min block allocation\n");
>  	fprintf(stderr, "%s\n", BTRFS_BUILD_VERSION);
> @@ -328,6 +350,7 @@ static struct option long_options[] = {
>  	{ "leafsize", 1, NULL, 'l' },
>  	{ "label", 1, NULL, 'L'},
>  	{ "metadata", 1, NULL, 'm' },
> +	{ "mixed", 0, NULL, 'M' },
>  	{ "nodesize", 1, NULL, 'n' },
>  	{ "sectorsize", 1, NULL, 's' },
>  	{ "data", 1, NULL, 'd' },
> @@ -358,10 +381,13 @@ int main(int ac, char **av)
>  	int first_fd;
>  	int ret;
>  	int i;
> +	int mixed = 0;
> +	int data_profile_opt = 0;
> +	int metadata_profile_opt = 0;
>  
>  	while(1) {
>  		int c;
> -		c = getopt_long(ac, av, "A:b:l:n:s:m:d:L:V", long_options,
> +		c = getopt_long(ac, av, "A:b:l:n:s:m:d:L:VM", long_options,
>  				&option_index);
>  		if (c < 0)
>  			break;
> @@ -371,6 +397,7 @@ int main(int ac, char **av)
>  				break;
>  			case 'd':
>  				data_profile = parse_profile(optarg);
> +				data_profile_opt = 1;
>  				break;
>  			case 'l':
>  				leafsize = parse_size(optarg);
> @@ -380,6 +407,10 @@ int main(int ac, char **av)
>  				break;
>  			case 'm':
>  				metadata_profile = parse_profile(optarg);
> +				metadata_profile_opt = 1;
> +				break;
> +			case 'M':
> +				mixed = 1;
>  				break;
>  			case 'n':
>  				nodesize = parse_size(optarg);
> @@ -389,12 +420,10 @@ int main(int ac, char **av)
>  				break;
>  			case 'b':
>  				block_count = parse_size(optarg);
> -				if (block_count < 256*1024*1024) {
> -					fprintf(stderr, "File system size "
> -						"%llu bytes is too small, "
> -						"256M is required at least\n",
> -						(unsigned long long)block_count);
> -					exit(1);
> +				if (block_count <= 1024*1024*1024) {
> +					printf("SMALL VOLUME: forcing mixed "
> +					       "metadata/data groups\n");
> +					mixed = 1;
>  				}
>  				zero_end = 0;
>  				break;
> @@ -439,9 +468,22 @@ int main(int ac, char **av)
>  	}
>  	first_fd = fd;
>  	first_file = file;
> -	ret = btrfs_prepare_device(fd, file, zero_end, &dev_block_count);
> +	ret = btrfs_prepare_device(fd, file, zero_end, &dev_block_count,
> +				   &mixed);
>  	if (block_count == 0)
>  		block_count = dev_block_count;
> +	if (mixed) {
> +		if (!metadata_profile_opt)
> +			metadata_profile = 0;
> +		if (!data_profile_opt)
> +			data_profile = 0;
> +
> +		if (metadata_profile != data_profile) {
> +			fprintf(stderr, "With mixed block groups data and metadata "
> +				"profiles must be the same\n");
> +			exit(1);
> +		}
> +	}
>  
>  	blocks[0] = BTRFS_SUPER_INFO_OFFSET;
>  	for (i = 1; i < 7; i++) {
> @@ -459,7 +501,7 @@ int main(int ac, char **av)
>  	root = open_ctree(file, 0, O_RDWR);
>  	root->fs_info->alloc_start = alloc_start;
>  
> -	ret = make_root_dir(root);
> +	ret = make_root_dir(root, mixed);
>  	if (ret) {
>  		fprintf(stderr, "failed to setup the root directory\n");
>  		exit(1);
> @@ -478,6 +520,8 @@ int main(int ac, char **av)
>  
>  	zero_end = 1;
>  	while(ac-- > 0) {
> +		int old_mixed = mixed;
> +
>  		file = av[optind++];
>  		ret = check_mounted(file);
>  		if (ret < 0) {
> @@ -503,8 +547,8 @@ int main(int ac, char **av)
>  			continue;
>  		}
>  		ret = btrfs_prepare_device(fd, file, zero_end,
> -					   &dev_block_count);
> -
> +					   &dev_block_count, &mixed);
> +		mixed = old_mixed;
>  		BUG_ON(ret);
>  
>  		ret = btrfs_add_to_fsid(trans, root, fd, file, dev_block_count,
> @@ -515,12 +559,20 @@ int main(int ac, char **av)
>  
>  raid_groups:
>  	ret = create_raid_groups(trans, root, data_profile,
> -				 metadata_profile);
> +				 metadata_profile, mixed);
>  	BUG_ON(ret);
>  
>  	ret = create_data_reloc_tree(trans, root);
>  	BUG_ON(ret);
>  
> +	if (mixed) {
> +		struct btrfs_super_block *super = &root->fs_info->super_copy;
> +		u64 flags = btrfs_super_incompat_flags(super);
> +
> +		flags |= BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS;
> +		btrfs_set_super_incompat_flags(super, flags);
> +	}
> +
>  	printf("fs created label %s on %s\n\tnodesize %u leafsize %u "
>  	    "sectorsize %u size %s\n",
>  	    label, first_file, nodesize, leafsize, sectorsize,
> diff --git a/utils.c b/utils.c
> index b890728..13d31b7 100644
> --- a/utils.c
> +++ b/utils.c
> @@ -508,7 +508,8 @@ int btrfs_add_to_fsid(struct btrfs_trans_handle *trans,
>  	return 0;
>  }
>  
> -int btrfs_prepare_device(int fd, char *file, int zero_end, u64 *block_count_ret)
> +int btrfs_prepare_device(int fd, char *file, int zero_end, u64 *block_count_ret,
> +			 int *mixed)
>  {
>  	u64 block_count;
>  	u64 bytenr;
> @@ -528,10 +529,9 @@ int btrfs_prepare_device(int fd, char *file, int zero_end, u64 *block_count_ret)
>  	}
>  	zero_end = 1;
>  
> -	if (block_count < 256 * 1024 * 1024) {
> -		fprintf(stderr, "device %s is too small "
> -		        "(must be at least 256 MB)\n", file);
> -		exit(1);
> +	if (block_count < 1024 * 1024 * 1024 && !(*mixed)) {
> +		printf("SMALL VOLUME: forcing mixed metadata/data groups\n");
> +		*mixed = 1;
>  	}
>  	ret = zero_dev_start(fd);
>  	if (ret) {
> diff --git a/utils.h b/utils.h
> index 7ff542b..b91140e 100644
> --- a/utils.h
> +++ b/utils.h
> @@ -27,7 +27,7 @@ int make_btrfs(int fd, const char *device, const char *label,
>  int btrfs_make_root_dir(struct btrfs_trans_handle *trans,
>  			struct btrfs_root *root, u64 objectid);
>  int btrfs_prepare_device(int fd, char *file, int zero_end,
> -			 u64 *block_count_ret);
> +			 u64 *block_count_ret, int *mixed);
>  int btrfs_add_to_fsid(struct btrfs_trans_handle *trans,
>  		      struct btrfs_root *root, int fd, char *path,
>  		      u64 block_count, u32 io_width, u32 io_align,


  reply	other threads:[~2011-04-08  7:31 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-12-09 18:31 [PATCH] Btrfs-progs: add support for mixed data+metadata block groups Josef Bacik
2011-04-08  7:31 ` liubo [this message]
2011-04-08 13:15   ` Josef Bacik
  -- strict thread matches above, loose matches on Subject: below --
2010-09-16 20:16 Josef Bacik

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4D9EB9D9.5060005@cn.fujitsu.com \
    --to=liubo2009@cn.fujitsu.com \
    --cc=josef@redhat.com \
    --cc=linux-btrfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).