[Cluster-devel] [PATCH 1/4] mkfs.gfs2: Set sunit and swidth from probed io limits

cluster-devel.redhat.com archive mirror
 help / color / mirror / Atom feed

* [Cluster-devel] [PATCH 1/4] mkfs.gfs2: Set sunit and swidth from probed io limits
@ 2013-06-06 12:03 Andrew Price
  2013-06-06 12:03 ` [Cluster-devel] [PATCH 2/4] mkfs.gfs2: Align resource groups to RAID stripes Andrew Price
                   ` (2 more replies)
  0 siblings, 3 replies; 19+ messages in thread
From: Andrew Price @ 2013-06-06 12:03 UTC (permalink / raw)
  To: cluster-devel.redhat.com

If sunit and swidth are given, use the io limits probed with libblkid to
provide the alignment values. Also improves validation of the sunit and
swidth options.

Signed-off-by: Andrew Price <anprice@redhat.com>
---
 gfs2/mkfs/main_mkfs.c | 44 +++++++++++++++++++++++++++++++++++---------
 1 file changed, 35 insertions(+), 9 deletions(-)

diff --git a/gfs2/mkfs/main_mkfs.c b/gfs2/mkfs/main_mkfs.c
index 80b2782..12a259f 100644
--- a/gfs2/mkfs/main_mkfs.c
+++ b/gfs2/mkfs/main_mkfs.c
@@ -85,8 +85,8 @@ struct mkfs_opts {
 	unsigned qcsize;
 	unsigned jsize;
 	unsigned rgsize;
-	unsigned sunit;
-	unsigned swidth;
+	unsigned long sunit;
+	unsigned long swidth;
 	uint64_t fssize;
 	uint32_t journals;
 	const char *lockproto;
@@ -216,7 +216,7 @@ static long long cvtnum(unsigned int blocksize, unsigned int sectorsize, const c
         return -1LL;
 }
 
-static void parse_unsigned(struct mkfs_opts *opts, const char *key, const char *val, unsigned *n)
+static unsigned long parse_ulong(struct mkfs_opts *opts, const char *key, const char *val)
 {
 	long long l;
 	if (val == NULL || *val == '\0') {
@@ -224,11 +224,11 @@ static void parse_unsigned(struct mkfs_opts *opts, const char *key, const char *
 		exit(-1);
 	}
 	l = cvtnum(opts->bsize, 0, val);
-	if (l > UINT_MAX || l < 0) {
+	if (l > ULONG_MAX || l < 0) {
 		fprintf(stderr, _("Value of '%s' is invalid\n"), key);
 		exit(-1);
 	}
-	*n = (unsigned)l;
+	return (unsigned long)l;
 }
 
 static void opt_parse_extended(char *str, struct mkfs_opts *opts)
@@ -242,10 +242,10 @@ static void opt_parse_extended(char *str, struct mkfs_opts *opts)
 			exit(-1);
 		}
 		if (strcmp("sunit", key) == 0) {
-			parse_unsigned(opts, "sunit", val, &opts->sunit);
+			opts->sunit = parse_ulong(opts, "sunit", val);
 			opts->got_sunit = 1;
 		} else if (strcmp("swidth", key) == 0) {
-			parse_unsigned(opts, "swidth", val, &opts->swidth);
+			opts->swidth = parse_ulong(opts, "swidth", val);
 			opts->got_swidth = 1;
 		} else {
 			fprintf(stderr, _("Invalid option '%s'\n"), key);
@@ -495,6 +495,16 @@ static void opts_check(struct mkfs_opts *opts)
 	if (!opts->qcsize || opts->qcsize > 64)
 		die( _("bad quota change size\n"));
 
+	if ((opts->got_sunit && !opts->got_swidth) || (!opts->got_sunit && opts->got_swidth)) {
+		fprintf(stderr, _("Stripe unit and stripe width must be specified together\n"));
+		exit(1);
+	}
+
+	if (opts->got_sunit && (opts->swidth % opts->sunit)) {
+		fprintf(stderr, _("Stripe width (%lu) must be a multiple of stripe unit (%lu)\n"),
+				opts->swidth, opts->sunit);
+		exit(1);
+	}
 }
 
 static void print_results(struct gfs2_sbd *sdp, uint64_t real_device_size,
@@ -666,8 +676,8 @@ static void sbd_init(struct gfs2_sbd *sdp, struct mkfs_opts *opts, struct mkfs_d
 		printf("  rgsize = %u\n", sdp->rgsize);
 		printf("  table = %s\n", sdp->locktable);
 		printf("  fssize = %"PRIu64"\n", opts->fssize);
-		printf("  sunit = %u\n", opts->sunit);
-		printf("  swidth = %u\n", opts->swidth);
+		printf("  sunit = %lu\n", opts->sunit);
+		printf("  swidth = %lu\n", opts->swidth);
 	}
 }
 
@@ -767,6 +777,22 @@ void main_mkfs(int argc, char *argv[])
 	opts_check(&opts);
 
 	open_dev(opts.device, &dev);
+	if (!opts.got_swidth) {
+		if (dev.optimal_io_size > 0)
+			opts.swidth = dev.optimal_io_size;
+		else
+			opts.swidth = dev.logical_sector_size;
+	}
+
+	if (!opts.got_sunit) {
+		if (dev.minimum_io_size > 0)
+			opts.sunit = dev.minimum_io_size;
+		else
+			opts.sunit = dev.logical_sector_size;
+	}
+
+	if (opts.debug)
+		printf("Resource group alignment: %"PRIu64" bytes\n", opts.swidth);
 
 	if (S_ISREG(dev.stat.st_mode)) {
 		opts.got_bsize = 1; /* Use default block size for regular files */
-- 
1.8.1.4



^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [Cluster-devel] [PATCH 2/4] mkfs.gfs2: Align resource groups to RAID stripes
  2013-06-06 12:03 [Cluster-devel] [PATCH 1/4] mkfs.gfs2: Set sunit and swidth from probed io limits Andrew Price
@ 2013-06-06 12:03 ` Andrew Price
  2013-06-06 12:06   ` Steven Whitehouse
  2013-06-06 12:57   ` Bob Peterson
  2013-06-06 12:03 ` [Cluster-devel] [PATCH 3/4] mkfs.gfs2: Create new resource groups on-demand Andrew Price
  2013-06-06 12:03 ` [Cluster-devel] [PATCH 4/4] mkfs.gfs2: Add align option and update docs Andrew Price
  2 siblings, 2 replies; 19+ messages in thread
From: Andrew Price @ 2013-06-06 12:03 UTC (permalink / raw)
  To: cluster-devel.redhat.com

This patch uses the values provided by libblkid to align resource groups
to RAID stripes. The strategy we're using here is to give the start of
each rgrp an alignment to the stripe width and add an offset of one
stripe unit for the next rgrp and so on. This should ensure that the
rgrp headers are spread evenly over the array to minimise contention on
the bitmap blocks.

One challenge here was to avoid creating large gaps between rgrps and at
the end of the device due to the alignment padding. We get around this
by calculating the start and length of the next rgrp before fixing the
length of the current rgrp and extending it (or shrinking the final one)
as appropriate.

In order for this to work some relationships between block and stripe
sizes have been enforced: the stripe width must be a multiple of the
stripe unit and the stripe unit must be a multiple of the block size.

With this patch, specifying an rg size on the command line still gives
aligned rgrps but gaps will still be present.

Signed-off-by: Andrew Price <anprice@redhat.com>
---
 gfs2/mkfs/main_mkfs.c | 136 ++++++++++++++++++++++++++++++++++----------------
 1 file changed, 92 insertions(+), 44 deletions(-)

diff --git a/gfs2/mkfs/main_mkfs.c b/gfs2/mkfs/main_mkfs.c
index 12a259f..058e4fa 100644
--- a/gfs2/mkfs/main_mkfs.c
+++ b/gfs2/mkfs/main_mkfs.c
@@ -580,26 +580,62 @@ static int writerg(int fd, const struct rgrp_tree *rgt, const unsigned bsize)
 	return 0;
 }
 
-static int place_rgrps(struct gfs2_sbd *sdp, const struct mkfs_opts *opts)
+static uint64_t align_block(const uint64_t base, const uint64_t align, const uint32_t bsize)
+{
+	if ((align > 0) && ((base % align) > 0))
+		return (base - (base % align)) + align;
+	return base;
+}
+
+static int place_rgrps(struct gfs2_sbd *sdp, const struct mkfs_opts *opts, const struct mkfs_dev *dev)
 {
 	struct rgrp_tree *rgt = NULL;
 	uint64_t rgaddr = 0;
-	unsigned int i = 0;
+	uint64_t nextaddr = 0;
+	uint64_t rglen = (sdp->rgsize << 20) / sdp->bsize;
+	const uint64_t maxrgsz = (GFS2_MAX_RGSIZE << 20) / sdp->bsize;
+	const uint64_t minrgsz = (GFS2_MIN_RGSIZE << 20) / sdp->bsize;
+	unsigned sunit_blocks = opts->sunit / sdp->bsize;
+	unsigned swidth_blocks = opts->swidth / opts->bsize;
+	unsigned stripe_offset = 0;
 	int err = 0;
 
-	sdp->device.length -= sdp->sb_addr + 1;
-	sdp->new_rgrps = how_many_rgrps(sdp, &sdp->device, opts->got_rgsize);
-	rgaddr = sdp->sb_addr + 1;
+	sdp->new_rgrps = 0;
+	rgaddr = align_block(sdp->sb_addr + 1, swidth_blocks, sdp->bsize);
 
-	for (i = 0; i < sdp->new_rgrps; i++) {
-		/* TODO: align to RAID stripes, etc. */
+	while (rgaddr > 0) {
 		rgt = rgrp_insert(&sdp->rgtree, rgaddr);
 		if (rgt == NULL)
 			return -1;
-		if (i == 0)
-			rgt->length = sdp->device.length - ((sdp->new_rgrps - 1) * (sdp->device.length / sdp->new_rgrps));
+
+		stripe_offset += sunit_blocks;
+		if (stripe_offset >= swidth_blocks)
+			stripe_offset = 0;
+
+		/* The next rg might not fit into the remaining space so calculate it now
+		   in order to make decisions about the current rg */
+		nextaddr = align_block(rgaddr + rglen, swidth_blocks, sdp->bsize) + stripe_offset;
+		if (!opts->got_rgsize && (nextaddr - rgaddr) <= maxrgsz)
+			/* Use up gap left by alignment if possible */
+			rgt->length = nextaddr - rgaddr;
 		else
-			rgt->length = sdp->device.length / sdp->new_rgrps;
+			rgt->length = rglen;
+
+		/* If the next rg would overflow the device, either shrink it or expand
+		   the current rg to use the remaining space */
+		if (nextaddr + rglen > sdp->device.length) {
+			/* Squeeze the last 1 or 2 rgs into the remaining space */
+			if ((nextaddr < sdp->device.length) && (sdp->device.length - nextaddr >= minrgsz)) {
+				rglen = sdp->device.length - nextaddr;
+			} else {
+				if (sdp->device.length - rgaddr <= maxrgsz)
+					rgt->length = sdp->device.length - rgaddr;
+				else
+					rgt->length = maxrgsz;
+				/* This is the last rg */
+				nextaddr = 0;
+			}
+		}
 
 		/* Build the rindex entry */
 		rgt->ri.ri_length = rgblocks2bitblocks(sdp->bsize, rgt->length, &rgt->ri.ri_data);
@@ -614,6 +650,11 @@ static int place_rgrps(struct gfs2_sbd *sdp, const struct mkfs_opts *opts)
 		rgt->rg.rg_header.mh_format = GFS2_FORMAT_RG;
 		rgt->rg.rg_free = rgt->ri.ri_data;
 
+		if (opts->debug) {
+			gfs2_rindex_print(&rgt->ri);
+			printf(" stripe_offset: %u\n", stripe_offset);
+		}
+
 		/* TODO: This call allocates buffer heads and bitmap pointers
 		 * in rgt. We really shouldn't need to do that. */
 		err = gfs2_compute_bitstructs(sdp->bsize, rgt);
@@ -628,8 +669,9 @@ static int place_rgrps(struct gfs2_sbd *sdp, const struct mkfs_opts *opts)
 			perror(_("Failed to write resource group"));
 			return -1;
 		}
+		sdp->new_rgrps++;
 		sdp->blks_total += rgt->ri.ri_data;
-		rgaddr += rgt->length;
+		rgaddr = nextaddr;
 	}
 
 	sdp->rgrps = sdp->new_rgrps;
@@ -637,7 +679,7 @@ static int place_rgrps(struct gfs2_sbd *sdp, const struct mkfs_opts *opts)
 	return 0;
 }
 
-static void sbd_init(struct gfs2_sbd *sdp, struct mkfs_opts *opts, struct mkfs_dev *dev)
+static void sbd_init(struct gfs2_sbd *sdp, struct mkfs_opts *opts, struct mkfs_dev *dev, unsigned bsize)
 {
 	memset(sdp, 0, sizeof(struct gfs2_sbd));
 	sdp->time = time(NULL);
@@ -647,7 +689,7 @@ static void sbd_init(struct gfs2_sbd *sdp, struct mkfs_opts *opts, struct mkfs_d
 	sdp->jsize = opts->jsize;
 	sdp->md.journals = opts->journals;
 	sdp->device_fd = dev->fd;
-	sdp->bsize = choose_blocksize(opts, dev);
+	sdp->bsize = bsize;
 
 	if (compute_constants(sdp)) {
 		perror(_("Failed to compute file system constants"));
@@ -666,19 +708,6 @@ static void sbd_init(struct gfs2_sbd *sdp, struct mkfs_opts *opts, struct mkfs_d
 	}
 	strcpy(sdp->lockproto, opts->lockproto);
 	strcpy(sdp->locktable, opts->locktable);
-	if (opts->debug) {
-		printf(_("Calculated file system options:\n"));
-		printf("  bsize = %u\n", sdp->bsize);
-		printf("  qcsize = %u\n", sdp->qcsize);
-		printf("  jsize = %u\n", sdp->jsize);
-		printf("  journals = %u\n", sdp->md.journals);
-		printf("  proto = %s\n", sdp->lockproto);
-		printf("  rgsize = %u\n", sdp->rgsize);
-		printf("  table = %s\n", sdp->locktable);
-		printf("  fssize = %"PRIu64"\n", opts->fssize);
-		printf("  sunit = %lu\n", opts->sunit);
-		printf("  swidth = %lu\n", opts->swidth);
-	}
 }
 
 static int probe_contents(struct mkfs_dev *dev)
@@ -764,6 +793,24 @@ static void open_dev(const char *path, struct mkfs_dev *dev)
 		exit(1);
 }
 
+static void opts_set_stripe(struct mkfs_opts *opts, const struct mkfs_dev *dev, unsigned bsize)
+{
+	if (!opts->got_swidth && dev->optimal_io_size > dev->physical_sector_size) {
+		opts->swidth = dev->optimal_io_size;
+		opts->got_swidth = 1;
+	}
+
+	if (!opts->got_sunit && dev->minimum_io_size > dev->physical_sector_size) {
+		opts->sunit = dev->minimum_io_size;
+		opts->got_sunit = 1;
+	}
+
+	if (opts->got_sunit && (opts->sunit % bsize) != 0) {
+		fprintf(stderr, "Stripe unit (%lu) is not a multiple of the block size (%u)\n", opts->sunit, bsize);
+		exit(1);
+	}
+}
+
 void main_mkfs(int argc, char *argv[])
 {
 	struct gfs2_sbd sbd;
@@ -771,28 +818,15 @@ void main_mkfs(int argc, char *argv[])
 	struct mkfs_dev dev;
 	int error;
 	unsigned char uuid[16];
+	unsigned bsize;
 
 	opts_init(&opts);
 	opts_get(argc, argv, &opts);
 	opts_check(&opts);
 
 	open_dev(opts.device, &dev);
-	if (!opts.got_swidth) {
-		if (dev.optimal_io_size > 0)
-			opts.swidth = dev.optimal_io_size;
-		else
-			opts.swidth = dev.logical_sector_size;
-	}
-
-	if (!opts.got_sunit) {
-		if (dev.minimum_io_size > 0)
-			opts.sunit = dev.minimum_io_size;
-		else
-			opts.sunit = dev.logical_sector_size;
-	}
-
-	if (opts.debug)
-		printf("Resource group alignment: %"PRIu64" bytes\n", opts.swidth);
+	bsize = choose_blocksize(&opts, &dev);
+	opts_set_stripe(&opts, &dev, bsize);
 
 	if (S_ISREG(dev.stat.st_mode)) {
 		opts.got_bsize = 1; /* Use default block size for regular files */
@@ -800,7 +834,21 @@ void main_mkfs(int argc, char *argv[])
 
 	warn_of_destruction(opts.device);
 
-	sbd_init(&sbd, &opts, &dev);
+	sbd_init(&sbd, &opts, &dev, bsize);
+	if (opts.debug) {
+		printf(_("Calculated file system options:\n"));
+		printf("  bsize = %u\n", sbd.bsize);
+		printf("  qcsize = %u\n", sbd.qcsize);
+		printf("  jsize = %u\n", sbd.jsize);
+		printf("  journals = %u\n", sbd.md.journals);
+		printf("  proto = %s\n", sbd.lockproto);
+		printf("  rgsize = %u\n", sbd.rgsize);
+		printf("  table = %s\n", sbd.locktable);
+		printf("  fssize = %"PRIu64"\n", opts.fssize);
+		printf("  sunit = %lu\n", opts.sunit);
+		printf("  swidth = %lu\n", opts.swidth);
+		printf("  rgrp align = %lu+%lu blocks\n", opts.swidth/sbd.bsize, opts.sunit/sbd.bsize);
+	}
 
 	if (opts.confirm && !opts.override)
 		are_you_sure();
@@ -808,7 +856,7 @@ void main_mkfs(int argc, char *argv[])
 	if (!S_ISREG(dev.stat.st_mode) && opts.discard)
 		discard_blocks(dev.fd, sbd.bsize * sbd.device.length, opts.debug);
 
-	error = place_rgrps(&sbd, &opts);
+	error = place_rgrps(&sbd, &opts, &dev);
 	if (error) {
 		fprintf(stderr, _("Failed to build resource groups\n"));
 		exit(1);
-- 
1.8.1.4



^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [Cluster-devel] [PATCH 2/4] mkfs.gfs2: Align resource groups to RAID stripes
  2013-06-06 12:03 ` [Cluster-devel] [PATCH 2/4] mkfs.gfs2: Align resource groups to RAID stripes Andrew Price
@ 2013-06-06 12:06   ` Steven Whitehouse
  2013-06-06 12:19     ` Andrew Price
  2013-06-06 12:57   ` Bob Peterson
  1 sibling, 1 reply; 19+ messages in thread
From: Steven Whitehouse @ 2013-06-06 12:06 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

On Thu, 2013-06-06 at 13:03 +0100, Andrew Price wrote:
> This patch uses the values provided by libblkid to align resource groups
> to RAID stripes. The strategy we're using here is to give the start of
> each rgrp an alignment to the stripe width and add an offset of one
> stripe unit for the next rgrp and so on. This should ensure that the
> rgrp headers are spread evenly over the array to minimise contention on
> the bitmap blocks.
> 
> One challenge here was to avoid creating large gaps between rgrps and at
> the end of the device due to the alignment padding. We get around this
> by calculating the start and length of the next rgrp before fixing the
> length of the current rgrp and extending it (or shrinking the final one)
> as appropriate.
> 
> In order for this to work some relationships between block and stripe
> sizes have been enforced: the stripe width must be a multiple of the
> stripe unit and the stripe unit must be a multiple of the block size.
> 
> With this patch, specifying an rg size on the command line still gives
> aligned rgrps but gaps will still be present.
> 
> Signed-off-by: Andrew Price <anprice@redhat.com>
> ---
>  gfs2/mkfs/main_mkfs.c | 136 ++++++++++++++++++++++++++++++++++----------------
>  1 file changed, 92 insertions(+), 44 deletions(-)
> 
> diff --git a/gfs2/mkfs/main_mkfs.c b/gfs2/mkfs/main_mkfs.c
> index 12a259f..058e4fa 100644
> --- a/gfs2/mkfs/main_mkfs.c
> +++ b/gfs2/mkfs/main_mkfs.c
> @@ -580,26 +580,62 @@ static int writerg(int fd, const struct rgrp_tree *rgt, const unsigned bsize)
>  	return 0;
>  }
>  
> -static int place_rgrps(struct gfs2_sbd *sdp, const struct mkfs_opts *opts)
> +static uint64_t align_block(const uint64_t base, const uint64_t align, const uint32_t bsize)
> +{
> +	if ((align > 0) && ((base % align) > 0))
> +		return (base - (base % align)) + align;
> +	return base;
> +}
> +
This doesn't appear to use bsize. Is align always going to be a power of
two?

Steve.

> +static int place_rgrps(struct gfs2_sbd *sdp, const struct mkfs_opts *opts, const struct mkfs_dev *dev)
>  {
>  	struct rgrp_tree *rgt = NULL;
>  	uint64_t rgaddr = 0;
> -	unsigned int i = 0;
> +	uint64_t nextaddr = 0;
> +	uint64_t rglen = (sdp->rgsize << 20) / sdp->bsize;
> +	const uint64_t maxrgsz = (GFS2_MAX_RGSIZE << 20) / sdp->bsize;
> +	const uint64_t minrgsz = (GFS2_MIN_RGSIZE << 20) / sdp->bsize;
> +	unsigned sunit_blocks = opts->sunit / sdp->bsize;
> +	unsigned swidth_blocks = opts->swidth / opts->bsize;
> +	unsigned stripe_offset = 0;
>  	int err = 0;
>  
> -	sdp->device.length -= sdp->sb_addr + 1;
> -	sdp->new_rgrps = how_many_rgrps(sdp, &sdp->device, opts->got_rgsize);
> -	rgaddr = sdp->sb_addr + 1;
> +	sdp->new_rgrps = 0;
> +	rgaddr = align_block(sdp->sb_addr + 1, swidth_blocks, sdp->bsize);
>  
> -	for (i = 0; i < sdp->new_rgrps; i++) {
> -		/* TODO: align to RAID stripes, etc. */
> +	while (rgaddr > 0) {
>  		rgt = rgrp_insert(&sdp->rgtree, rgaddr);
>  		if (rgt == NULL)
>  			return -1;
> -		if (i == 0)
> -			rgt->length = sdp->device.length - ((sdp->new_rgrps - 1) * (sdp->device.length / sdp->new_rgrps));
> +
> +		stripe_offset += sunit_blocks;
> +		if (stripe_offset >= swidth_blocks)
> +			stripe_offset = 0;
> +
> +		/* The next rg might not fit into the remaining space so calculate it now
> +		   in order to make decisions about the current rg */
> +		nextaddr = align_block(rgaddr + rglen, swidth_blocks, sdp->bsize) + stripe_offset;
> +		if (!opts->got_rgsize && (nextaddr - rgaddr) <= maxrgsz)
> +			/* Use up gap left by alignment if possible */
> +			rgt->length = nextaddr - rgaddr;
>  		else
> -			rgt->length = sdp->device.length / sdp->new_rgrps;
> +			rgt->length = rglen;
> +
> +		/* If the next rg would overflow the device, either shrink it or expand
> +		   the current rg to use the remaining space */
> +		if (nextaddr + rglen > sdp->device.length) {
> +			/* Squeeze the last 1 or 2 rgs into the remaining space */
> +			if ((nextaddr < sdp->device.length) && (sdp->device.length - nextaddr >= minrgsz)) {
> +				rglen = sdp->device.length - nextaddr;
> +			} else {
> +				if (sdp->device.length - rgaddr <= maxrgsz)
> +					rgt->length = sdp->device.length - rgaddr;
> +				else
> +					rgt->length = maxrgsz;
> +				/* This is the last rg */
> +				nextaddr = 0;
> +			}
> +		}
>  
>  		/* Build the rindex entry */
>  		rgt->ri.ri_length = rgblocks2bitblocks(sdp->bsize, rgt->length, &rgt->ri.ri_data);
> @@ -614,6 +650,11 @@ static int place_rgrps(struct gfs2_sbd *sdp, const struct mkfs_opts *opts)
>  		rgt->rg.rg_header.mh_format = GFS2_FORMAT_RG;
>  		rgt->rg.rg_free = rgt->ri.ri_data;
>  
> +		if (opts->debug) {
> +			gfs2_rindex_print(&rgt->ri);
> +			printf(" stripe_offset: %u\n", stripe_offset);
> +		}
> +
>  		/* TODO: This call allocates buffer heads and bitmap pointers
>  		 * in rgt. We really shouldn't need to do that. */
>  		err = gfs2_compute_bitstructs(sdp->bsize, rgt);
> @@ -628,8 +669,9 @@ static int place_rgrps(struct gfs2_sbd *sdp, const struct mkfs_opts *opts)
>  			perror(_("Failed to write resource group"));
>  			return -1;
>  		}
> +		sdp->new_rgrps++;
>  		sdp->blks_total += rgt->ri.ri_data;
> -		rgaddr += rgt->length;
> +		rgaddr = nextaddr;
>  	}
>  
>  	sdp->rgrps = sdp->new_rgrps;
> @@ -637,7 +679,7 @@ static int place_rgrps(struct gfs2_sbd *sdp, const struct mkfs_opts *opts)
>  	return 0;
>  }
>  
> -static void sbd_init(struct gfs2_sbd *sdp, struct mkfs_opts *opts, struct mkfs_dev *dev)
> +static void sbd_init(struct gfs2_sbd *sdp, struct mkfs_opts *opts, struct mkfs_dev *dev, unsigned bsize)
>  {
>  	memset(sdp, 0, sizeof(struct gfs2_sbd));
>  	sdp->time = time(NULL);
> @@ -647,7 +689,7 @@ static void sbd_init(struct gfs2_sbd *sdp, struct mkfs_opts *opts, struct mkfs_d
>  	sdp->jsize = opts->jsize;
>  	sdp->md.journals = opts->journals;
>  	sdp->device_fd = dev->fd;
> -	sdp->bsize = choose_blocksize(opts, dev);
> +	sdp->bsize = bsize;
>  
>  	if (compute_constants(sdp)) {
>  		perror(_("Failed to compute file system constants"));
> @@ -666,19 +708,6 @@ static void sbd_init(struct gfs2_sbd *sdp, struct mkfs_opts *opts, struct mkfs_d
>  	}
>  	strcpy(sdp->lockproto, opts->lockproto);
>  	strcpy(sdp->locktable, opts->locktable);
> -	if (opts->debug) {
> -		printf(_("Calculated file system options:\n"));
> -		printf("  bsize = %u\n", sdp->bsize);
> -		printf("  qcsize = %u\n", sdp->qcsize);
> -		printf("  jsize = %u\n", sdp->jsize);
> -		printf("  journals = %u\n", sdp->md.journals);
> -		printf("  proto = %s\n", sdp->lockproto);
> -		printf("  rgsize = %u\n", sdp->rgsize);
> -		printf("  table = %s\n", sdp->locktable);
> -		printf("  fssize = %"PRIu64"\n", opts->fssize);
> -		printf("  sunit = %lu\n", opts->sunit);
> -		printf("  swidth = %lu\n", opts->swidth);
> -	}
>  }
>  
>  static int probe_contents(struct mkfs_dev *dev)
> @@ -764,6 +793,24 @@ static void open_dev(const char *path, struct mkfs_dev *dev)
>  		exit(1);
>  }
>  
> +static void opts_set_stripe(struct mkfs_opts *opts, const struct mkfs_dev *dev, unsigned bsize)
> +{
> +	if (!opts->got_swidth && dev->optimal_io_size > dev->physical_sector_size) {
> +		opts->swidth = dev->optimal_io_size;
> +		opts->got_swidth = 1;
> +	}
> +
> +	if (!opts->got_sunit && dev->minimum_io_size > dev->physical_sector_size) {
> +		opts->sunit = dev->minimum_io_size;
> +		opts->got_sunit = 1;
> +	}
> +
> +	if (opts->got_sunit && (opts->sunit % bsize) != 0) {
> +		fprintf(stderr, "Stripe unit (%lu) is not a multiple of the block size (%u)\n", opts->sunit, bsize);
> +		exit(1);
> +	}
> +}
> +
>  void main_mkfs(int argc, char *argv[])
>  {
>  	struct gfs2_sbd sbd;
> @@ -771,28 +818,15 @@ void main_mkfs(int argc, char *argv[])
>  	struct mkfs_dev dev;
>  	int error;
>  	unsigned char uuid[16];
> +	unsigned bsize;
>  
>  	opts_init(&opts);
>  	opts_get(argc, argv, &opts);
>  	opts_check(&opts);
>  
>  	open_dev(opts.device, &dev);
> -	if (!opts.got_swidth) {
> -		if (dev.optimal_io_size > 0)
> -			opts.swidth = dev.optimal_io_size;
> -		else
> -			opts.swidth = dev.logical_sector_size;
> -	}
> -
> -	if (!opts.got_sunit) {
> -		if (dev.minimum_io_size > 0)
> -			opts.sunit = dev.minimum_io_size;
> -		else
> -			opts.sunit = dev.logical_sector_size;
> -	}
> -
> -	if (opts.debug)
> -		printf("Resource group alignment: %"PRIu64" bytes\n", opts.swidth);
> +	bsize = choose_blocksize(&opts, &dev);
> +	opts_set_stripe(&opts, &dev, bsize);
>  
>  	if (S_ISREG(dev.stat.st_mode)) {
>  		opts.got_bsize = 1; /* Use default block size for regular files */
> @@ -800,7 +834,21 @@ void main_mkfs(int argc, char *argv[])
>  
>  	warn_of_destruction(opts.device);
>  
> -	sbd_init(&sbd, &opts, &dev);
> +	sbd_init(&sbd, &opts, &dev, bsize);
> +	if (opts.debug) {
> +		printf(_("Calculated file system options:\n"));
> +		printf("  bsize = %u\n", sbd.bsize);
> +		printf("  qcsize = %u\n", sbd.qcsize);
> +		printf("  jsize = %u\n", sbd.jsize);
> +		printf("  journals = %u\n", sbd.md.journals);
> +		printf("  proto = %s\n", sbd.lockproto);
> +		printf("  rgsize = %u\n", sbd.rgsize);
> +		printf("  table = %s\n", sbd.locktable);
> +		printf("  fssize = %"PRIu64"\n", opts.fssize);
> +		printf("  sunit = %lu\n", opts.sunit);
> +		printf("  swidth = %lu\n", opts.swidth);
> +		printf("  rgrp align = %lu+%lu blocks\n", opts.swidth/sbd.bsize, opts.sunit/sbd.bsize);
> +	}
>  
>  	if (opts.confirm && !opts.override)
>  		are_you_sure();
> @@ -808,7 +856,7 @@ void main_mkfs(int argc, char *argv[])
>  	if (!S_ISREG(dev.stat.st_mode) && opts.discard)
>  		discard_blocks(dev.fd, sbd.bsize * sbd.device.length, opts.debug);
>  
> -	error = place_rgrps(&sbd, &opts);
> +	error = place_rgrps(&sbd, &opts, &dev);
>  	if (error) {
>  		fprintf(stderr, _("Failed to build resource groups\n"));
>  		exit(1);




^ permalink raw reply	[flat|nested] 19+ messages in thread

* [Cluster-devel] [PATCH 2/4] mkfs.gfs2: Align resource groups to RAID stripes
  2013-06-06 12:06   ` Steven Whitehouse
@ 2013-06-06 12:19     ` Andrew Price
  0 siblings, 0 replies; 19+ messages in thread
From: Andrew Price @ 2013-06-06 12:19 UTC (permalink / raw)
  To: cluster-devel.redhat.com

On 06/06/13 13:06, Steven Whitehouse wrote:
> Hi,
>
> On Thu, 2013-06-06 at 13:03 +0100, Andrew Price wrote:
>> This patch uses the values provided by libblkid to align resource groups
>> to RAID stripes. The strategy we're using here is to give the start of
>> each rgrp an alignment to the stripe width and add an offset of one
>> stripe unit for the next rgrp and so on. This should ensure that the
>> rgrp headers are spread evenly over the array to minimise contention on
>> the bitmap blocks.
>>
>> One challenge here was to avoid creating large gaps between rgrps and at
>> the end of the device due to the alignment padding. We get around this
>> by calculating the start and length of the next rgrp before fixing the
>> length of the current rgrp and extending it (or shrinking the final one)
>> as appropriate.
>>
>> In order for this to work some relationships between block and stripe
>> sizes have been enforced: the stripe width must be a multiple of the
>> stripe unit and the stripe unit must be a multiple of the block size.
>>
>> With this patch, specifying an rg size on the command line still gives
>> aligned rgrps but gaps will still be present.
>>
>> Signed-off-by: Andrew Price <anprice@redhat.com>
>> ---
>>   gfs2/mkfs/main_mkfs.c | 136 ++++++++++++++++++++++++++++++++++----------------
>>   1 file changed, 92 insertions(+), 44 deletions(-)
>>
>> diff --git a/gfs2/mkfs/main_mkfs.c b/gfs2/mkfs/main_mkfs.c
>> index 12a259f..058e4fa 100644
>> --- a/gfs2/mkfs/main_mkfs.c
>> +++ b/gfs2/mkfs/main_mkfs.c
>> @@ -580,26 +580,62 @@ static int writerg(int fd, const struct rgrp_tree *rgt, const unsigned bsize)
>>   	return 0;
>>   }
>>
>> -static int place_rgrps(struct gfs2_sbd *sdp, const struct mkfs_opts *opts)
>> +static uint64_t align_block(const uint64_t base, const uint64_t align, const uint32_t bsize)
>> +{
>> +	if ((align > 0) && ((base % align) > 0))
>> +		return (base - (base % align)) + align;
>> +	return base;
>> +}
>> +
> This doesn't appear to use bsize. Is align always going to be a power of
> two?

Hrm good catch, I was sure I had removed the bsize parameter when I 
switched it from a previous version which converted to bytes and back again.

Anyway, align is always going to be a multiple of bsize due to the 
constraints on swidth, but there's no requirement for it to be a power 
of two, I don't think.

Andy

>
> Steve.
>
>> +static int place_rgrps(struct gfs2_sbd *sdp, const struct mkfs_opts *opts, const struct mkfs_dev *dev)
>>   {
>>   	struct rgrp_tree *rgt = NULL;
>>   	uint64_t rgaddr = 0;
>> -	unsigned int i = 0;
>> +	uint64_t nextaddr = 0;
>> +	uint64_t rglen = (sdp->rgsize << 20) / sdp->bsize;
>> +	const uint64_t maxrgsz = (GFS2_MAX_RGSIZE << 20) / sdp->bsize;
>> +	const uint64_t minrgsz = (GFS2_MIN_RGSIZE << 20) / sdp->bsize;
>> +	unsigned sunit_blocks = opts->sunit / sdp->bsize;
>> +	unsigned swidth_blocks = opts->swidth / opts->bsize;
>> +	unsigned stripe_offset = 0;
>>   	int err = 0;
>>
>> -	sdp->device.length -= sdp->sb_addr + 1;
>> -	sdp->new_rgrps = how_many_rgrps(sdp, &sdp->device, opts->got_rgsize);
>> -	rgaddr = sdp->sb_addr + 1;
>> +	sdp->new_rgrps = 0;
>> +	rgaddr = align_block(sdp->sb_addr + 1, swidth_blocks, sdp->bsize);
>>
>> -	for (i = 0; i < sdp->new_rgrps; i++) {
>> -		/* TODO: align to RAID stripes, etc. */
>> +	while (rgaddr > 0) {
>>   		rgt = rgrp_insert(&sdp->rgtree, rgaddr);
>>   		if (rgt == NULL)
>>   			return -1;
>> -		if (i == 0)
>> -			rgt->length = sdp->device.length - ((sdp->new_rgrps - 1) * (sdp->device.length / sdp->new_rgrps));
>> +
>> +		stripe_offset += sunit_blocks;
>> +		if (stripe_offset >= swidth_blocks)
>> +			stripe_offset = 0;
>> +
>> +		/* The next rg might not fit into the remaining space so calculate it now
>> +		   in order to make decisions about the current rg */
>> +		nextaddr = align_block(rgaddr + rglen, swidth_blocks, sdp->bsize) + stripe_offset;
>> +		if (!opts->got_rgsize && (nextaddr - rgaddr) <= maxrgsz)
>> +			/* Use up gap left by alignment if possible */
>> +			rgt->length = nextaddr - rgaddr;
>>   		else
>> -			rgt->length = sdp->device.length / sdp->new_rgrps;
>> +			rgt->length = rglen;
>> +
>> +		/* If the next rg would overflow the device, either shrink it or expand
>> +		   the current rg to use the remaining space */
>> +		if (nextaddr + rglen > sdp->device.length) {
>> +			/* Squeeze the last 1 or 2 rgs into the remaining space */
>> +			if ((nextaddr < sdp->device.length) && (sdp->device.length - nextaddr >= minrgsz)) {
>> +				rglen = sdp->device.length - nextaddr;
>> +			} else {
>> +				if (sdp->device.length - rgaddr <= maxrgsz)
>> +					rgt->length = sdp->device.length - rgaddr;
>> +				else
>> +					rgt->length = maxrgsz;
>> +				/* This is the last rg */
>> +				nextaddr = 0;
>> +			}
>> +		}
>>
>>   		/* Build the rindex entry */
>>   		rgt->ri.ri_length = rgblocks2bitblocks(sdp->bsize, rgt->length, &rgt->ri.ri_data);
>> @@ -614,6 +650,11 @@ static int place_rgrps(struct gfs2_sbd *sdp, const struct mkfs_opts *opts)
>>   		rgt->rg.rg_header.mh_format = GFS2_FORMAT_RG;
>>   		rgt->rg.rg_free = rgt->ri.ri_data;
>>
>> +		if (opts->debug) {
>> +			gfs2_rindex_print(&rgt->ri);
>> +			printf(" stripe_offset: %u\n", stripe_offset);
>> +		}
>> +
>>   		/* TODO: This call allocates buffer heads and bitmap pointers
>>   		 * in rgt. We really shouldn't need to do that. */
>>   		err = gfs2_compute_bitstructs(sdp->bsize, rgt);
>> @@ -628,8 +669,9 @@ static int place_rgrps(struct gfs2_sbd *sdp, const struct mkfs_opts *opts)
>>   			perror(_("Failed to write resource group"));
>>   			return -1;
>>   		}
>> +		sdp->new_rgrps++;
>>   		sdp->blks_total += rgt->ri.ri_data;
>> -		rgaddr += rgt->length;
>> +		rgaddr = nextaddr;
>>   	}
>>
>>   	sdp->rgrps = sdp->new_rgrps;
>> @@ -637,7 +679,7 @@ static int place_rgrps(struct gfs2_sbd *sdp, const struct mkfs_opts *opts)
>>   	return 0;
>>   }
>>
>> -static void sbd_init(struct gfs2_sbd *sdp, struct mkfs_opts *opts, struct mkfs_dev *dev)
>> +static void sbd_init(struct gfs2_sbd *sdp, struct mkfs_opts *opts, struct mkfs_dev *dev, unsigned bsize)
>>   {
>>   	memset(sdp, 0, sizeof(struct gfs2_sbd));
>>   	sdp->time = time(NULL);
>> @@ -647,7 +689,7 @@ static void sbd_init(struct gfs2_sbd *sdp, struct mkfs_opts *opts, struct mkfs_d
>>   	sdp->jsize = opts->jsize;
>>   	sdp->md.journals = opts->journals;
>>   	sdp->device_fd = dev->fd;
>> -	sdp->bsize = choose_blocksize(opts, dev);
>> +	sdp->bsize = bsize;
>>
>>   	if (compute_constants(sdp)) {
>>   		perror(_("Failed to compute file system constants"));
>> @@ -666,19 +708,6 @@ static void sbd_init(struct gfs2_sbd *sdp, struct mkfs_opts *opts, struct mkfs_d
>>   	}
>>   	strcpy(sdp->lockproto, opts->lockproto);
>>   	strcpy(sdp->locktable, opts->locktable);
>> -	if (opts->debug) {
>> -		printf(_("Calculated file system options:\n"));
>> -		printf("  bsize = %u\n", sdp->bsize);
>> -		printf("  qcsize = %u\n", sdp->qcsize);
>> -		printf("  jsize = %u\n", sdp->jsize);
>> -		printf("  journals = %u\n", sdp->md.journals);
>> -		printf("  proto = %s\n", sdp->lockproto);
>> -		printf("  rgsize = %u\n", sdp->rgsize);
>> -		printf("  table = %s\n", sdp->locktable);
>> -		printf("  fssize = %"PRIu64"\n", opts->fssize);
>> -		printf("  sunit = %lu\n", opts->sunit);
>> -		printf("  swidth = %lu\n", opts->swidth);
>> -	}
>>   }
>>
>>   static int probe_contents(struct mkfs_dev *dev)
>> @@ -764,6 +793,24 @@ static void open_dev(const char *path, struct mkfs_dev *dev)
>>   		exit(1);
>>   }
>>
>> +static void opts_set_stripe(struct mkfs_opts *opts, const struct mkfs_dev *dev, unsigned bsize)
>> +{
>> +	if (!opts->got_swidth && dev->optimal_io_size > dev->physical_sector_size) {
>> +		opts->swidth = dev->optimal_io_size;
>> +		opts->got_swidth = 1;
>> +	}
>> +
>> +	if (!opts->got_sunit && dev->minimum_io_size > dev->physical_sector_size) {
>> +		opts->sunit = dev->minimum_io_size;
>> +		opts->got_sunit = 1;
>> +	}
>> +
>> +	if (opts->got_sunit && (opts->sunit % bsize) != 0) {
>> +		fprintf(stderr, "Stripe unit (%lu) is not a multiple of the block size (%u)\n", opts->sunit, bsize);
>> +		exit(1);
>> +	}
>> +}
>> +
>>   void main_mkfs(int argc, char *argv[])
>>   {
>>   	struct gfs2_sbd sbd;
>> @@ -771,28 +818,15 @@ void main_mkfs(int argc, char *argv[])
>>   	struct mkfs_dev dev;
>>   	int error;
>>   	unsigned char uuid[16];
>> +	unsigned bsize;
>>
>>   	opts_init(&opts);
>>   	opts_get(argc, argv, &opts);
>>   	opts_check(&opts);
>>
>>   	open_dev(opts.device, &dev);
>> -	if (!opts.got_swidth) {
>> -		if (dev.optimal_io_size > 0)
>> -			opts.swidth = dev.optimal_io_size;
>> -		else
>> -			opts.swidth = dev.logical_sector_size;
>> -	}
>> -
>> -	if (!opts.got_sunit) {
>> -		if (dev.minimum_io_size > 0)
>> -			opts.sunit = dev.minimum_io_size;
>> -		else
>> -			opts.sunit = dev.logical_sector_size;
>> -	}
>> -
>> -	if (opts.debug)
>> -		printf("Resource group alignment: %"PRIu64" bytes\n", opts.swidth);
>> +	bsize = choose_blocksize(&opts, &dev);
>> +	opts_set_stripe(&opts, &dev, bsize);
>>
>>   	if (S_ISREG(dev.stat.st_mode)) {
>>   		opts.got_bsize = 1; /* Use default block size for regular files */
>> @@ -800,7 +834,21 @@ void main_mkfs(int argc, char *argv[])
>>
>>   	warn_of_destruction(opts.device);
>>
>> -	sbd_init(&sbd, &opts, &dev);
>> +	sbd_init(&sbd, &opts, &dev, bsize);
>> +	if (opts.debug) {
>> +		printf(_("Calculated file system options:\n"));
>> +		printf("  bsize = %u\n", sbd.bsize);
>> +		printf("  qcsize = %u\n", sbd.qcsize);
>> +		printf("  jsize = %u\n", sbd.jsize);
>> +		printf("  journals = %u\n", sbd.md.journals);
>> +		printf("  proto = %s\n", sbd.lockproto);
>> +		printf("  rgsize = %u\n", sbd.rgsize);
>> +		printf("  table = %s\n", sbd.locktable);
>> +		printf("  fssize = %"PRIu64"\n", opts.fssize);
>> +		printf("  sunit = %lu\n", opts.sunit);
>> +		printf("  swidth = %lu\n", opts.swidth);
>> +		printf("  rgrp align = %lu+%lu blocks\n", opts.swidth/sbd.bsize, opts.sunit/sbd.bsize);
>> +	}
>>
>>   	if (opts.confirm && !opts.override)
>>   		are_you_sure();
>> @@ -808,7 +856,7 @@ void main_mkfs(int argc, char *argv[])
>>   	if (!S_ISREG(dev.stat.st_mode) && opts.discard)
>>   		discard_blocks(dev.fd, sbd.bsize * sbd.device.length, opts.debug);
>>
>> -	error = place_rgrps(&sbd, &opts);
>> +	error = place_rgrps(&sbd, &opts, &dev);
>>   	if (error) {
>>   		fprintf(stderr, _("Failed to build resource groups\n"));
>>   		exit(1);
>
>



^ permalink raw reply	[flat|nested] 19+ messages in thread

* [Cluster-devel] [PATCH 2/4] mkfs.gfs2: Align resource groups to RAID stripes
  2013-06-06 12:03 ` [Cluster-devel] [PATCH 2/4] mkfs.gfs2: Align resource groups to RAID stripes Andrew Price
  2013-06-06 12:06   ` Steven Whitehouse
@ 2013-06-06 12:57   ` Bob Peterson
  2013-06-06 13:04     ` Andrew Price
  2013-06-06 13:11     ` Steven Whitehouse
  1 sibling, 2 replies; 19+ messages in thread
From: Bob Peterson @ 2013-06-06 12:57 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

| +			/* Squeeze the last 1 or 2 rgs into the remaining space */
| +			if ((nextaddr < sdp->device.length) && (sdp->device.length - nextaddr >=
| minrgsz)) {
| +				rglen = sdp->device.length - nextaddr;
| +			} else {
| +				if (sdp->device.length - rgaddr <= maxrgsz)
| +					rgt->length = sdp->device.length - rgaddr;
| +				else
| +					rgt->length = maxrgsz;
| +				/* This is the last rg */
| +				nextaddr = 0;

In GFS1, we allowed mix-and-match resource group sizes, but we originally
designed mkfs.gfs2 to ensure that all rgrps were the same uniform size. This
usually means some space is wasted at the end of the last resource group.

We did this primarily so that fsck.gfs2 could more easily detect and repair
damaged resource groups and rindex values. At the time it was designed, I got
the buy-in of a bunch of developers and we all agreed to it. Since that time,
I've had to change fsck.gfs2 to take more drastic measures to repair damaged
resource groups, due to the fact that gfs2_convert can convert a GFS1 file
system to GFS2, and thus, we can still end up with non-uniform resource groups.
Many customers were adding storage and doing multiple gfs_grow ops,
which resulted in metadata sets where the rgrps and rindex were complete chaos.

Still, my assumption has always been: If the file system was made by
mkfs.gfs2, all resource groups (but the first one) are identical in size.

I think gfs2_grow takes some steps to ensure that new rgrps are also created
using the same size as the current resource groups. If we don't enforce
that rule, the rindex could once again become chaos, which means our chances
of rgrp and rindex repair get worse.

Do we still want to enforce this rule?

With the improved rgrp repair algorithms in fsck.gfs2, it may not be
necessary anymore. I'm not trying to be dogmatic; I'm looking for opinions here.

Regards,

Bob Peterson
Red Hat File Systems

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [Cluster-devel] [PATCH 2/4] mkfs.gfs2: Align resource groups to RAID stripes
  2013-06-06 12:57   ` Bob Peterson
@ 2013-06-06 13:04     ` Andrew Price
  2013-06-06 13:17       ` Bob Peterson
  2013-06-06 13:11     ` Steven Whitehouse
  1 sibling, 1 reply; 19+ messages in thread
From: Andrew Price @ 2013-06-06 13:04 UTC (permalink / raw)
  To: cluster-devel.redhat.com

On 06/06/13 13:57, Bob Peterson wrote:
> Hi,
>
> | +			/* Squeeze the last 1 or 2 rgs into the remaining space */
> | +			if ((nextaddr < sdp->device.length) && (sdp->device.length - nextaddr >=
> | minrgsz)) {
> | +				rglen = sdp->device.length - nextaddr;
> | +			} else {
> | +				if (sdp->device.length - rgaddr <= maxrgsz)
> | +					rgt->length = sdp->device.length - rgaddr;
> | +				else
> | +					rgt->length = maxrgsz;
> | +				/* This is the last rg */
> | +				nextaddr = 0;
>
> In GFS1, we allowed mix-and-match resource group sizes, but we originally
> designed mkfs.gfs2 to ensure that all rgrps were the same uniform size. This
> usually means some space is wasted at the end of the last resource group.
>
> We did this primarily so that fsck.gfs2 could more easily detect and repair
> damaged resource groups and rindex values. At the time it was designed, I got
> the buy-in of a bunch of developers and we all agreed to it. Since that time,
> I've had to change fsck.gfs2 to take more drastic measures to repair damaged
> resource groups, due to the fact that gfs2_convert can convert a GFS1 file
> system to GFS2, and thus, we can still end up with non-uniform resource groups.
> Many customers were adding storage and doing multiple gfs_grow ops,
> which resulted in metadata sets where the rgrps and rindex were complete chaos.
>
> Still, my assumption has always been: If the file system was made by
> mkfs.gfs2, all resource groups (but the first one) are identical in size.
>
> I think gfs2_grow takes some steps to ensure that new rgrps are also created
> using the same size as the current resource groups. If we don't enforce
> that rule, the rindex could once again become chaos, which means our chances
> of rgrp and rindex repair get worse.
>
> Do we still want to enforce this rule?

Good question. I had assumed that we don't have a rule like that as the 
rindex specifies the rg sizes. My next planned mkfs change is to allow 
the journal creation code to ask for a resource group large enough to 
contain all of a journal's data blocks so that they're always a single 
extent. Returning to enforcing the rule would have implications for that 
plan, too.

Andy

> With the improved rgrp repair algorithms in fsck.gfs2, it may not be
> necessary anymore. I'm not trying to be dogmatic; I'm looking for opinions here.
>
> Regards,
>
> Bob Peterson
> Red Hat File Systems
>



^ permalink raw reply	[flat|nested] 19+ messages in thread

* [Cluster-devel] [PATCH 2/4] mkfs.gfs2: Align resource groups to RAID stripes
  2013-06-06 13:04     ` Andrew Price
@ 2013-06-06 13:17       ` Bob Peterson
  0 siblings, 0 replies; 19+ messages in thread
From: Bob Peterson @ 2013-06-06 13:17 UTC (permalink / raw)
  To: cluster-devel.redhat.com

| > In GFS1, we allowed mix-and-match resource group sizes, but we originally
| > Do we still want to enforce this rule?
| 
| Good question. I had assumed that we don't have a rule like that as the
| rindex specifies the rg sizes. My next planned mkfs change is to allow
| the journal creation code to ask for a resource group large enough to
| contain all of a journal's data blocks so that they're always a single
| extent. Returning to enforcing the rule would have implications for that
| plan, too.
| 
| Andy

Hi,

Some more thoughts to add to this discussion:

First, since we're now trying to align our rgrps to raid stripes, maybe
the rules no longer make sense.

Second, we probably want to change gfs2_grow to ensure that new rgrps
are also placed on the same stripe boundaries. If we enforce the uniform
rgrp rule, this would probably happen automatically. If we don't, I bet
gfs2_grow would need to change.

Bob Peterson
Red Hat File Systems

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [Cluster-devel] [PATCH 2/4] mkfs.gfs2: Align resource groups to RAID stripes
  2013-06-06 12:57   ` Bob Peterson
  2013-06-06 13:04     ` Andrew Price
@ 2013-06-06 13:11     ` Steven Whitehouse
  2013-06-06 13:30       ` Bob Peterson
  2013-06-06 15:17       ` Andrew Price
  1 sibling, 2 replies; 19+ messages in thread
From: Steven Whitehouse @ 2013-06-06 13:11 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

On Thu, 2013-06-06 at 08:57 -0400, Bob Peterson wrote:
> Hi,
> 
> | +			/* Squeeze the last 1 or 2 rgs into the remaining space */
> | +			if ((nextaddr < sdp->device.length) && (sdp->device.length - nextaddr >=
> | minrgsz)) {
> | +				rglen = sdp->device.length - nextaddr;
> | +			} else {
> | +				if (sdp->device.length - rgaddr <= maxrgsz)
> | +					rgt->length = sdp->device.length - rgaddr;
> | +				else
> | +					rgt->length = maxrgsz;
> | +				/* This is the last rg */
> | +				nextaddr = 0;
> 
> In GFS1, we allowed mix-and-match resource group sizes, but we originally
> designed mkfs.gfs2 to ensure that all rgrps were the same uniform size. This
> usually means some space is wasted at the end of the last resource group.
> 
> We did this primarily so that fsck.gfs2 could more easily detect and repair
> damaged resource groups and rindex values. At the time it was designed, I got
> the buy-in of a bunch of developers and we all agreed to it. Since that time,
> I've had to change fsck.gfs2 to take more drastic measures to repair damaged
> resource groups, due to the fact that gfs2_convert can convert a GFS1 file
> system to GFS2, and thus, we can still end up with non-uniform resource groups.
> Many customers were adding storage and doing multiple gfs_grow ops,
> which resulted in metadata sets where the rgrps and rindex were complete chaos.
> 
> Still, my assumption has always been: If the file system was made by
> mkfs.gfs2, all resource groups (but the first one) are identical in size.
> 
> I think gfs2_grow takes some steps to ensure that new rgrps are also created
> using the same size as the current resource groups. If we don't enforce
> that rule, the rindex could once again become chaos, which means our chances
> of rgrp and rindex repair get worse.
> 
> Do we still want to enforce this rule?
> 
> With the improved rgrp repair algorithms in fsck.gfs2, it may not be
> necessary anymore. I'm not trying to be dogmatic; I'm looking for opinions here.
> 
> Regards,
> 
> Bob Peterson
> Red Hat File Systems
> 

It has never been valid to assume that all the rgrps are the same size.
It may be useful as a hint, but we should not be relying on that being
true. Obviously it makes sense to try and keep them to an even spacing
where possible but we must allow for them to be placed and sized
independently as required for alignment, etc.

There are some restrictions on rgrp length as they need to be aligned
such as to give an integer number of bitmap bytes, and in fact it would
be good if that could be further enforced to ensure that all rgrp
bitmaps are an integer number of 64 bit words in length (so a multiple
of 32 blocks, excluding the headers). So depending on the various
restrictions, there may be a few unused blocks between rgrps in some
cases.

It would be worth abstracting the details about alignment from mkfs in
due course, so that fsck also has access to the same information about
where rgrps are likely to have been put, I think.

Some further thoughts:

 - Would it be useful to introduce a flag to show the source of an rgrp
(whether mkfs or gfs2_grow) ?
 - Would it be useful to add a creation date stamp to each rgrp so that
we can see when things have happened in the past?

There are probably spare fields we can use for this kind of thing,
without needing to breack backwards compatibility,

Steve.




^ permalink raw reply	[flat|nested] 19+ messages in thread

* [Cluster-devel] [PATCH 2/4] mkfs.gfs2: Align resource groups to RAID stripes
  2013-06-06 13:11     ` Steven Whitehouse
@ 2013-06-06 13:30       ` Bob Peterson
  2013-06-06 15:17       ` Andrew Price
  1 sibling, 0 replies; 19+ messages in thread
From: Bob Peterson @ 2013-06-06 13:30 UTC (permalink / raw)
  To: cluster-devel.redhat.com

----- Original Message -----
| It has never been valid to assume that all the rgrps are the same size.

Well, as I said: due to gfs2_convert, we can't enforce it. However,
gfs2_grow and fsck.gfs2 were originally designed under that assumption.
Today's gfs2_grow may still be assuming it, which might be wrong, and
perhaps should be investigated. (It's been too long since I've looked at it.)
My concern here is that gfs2_grow operates properly on a gfs2_converted
file system from gfs1 with non-uniform rgrps, especially if the Nth rgrp
and N-1th rgrp are different sizes; could damage result? Worth checking.

|  - Would it be useful to introduce a flag to show the source of an rgrp
| (whether mkfs or gfs2_grow) ?

Yes, good idea.

|  - Would it be useful to add a creation date stamp to each rgrp so that
| we can see when things have happened in the past?

Another good idea.

Bob

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [Cluster-devel] [PATCH 2/4] mkfs.gfs2: Align resource groups to RAID stripes
  2013-06-06 13:11     ` Steven Whitehouse
  2013-06-06 13:30       ` Bob Peterson
@ 2013-06-06 15:17       ` Andrew Price
  1 sibling, 0 replies; 19+ messages in thread
From: Andrew Price @ 2013-06-06 15:17 UTC (permalink / raw)
  To: cluster-devel.redhat.com

On 06/06/13 14:11, Steven Whitehouse wrote:
> It has never been valid to assume that all the rgrps are the same size.
> It may be useful as a hint, but we should not be relying on that being
> true. Obviously it makes sense to try and keep them to an even spacing
> where possible but we must allow for them to be placed and sized
> independently as required for alignment, etc.
>
> There are some restrictions on rgrp length as they need to be aligned
> such as to give an integer number of bitmap bytes, and in fact it would
> be good if that could be further enforced to ensure that all rgrp
> bitmaps are an integer number of 64 bit words in length (so a multiple
> of 32 blocks, excluding the headers). So depending on the various
> restrictions, there may be a few unused blocks between rgrps in some
> cases.

This is on my to-do list and should be a case of changing the "& 
~(uint32_t)3" parts of rgblocks2bitblocks() but I wanted to make sure 
that won't break fsck for older file systems first.

> It would be worth abstracting the details about alignment from mkfs in
> due course, so that fsck also has access to the same information about
> where rgrps are likely to have been put, I think.

Yes, I need to move the new rgrp creation bits from mkfs into libgfs2 
now so that's going to mean updating the other tools to use them 
eventually (hold onto your hats!).

> Some further thoughts:
>
>   - Would it be useful to introduce a flag to show the source of an rgrp
> (whether mkfs or gfs2_grow) ?
>   - Would it be useful to add a creation date stamp to each rgrp so that
> we can see when things have happened in the past?

Both sound good to me.

Andy



^ permalink raw reply	[flat|nested] 19+ messages in thread

* [Cluster-devel] [PATCH 3/4] mkfs.gfs2: Create new resource groups on-demand
  2013-06-06 12:03 [Cluster-devel] [PATCH 1/4] mkfs.gfs2: Set sunit and swidth from probed io limits Andrew Price
  2013-06-06 12:03 ` [Cluster-devel] [PATCH 2/4] mkfs.gfs2: Align resource groups to RAID stripes Andrew Price
@ 2013-06-06 12:03 ` Andrew Price
  2013-06-06 13:07   ` Bob Peterson
  2013-06-06 12:03 ` [Cluster-devel] [PATCH 4/4] mkfs.gfs2: Add align option and update docs Andrew Price
  2 siblings, 1 reply; 19+ messages in thread
From: Andrew Price @ 2013-06-06 12:03 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Adds a structure to encapsulate the state of resource group creation so
that the new rg_append() function can be called at any time. rg_append()
takes the number of data blocks required as a parameter so that future
enhancements can request a given number of free data blocks.

place_rgrps() now becomes a simple loop to call rg_append() and
writerg() until the end of the device.

Signed-off-by: Andrew Price <anprice@redhat.com>
---
 gfs2/mkfs/main_mkfs.c | 250 ++++++++++++++++++++++++++++----------------------
 1 file changed, 139 insertions(+), 111 deletions(-)

diff --git a/gfs2/mkfs/main_mkfs.c b/gfs2/mkfs/main_mkfs.c
index 058e4fa..dcfb032 100644
--- a/gfs2/mkfs/main_mkfs.c
+++ b/gfs2/mkfs/main_mkfs.c
@@ -134,6 +134,25 @@ struct mkfs_dev {
 	unsigned int got_topol:1;
 };
 
+/**
+ * A representation of the state of resource group calculation. Allows mkfs to create
+ * resource groups at any point instead of creating them all in one batch.
+ */
+struct mkfs_rgs {
+	struct osi_root *root;
+	uint64_t nextaddr;
+	unsigned bsize;
+	unsigned long align;
+	unsigned long align_off;
+	unsigned long curr_offset;
+	uint64_t maxrgsz;
+	uint64_t minrgsz;
+	uint64_t devlen;
+	uint64_t rgsize;
+	uint64_t count;
+	uint64_t blks_total;
+};
+
 static void opts_init(struct mkfs_opts *opts)
 {
 	memset(opts, 0, sizeof(*opts));
@@ -499,12 +518,6 @@ static void opts_check(struct mkfs_opts *opts)
 		fprintf(stderr, _("Stripe unit and stripe width must be specified together\n"));
 		exit(1);
 	}
-
-	if (opts->got_sunit && (opts->swidth % opts->sunit)) {
-		fprintf(stderr, _("Stripe width (%lu) must be a multiple of stripe unit (%lu)\n"),
-				opts->swidth, opts->sunit);
-		exit(1);
-	}
 }
 
 static void print_results(struct gfs2_sbd *sdp, uint64_t real_device_size,
@@ -587,96 +600,125 @@ static uint64_t align_block(const uint64_t base, const uint64_t align, const uin
 	return base;
 }
 
-static int place_rgrps(struct gfs2_sbd *sdp, const struct mkfs_opts *opts, const struct mkfs_dev *dev)
+static void rgs_init(struct mkfs_rgs *rgs, struct mkfs_opts *opts, struct mkfs_dev *dev, struct gfs2_sbd *sdp)
 {
-	struct rgrp_tree *rgt = NULL;
-	uint64_t rgaddr = 0;
-	uint64_t nextaddr = 0;
-	uint64_t rglen = (sdp->rgsize << 20) / sdp->bsize;
-	const uint64_t maxrgsz = (GFS2_MAX_RGSIZE << 20) / sdp->bsize;
-	const uint64_t minrgsz = (GFS2_MIN_RGSIZE << 20) / sdp->bsize;
-	unsigned sunit_blocks = opts->sunit / sdp->bsize;
-	unsigned swidth_blocks = opts->swidth / opts->bsize;
-	unsigned stripe_offset = 0;
-	int err = 0;
-
-	sdp->new_rgrps = 0;
-	rgaddr = align_block(sdp->sb_addr + 1, swidth_blocks, sdp->bsize);
-
-	while (rgaddr > 0) {
-		rgt = rgrp_insert(&sdp->rgtree, rgaddr);
-		if (rgt == NULL)
-			return -1;
-
-		stripe_offset += sunit_blocks;
-		if (stripe_offset >= swidth_blocks)
-			stripe_offset = 0;
-
-		/* The next rg might not fit into the remaining space so calculate it now
-		   in order to make decisions about the current rg */
-		nextaddr = align_block(rgaddr + rglen, swidth_blocks, sdp->bsize) + stripe_offset;
-		if (!opts->got_rgsize && (nextaddr - rgaddr) <= maxrgsz)
-			/* Use up gap left by alignment if possible */
-			rgt->length = nextaddr - rgaddr;
-		else
-			rgt->length = rglen;
-
-		/* If the next rg would overflow the device, either shrink it or expand
-		   the current rg to use the remaining space */
-		if (nextaddr + rglen > sdp->device.length) {
-			/* Squeeze the last 1 or 2 rgs into the remaining space */
-			if ((nextaddr < sdp->device.length) && (sdp->device.length - nextaddr >= minrgsz)) {
-				rglen = sdp->device.length - nextaddr;
-			} else {
-				if (sdp->device.length - rgaddr <= maxrgsz)
-					rgt->length = sdp->device.length - rgaddr;
-				else
-					rgt->length = maxrgsz;
-				/* This is the last rg */
-				nextaddr = 0;
-			}
+	memset(rgs, 0, sizeof(*rgs));
+	if (opts->got_sunit) {
+		if ((opts->sunit % sdp->bsize) != 0) {
+			fprintf(stderr, _("Stripe unit (%lu) must be a multiple of block size (%u)\n"),
+			        opts->sunit, sdp->bsize);
+			exit(1);
+		} else if ((opts->swidth % opts->sunit) != 0) {
+			fprintf(stderr, _("Stripe width (%lu) must be a multiple of stripe unit (%lu)\n"),
+			        opts->swidth, opts->sunit);
+			exit(1);
+		} else {
+			rgs->align = opts->swidth / sdp->bsize;
+			rgs->align_off = opts->sunit / sdp->bsize;
 		}
-
-		/* Build the rindex entry */
-		rgt->ri.ri_length = rgblocks2bitblocks(sdp->bsize, rgt->length, &rgt->ri.ri_data);
-		rgt->ri.ri_addr = rgaddr;
-		rgt->ri.ri_data0 = rgaddr + rgt->ri.ri_length;
-		rgt->ri.ri_bitbytes = rgt->ri.ri_data / GFS2_NBBY;
-
-		/* Build the rgrp header */
-		memset(&rgt->rg, 0, sizeof(rgt->rg));
-		rgt->rg.rg_header.mh_magic = GFS2_MAGIC;
-		rgt->rg.rg_header.mh_type = GFS2_METATYPE_RG;
-		rgt->rg.rg_header.mh_format = GFS2_FORMAT_RG;
-		rgt->rg.rg_free = rgt->ri.ri_data;
-
-		if (opts->debug) {
-			gfs2_rindex_print(&rgt->ri);
-			printf(" stripe_offset: %u\n", stripe_offset);
+	} else {
+		if ((dev->minimum_io_size > dev->physical_sector_size) &&
+		    (dev->optimal_io_size > dev->physical_sector_size)) {
+			rgs->align = dev->optimal_io_size / sdp->bsize;
+			rgs->align_off = dev->minimum_io_size / sdp->bsize;
 		}
+	}
+	rgs->bsize = sdp->bsize;
+	rgs->maxrgsz = (GFS2_MAX_RGSIZE << 20) / sdp->bsize;
+	rgs->minrgsz = (GFS2_MIN_RGSIZE << 20) / sdp->bsize;
+	rgs->nextaddr = align_block(sdp->sb_addr + 1, rgs->align, sdp->bsize);
+	rgs->rgsize = (sdp->rgsize << 20) / sdp->bsize;
+	rgs->devlen = sdp->device.length;
+	rgs->root = &sdp->rgtree;
+}
 
-		/* TODO: This call allocates buffer heads and bitmap pointers
-		 * in rgt. We really shouldn't need to do that. */
-		err = gfs2_compute_bitstructs(sdp->bsize, rgt);
-		if (err != 0) {
-			fprintf(stderr, _("Could not compute bitmaps. "
-			        "Check resource group and block size options.\n"));
-			return -1;
+static unsigned rgsize_for_data(uint64_t blksreq, unsigned bsize)
+{
+        const uint32_t blks_rgrp = GFS2_NBBY * (bsize - sizeof(struct gfs2_rgrp));
+        const uint32_t blks_meta = GFS2_NBBY * (bsize - sizeof(struct gfs2_meta_header));
+	unsigned bitblocks = 1;
+	if (blksreq > blks_rgrp)
+		bitblocks += ((blksreq - blks_rgrp) + blks_meta) / blks_meta;
+	return bitblocks + blksreq;
+}
+
+static struct rgrp_tree *rg_append(struct mkfs_rgs *rgs, const struct mkfs_opts *opts, uint64_t freerq)
+{
+	int err = 0;
+	uint64_t length = rgsize_for_data(freerq, rgs->bsize);
+	struct rgrp_tree *rgt = rgrp_insert(rgs->root, rgs->nextaddr);
+	if (rgt == NULL)
+		return NULL;
+
+	rgs->curr_offset += rgs->align_off;
+	if (rgs->curr_offset >= rgs->align)
+		rgs->curr_offset = 0;
+
+	if (rgs->rgsize > length)
+		length = rgs->rgsize;
+
+	rgs->nextaddr = align_block(rgt->ri.ri_addr + rgs->rgsize, rgs->align, rgs->bsize) + rgs->curr_offset;
+	/* Use up gap left by alignment if possible */
+	if (!opts->got_rgsize && ((rgs->nextaddr - rgt->ri.ri_addr) <= rgs->maxrgsz))
+		length = rgs->nextaddr - rgt->ri.ri_addr;
+
+	if ((rgs->nextaddr + rgs->rgsize) > rgs->devlen) {
+		/* Squeeze the last 1 or 2 rgs into the remaining space */
+		if ((rgs->nextaddr < rgs->devlen) && ((rgs->devlen - rgs->nextaddr) >= rgs->minrgsz)) {
+			rgs->rgsize = rgs->devlen - rgs->nextaddr;
+		} else {
+			if (rgs->devlen - rgt->ri.ri_addr <= rgs->maxrgsz)
+				length = rgs->devlen - rgt->ri.ri_addr;
+			else
+				length = rgs->maxrgsz;
+			/* This is the last rg */
+			rgs->nextaddr = 0;
 		}
+	}
+
+	rgt->ri.ri_length = rgblocks2bitblocks(rgs->bsize, length, &rgt->ri.ri_data);
+	rgt->ri.ri_data0 = rgt->ri.ri_addr + rgt->ri.ri_length;
+	rgt->ri.ri_bitbytes = rgt->ri.ri_data / GFS2_NBBY;
+	rgt->rg.rg_header.mh_magic = GFS2_MAGIC;
+	rgt->rg.rg_header.mh_type = GFS2_METATYPE_RG;
+	rgt->rg.rg_header.mh_format = GFS2_FORMAT_RG;
+	rgt->rg.rg_free = rgt->ri.ri_data;
 
-		err = writerg(sdp->device_fd, rgt, sdp->bsize);
+	if (opts->debug) {
+		gfs2_rindex_print(&rgt->ri);
+		printf(" offset: %"PRIu64"\n", rgs->curr_offset);
+	}
+
+	err = gfs2_compute_bitstructs(rgs->bsize, rgt);
+	if (err != 0) {
+		fprintf(stderr, _("Could not compute bitmaps. "
+			"Check resource group and block size options.\n"));
+		return NULL;
+	}
+	rgs->blks_total += rgt->ri.ri_data;
+	rgs->count++;
+	return rgt;
+}
+
+static uint64_t place_rgrps(struct mkfs_rgs *rgs, int fd, const struct mkfs_opts *opts, const struct mkfs_dev *dev)
+{
+	int err = 0;
+	struct rgrp_tree *rgt = NULL;
+
+	while (rgs->nextaddr > 0) {
+		rgt = rg_append(rgs, opts, 0);
+		if (rgt == NULL) {
+			perror(_("Failed to create resource group"));
+			return 0;
+		}
+		err = writerg(fd, rgt, rgs->bsize);
 		if (err != 0) {
 			perror(_("Failed to write resource group"));
-			return -1;
+			return 0;
 		}
-		sdp->new_rgrps++;
-		sdp->blks_total += rgt->ri.ri_data;
-		rgaddr = nextaddr;
 	}
 
-	sdp->rgrps = sdp->new_rgrps;
-	sdp->fssize = rgt->ri.ri_data0 + rgt->ri.ri_data;
-	return 0;
+	return rgt->ri.ri_data0 + rgt->ri.ri_data;
 }
 
 static void sbd_init(struct gfs2_sbd *sdp, struct mkfs_opts *opts, struct mkfs_dev *dev, unsigned bsize)
@@ -793,29 +835,12 @@ static void open_dev(const char *path, struct mkfs_dev *dev)
 		exit(1);
 }
 
-static void opts_set_stripe(struct mkfs_opts *opts, const struct mkfs_dev *dev, unsigned bsize)
-{
-	if (!opts->got_swidth && dev->optimal_io_size > dev->physical_sector_size) {
-		opts->swidth = dev->optimal_io_size;
-		opts->got_swidth = 1;
-	}
-
-	if (!opts->got_sunit && dev->minimum_io_size > dev->physical_sector_size) {
-		opts->sunit = dev->minimum_io_size;
-		opts->got_sunit = 1;
-	}
-
-	if (opts->got_sunit && (opts->sunit % bsize) != 0) {
-		fprintf(stderr, "Stripe unit (%lu) is not a multiple of the block size (%u)\n", opts->sunit, bsize);
-		exit(1);
-	}
-}
-
 void main_mkfs(int argc, char *argv[])
 {
 	struct gfs2_sbd sbd;
 	struct mkfs_opts opts;
 	struct mkfs_dev dev;
+	struct mkfs_rgs rgs;
 	int error;
 	unsigned char uuid[16];
 	unsigned bsize;
@@ -826,17 +851,16 @@ void main_mkfs(int argc, char *argv[])
 
 	open_dev(opts.device, &dev);
 	bsize = choose_blocksize(&opts, &dev);
-	opts_set_stripe(&opts, &dev, bsize);
 
 	if (S_ISREG(dev.stat.st_mode)) {
 		opts.got_bsize = 1; /* Use default block size for regular files */
 	}
 
-	warn_of_destruction(opts.device);
-
 	sbd_init(&sbd, &opts, &dev, bsize);
+	rgs_init(&rgs, &opts, &dev, &sbd);
+
 	if (opts.debug) {
-		printf(_("Calculated file system options:\n"));
+		printf(_("File system options:\n"));
 		printf("  bsize = %u\n", sbd.bsize);
 		printf("  qcsize = %u\n", sbd.qcsize);
 		printf("  jsize = %u\n", sbd.jsize);
@@ -847,20 +871,24 @@ void main_mkfs(int argc, char *argv[])
 		printf("  fssize = %"PRIu64"\n", opts.fssize);
 		printf("  sunit = %lu\n", opts.sunit);
 		printf("  swidth = %lu\n", opts.swidth);
-		printf("  rgrp align = %lu+%lu blocks\n", opts.swidth/sbd.bsize, opts.sunit/sbd.bsize);
+		printf("  rgrp align = %lu+%lu blocks\n", rgs.align, rgs.align_off);
 	}
 
+	warn_of_destruction(opts.device);
+
 	if (opts.confirm && !opts.override)
 		are_you_sure();
 
 	if (!S_ISREG(dev.stat.st_mode) && opts.discard)
-		discard_blocks(dev.fd, sbd.bsize * sbd.device.length, opts.debug);
+		discard_blocks(dev.fd, dev.size, opts.debug);
 
-	error = place_rgrps(&sbd, &opts, &dev);
-	if (error) {
+	sbd.fssize = place_rgrps(&rgs, sbd.device_fd, &opts, &dev);
+	if (sbd.fssize == 0) {
 		fprintf(stderr, _("Failed to build resource groups\n"));
 		exit(1);
 	}
+	sbd.blks_total = rgs.blks_total;
+	sbd.rgrps = rgs.count;
 	build_root(&sbd);
 	build_master(&sbd);
 	error = build_jindex(&sbd);
-- 
1.8.1.4



^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [Cluster-devel] [PATCH 3/4] mkfs.gfs2: Create new resource groups on-demand
  2013-06-06 12:03 ` [Cluster-devel] [PATCH 3/4] mkfs.gfs2: Create new resource groups on-demand Andrew Price
@ 2013-06-06 13:07   ` Bob Peterson
  2013-06-06 13:50     ` Andrew Price
  0 siblings, 1 reply; 19+ messages in thread
From: Bob Peterson @ 2013-06-06 13:07 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

| +static unsigned rgsize_for_data(uint64_t blksreq, unsigned bsize)
| +{
| +        const uint32_t blks_rgrp = GFS2_NBBY * (bsize - sizeof(struct
| gfs2_rgrp));
| +        const uint32_t blks_meta = GFS2_NBBY * (bsize - sizeof(struct
| gfs2_meta_header));
| +	unsigned bitblocks = 1;
| +	if (blksreq > blks_rgrp)
| +		bitblocks += ((blksreq - blks_rgrp) + blks_meta) / blks_meta;

It looks like this may be rounding up to blks_meta, so shouldn't this be:
  +		bitblocks += ((blksreq - blks_rgrp) + blks_meta - 1) / blks_meta;

Regards,

Bob Peterson
Red Hat File Systems



^ permalink raw reply	[flat|nested] 19+ messages in thread

* [Cluster-devel] [PATCH 3/4] mkfs.gfs2: Create new resource groups on-demand
  2013-06-06 13:07   ` Bob Peterson
@ 2013-06-06 13:50     ` Andrew Price
  0 siblings, 0 replies; 19+ messages in thread
From: Andrew Price @ 2013-06-06 13:50 UTC (permalink / raw)
  To: cluster-devel.redhat.com

On 06/06/13 14:07, Bob Peterson wrote:
> Hi,
>
> | +static unsigned rgsize_for_data(uint64_t blksreq, unsigned bsize)
> | +{
> | +        const uint32_t blks_rgrp = GFS2_NBBY * (bsize - sizeof(struct
> | gfs2_rgrp));
> | +        const uint32_t blks_meta = GFS2_NBBY * (bsize - sizeof(struct
> | gfs2_meta_header));
> | +	unsigned bitblocks = 1;
> | +	if (blksreq > blks_rgrp)
> | +		bitblocks += ((blksreq - blks_rgrp) + blks_meta) / blks_meta;
>
> It looks like this may be rounding up to blks_meta, so shouldn't this be:
>    +		bitblocks += ((blksreq - blks_rgrp) + blks_meta - 1) / blks_meta;

[Some mental arithmetic later...]

Good catch, thanks :)

Andy



^ permalink raw reply	[flat|nested] 19+ messages in thread

* [Cluster-devel] [PATCH 4/4] mkfs.gfs2: Add align option and update docs
  2013-06-06 12:03 [Cluster-devel] [PATCH 1/4] mkfs.gfs2: Set sunit and swidth from probed io limits Andrew Price
  2013-06-06 12:03 ` [Cluster-devel] [PATCH 2/4] mkfs.gfs2: Align resource groups to RAID stripes Andrew Price
  2013-06-06 12:03 ` [Cluster-devel] [PATCH 3/4] mkfs.gfs2: Create new resource groups on-demand Andrew Price
@ 2013-06-06 12:03 ` Andrew Price
  2013-06-06 12:15   ` Steven Whitehouse
  2013-06-06 13:13   ` Bob Peterson
  2 siblings, 2 replies; 19+ messages in thread
From: Andrew Price @ 2013-06-06 12:03 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Add an 'align' extended option for enabling and disabling resource group
alignment and update the mkfs.gfs2 man page to document the new extended
options.

Signed-off-by: Andrew Price <anprice@redhat.com>
---
 gfs2/man/mkfs.gfs2.8  | 31 +++++++++++++++++++++++++++++++
 gfs2/mkfs/main_mkfs.c | 47 +++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 74 insertions(+), 4 deletions(-)

diff --git a/gfs2/man/mkfs.gfs2.8 b/gfs2/man/mkfs.gfs2.8
index 4613305..ceb6f38 100644
--- a/gfs2/man/mkfs.gfs2.8
+++ b/gfs2/man/mkfs.gfs2.8
@@ -48,6 +48,37 @@ storage).
 This option prevents gfs2_mkfs from asking for confirmation before writing
 the filesystem.
 .TP
+\fB-o\fP
+Specify extended options. Multiple options can be separated by commas. Valid extended options are:
+.RS 1.0i
+.TP
+.BI help
+Display an extended options help summary, then exit.
+.TP
+.BI sunit= bytes
+This is used to specify the stripe unit for a RAID device or striped logical
+volume.  This option ensures that resource groups will be stripe unit aligned
+and overrides the stripe unit value obtained by probing the device. This value
+must be a multiple of the file system block size and must be specified with the
+.I swidth
+option.
+.TP
+.BI swidth= bytes
+This is used to specify the stripe width for a RAID device or striped logical
+volume.  This option ensures that resource groups will be stripe aligned and overrides the stripe width value obtained by probing the device. This value must be a multiple of the
+.I sunit
+option and must also be specified with it.
+.TP
+.BI align= [0|1]
+Disable or enable the alignment of resource groups. The default behaviour is to
+align resource groups to the stripe width and stripe unit values obtained from
+probing the device or specified with the
+.I swidth
+and
+.I sunit
+extended options.
+.RE
+.TP
 \fB-p\fP \fILockProtoName\fR 
 LockProtoName is the name of the  locking  protocol to use.  Acceptable
 locking protocols are \fIlock_dlm\fR (for shared storage) or if you are
diff --git a/gfs2/mkfs/main_mkfs.c b/gfs2/mkfs/main_mkfs.c
index dcfb032..fb4b5ca 100644
--- a/gfs2/mkfs/main_mkfs.c
+++ b/gfs2/mkfs/main_mkfs.c
@@ -57,7 +57,7 @@ static void print_usage(const char *prog_name)
 	    "-j", _("<number>"), _("Number of journals"),
 	    "-K", NULL,          _("Don't try to discard unused blocks"),
 	    "-O", NULL,          _("Don't ask for confirmation"),
-	    "-o", _("<key>[=<value>][,...]"), _("Specify extended options"),
+	    "-o", _("<key>[=<value>][,...]"), _("Specify extended options. See '-o help'."),
 	    "-p", _("<name>"),   _("Name of the locking protocol"),
 	    "-q", NULL,          _("Don't print anything"),
 	    "-r", _("<size>"),   _("Size of resource groups, in megabytes"),
@@ -80,6 +80,22 @@ static void print_usage(const char *prog_name)
 	}
 }
 
+static void print_ext_opts(void)
+{
+	int i;
+	const char *options[] = {
+		"help", _("Display this help, then exit"),
+		"swidth=N",  _("Specify the stripe width of the device, overriding probed values"),
+		"sunit=N", _("Specify the stripe unit of the device, overriding probed values"),
+		"align=[0|1]", _("Disable or enable alignment of resource groups"),
+		NULL, NULL
+	};
+	printf(_("Extended options:\n"));
+	for (i = 0; options[i] != NULL; i+=2) {
+		printf("%15s  %-22s\n", options[i], options[i+1]);
+	}
+}
+
 struct mkfs_opts {
 	unsigned bsize;
 	unsigned qcsize;
@@ -111,6 +127,7 @@ struct mkfs_opts {
 	unsigned expert:1;
 	unsigned debug:1;
 	unsigned confirm:1;
+	unsigned align:1;
 };
 
 /**
@@ -165,6 +182,7 @@ static void opts_init(struct mkfs_opts *opts)
 	opts->lockproto = "lock_dlm";
 	opts->locktable = "";
 	opts->confirm = 1;
+	opts->align = 1;
 }
 
 #ifndef BLKDISCARD
@@ -250,6 +268,18 @@ static unsigned long parse_ulong(struct mkfs_opts *opts, const char *key, const
 	return (unsigned long)l;
 }
 
+static unsigned parse_bool(struct mkfs_opts *opts, const char *key, const char *val)
+{
+	if (strnlen(val, 2) == 1) {
+		if (*val == '0')
+			return 0;
+		if (*val == '1')
+			return 1;
+	}
+	fprintf(stderr, _("Option '%s' must be either 1 or 0\n"), key);
+	exit(-1);
+}
+
 static void opt_parse_extended(char *str, struct mkfs_opts *opts)
 {
 	char *opt;
@@ -266,6 +296,11 @@ static void opt_parse_extended(char *str, struct mkfs_opts *opts)
 		} else if (strcmp("swidth", key) == 0) {
 			opts->swidth = parse_ulong(opts, "swidth", val);
 			opts->got_swidth = 1;
+		} else if (strcmp("align", key) == 0) {
+			opts->align = parse_bool(opts, "align", val);
+		} else if (strcmp("help", key) == 0) {
+			print_ext_opts();
+			exit(0);
 		} else {
 			fprintf(stderr, _("Invalid option '%s'\n"), key);
 			exit(-1);
@@ -603,7 +638,7 @@ static uint64_t align_block(const uint64_t base, const uint64_t align, const uin
 static void rgs_init(struct mkfs_rgs *rgs, struct mkfs_opts *opts, struct mkfs_dev *dev, struct gfs2_sbd *sdp)
 {
 	memset(rgs, 0, sizeof(*rgs));
-	if (opts->got_sunit) {
+	if (opts->align && opts->got_sunit) {
 		if ((opts->sunit % sdp->bsize) != 0) {
 			fprintf(stderr, _("Stripe unit (%lu) must be a multiple of block size (%u)\n"),
 			        opts->sunit, sdp->bsize);
@@ -616,7 +651,7 @@ static void rgs_init(struct mkfs_rgs *rgs, struct mkfs_opts *opts, struct mkfs_d
 			rgs->align = opts->swidth / sdp->bsize;
 			rgs->align_off = opts->sunit / sdp->bsize;
 		}
-	} else {
+	} else if (opts->align) {
 		if ((dev->minimum_io_size > dev->physical_sector_size) &&
 		    (dev->optimal_io_size > dev->physical_sector_size)) {
 			rgs->align = dev->optimal_io_size / sdp->bsize;
@@ -871,7 +906,11 @@ void main_mkfs(int argc, char *argv[])
 		printf("  fssize = %"PRIu64"\n", opts.fssize);
 		printf("  sunit = %lu\n", opts.sunit);
 		printf("  swidth = %lu\n", opts.swidth);
-		printf("  rgrp align = %lu+%lu blocks\n", rgs.align, rgs.align_off);
+		printf("  rgrp align = ");
+		if (opts.align)
+			printf("%lu+%lu blocks\n", rgs.align, rgs.align_off);
+		else
+			printf("(disabled)\n");
 	}
 
 	warn_of_destruction(opts.device);
-- 
1.8.1.4



^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [Cluster-devel] [PATCH 4/4] mkfs.gfs2: Add align option and update docs
  2013-06-06 12:03 ` [Cluster-devel] [PATCH 4/4] mkfs.gfs2: Add align option and update docs Andrew Price
@ 2013-06-06 12:15   ` Steven Whitehouse
  2013-06-06 12:45     ` Andrew Price
  2013-06-06 13:13   ` Bob Peterson
  1 sibling, 1 reply; 19+ messages in thread
From: Steven Whitehouse @ 2013-06-06 12:15 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

On Thu, 2013-06-06 at 13:03 +0100, Andrew Price wrote:
> Add an 'align' extended option for enabling and disabling resource group
> alignment and update the mkfs.gfs2 man page to document the new extended
> options.
> 
> Signed-off-by: Andrew Price <anprice@redhat.com>
> ---
>  gfs2/man/mkfs.gfs2.8  | 31 +++++++++++++++++++++++++++++++
>  gfs2/mkfs/main_mkfs.c | 47 +++++++++++++++++++++++++++++++++++++++++++----
>  2 files changed, 74 insertions(+), 4 deletions(-)
> 
> diff --git a/gfs2/man/mkfs.gfs2.8 b/gfs2/man/mkfs.gfs2.8
> index 4613305..ceb6f38 100644
> --- a/gfs2/man/mkfs.gfs2.8
> +++ b/gfs2/man/mkfs.gfs2.8
> @@ -48,6 +48,37 @@ storage).
>  This option prevents gfs2_mkfs from asking for confirmation before writing
>  the filesystem.
>  .TP
> +\fB-o\fP
> +Specify extended options. Multiple options can be separated by commas. Valid extended options are:
> +.RS 1.0i
> +.TP
> +.BI help
> +Display an extended options help summary, then exit.
> +.TP
> +.BI sunit= bytes
> +This is used to specify the stripe unit for a RAID device or striped logical
> +volume.  This option ensures that resource groups will be stripe unit aligned
> +and overrides the stripe unit value obtained by probing the device. This value
> +must be a multiple of the file system block size and must be specified with the
> +.I swidth
> +option.
> +.TP
> +.BI swidth= bytes
> +This is used to specify the stripe width for a RAID device or striped logical
> +volume.  This option ensures that resource groups will be stripe aligned and overrides the stripe width value obtained by probing the device. This value must be a multiple of the
> +.I sunit
> +option and must also be specified with it.
> +.TP
> +.BI align= [0|1]
> +Disable or enable the alignment of resource groups. The default behaviour is to
> +align resource groups to the stripe width and stripe unit values obtained from
> +probing the device or specified with the
> +.I swidth
> +and
> +.I sunit
> +extended options.
> +.RE
> +.TP
>  \fB-p\fP \fILockProtoName\fR 
>  LockProtoName is the name of the  locking  protocol to use.  Acceptable
>  locking protocols are \fIlock_dlm\fR (for shared storage) or if you are
> diff --git a/gfs2/mkfs/main_mkfs.c b/gfs2/mkfs/main_mkfs.c
> index dcfb032..fb4b5ca 100644
> --- a/gfs2/mkfs/main_mkfs.c
> +++ b/gfs2/mkfs/main_mkfs.c
> @@ -57,7 +57,7 @@ static void print_usage(const char *prog_name)
>  	    "-j", _("<number>"), _("Number of journals"),
>  	    "-K", NULL,          _("Don't try to discard unused blocks"),
>  	    "-O", NULL,          _("Don't ask for confirmation"),
> -	    "-o", _("<key>[=<value>][,...]"), _("Specify extended options"),
> +	    "-o", _("<key>[=<value>][,...]"), _("Specify extended options. See '-o help'."),
>  	    "-p", _("<name>"),   _("Name of the locking protocol"),
>  	    "-q", NULL,          _("Don't print anything"),
>  	    "-r", _("<size>"),   _("Size of resource groups, in megabytes"),
> @@ -80,6 +80,22 @@ static void print_usage(const char *prog_name)
>  	}
>  }
>  
> +static void print_ext_opts(void)
> +{
> +	int i;
> +	const char *options[] = {
> +		"help", _("Display this help, then exit"),
> +		"swidth=N",  _("Specify the stripe width of the device, overriding probed values"),
> +		"sunit=N", _("Specify the stripe unit of the device, overriding probed values"),
> +		"align=[0|1]", _("Disable or enable alignment of resource groups"),
I don't think that this is going to work with gettext since the _()
implies a function call,

Steve.

> +		NULL, NULL
> +	};
> +	printf(_("Extended options:\n"));
> +	for (i = 0; options[i] != NULL; i+=2) {
> +		printf("%15s  %-22s\n", options[i], options[i+1]);
> +	}
> +}
> +
>  struct mkfs_opts {
>  	unsigned bsize;
>  	unsigned qcsize;
> @@ -111,6 +127,7 @@ struct mkfs_opts {
>  	unsigned expert:1;
>  	unsigned debug:1;
>  	unsigned confirm:1;
> +	unsigned align:1;
>  };
>  
>  /**
> @@ -165,6 +182,7 @@ static void opts_init(struct mkfs_opts *opts)
>  	opts->lockproto = "lock_dlm";
>  	opts->locktable = "";
>  	opts->confirm = 1;
> +	opts->align = 1;
>  }
>  
>  #ifndef BLKDISCARD
> @@ -250,6 +268,18 @@ static unsigned long parse_ulong(struct mkfs_opts *opts, const char *key, const
>  	return (unsigned long)l;
>  }
>  
> +static unsigned parse_bool(struct mkfs_opts *opts, const char *key, const char *val)
> +{
> +	if (strnlen(val, 2) == 1) {
> +		if (*val == '0')
> +			return 0;
> +		if (*val == '1')
> +			return 1;
> +	}
> +	fprintf(stderr, _("Option '%s' must be either 1 or 0\n"), key);
> +	exit(-1);
> +}
> +
>  static void opt_parse_extended(char *str, struct mkfs_opts *opts)
>  {
>  	char *opt;
> @@ -266,6 +296,11 @@ static void opt_parse_extended(char *str, struct mkfs_opts *opts)
>  		} else if (strcmp("swidth", key) == 0) {
>  			opts->swidth = parse_ulong(opts, "swidth", val);
>  			opts->got_swidth = 1;
> +		} else if (strcmp("align", key) == 0) {
> +			opts->align = parse_bool(opts, "align", val);
> +		} else if (strcmp("help", key) == 0) {
> +			print_ext_opts();
> +			exit(0);
>  		} else {
>  			fprintf(stderr, _("Invalid option '%s'\n"), key);
>  			exit(-1);
> @@ -603,7 +638,7 @@ static uint64_t align_block(const uint64_t base, const uint64_t align, const uin
>  static void rgs_init(struct mkfs_rgs *rgs, struct mkfs_opts *opts, struct mkfs_dev *dev, struct gfs2_sbd *sdp)
>  {
>  	memset(rgs, 0, sizeof(*rgs));
> -	if (opts->got_sunit) {
> +	if (opts->align && opts->got_sunit) {
>  		if ((opts->sunit % sdp->bsize) != 0) {
>  			fprintf(stderr, _("Stripe unit (%lu) must be a multiple of block size (%u)\n"),
>  			        opts->sunit, sdp->bsize);
> @@ -616,7 +651,7 @@ static void rgs_init(struct mkfs_rgs *rgs, struct mkfs_opts *opts, struct mkfs_d
>  			rgs->align = opts->swidth / sdp->bsize;
>  			rgs->align_off = opts->sunit / sdp->bsize;
>  		}
> -	} else {
> +	} else if (opts->align) {
>  		if ((dev->minimum_io_size > dev->physical_sector_size) &&
>  		    (dev->optimal_io_size > dev->physical_sector_size)) {
>  			rgs->align = dev->optimal_io_size / sdp->bsize;
> @@ -871,7 +906,11 @@ void main_mkfs(int argc, char *argv[])
>  		printf("  fssize = %"PRIu64"\n", opts.fssize);
>  		printf("  sunit = %lu\n", opts.sunit);
>  		printf("  swidth = %lu\n", opts.swidth);
> -		printf("  rgrp align = %lu+%lu blocks\n", rgs.align, rgs.align_off);
> +		printf("  rgrp align = ");
> +		if (opts.align)
> +			printf("%lu+%lu blocks\n", rgs.align, rgs.align_off);
> +		else
> +			printf("(disabled)\n");
>  	}
>  
>  	warn_of_destruction(opts.device);




^ permalink raw reply	[flat|nested] 19+ messages in thread

* [Cluster-devel] [PATCH 4/4] mkfs.gfs2: Add align option and update docs
  2013-06-06 12:15   ` Steven Whitehouse
@ 2013-06-06 12:45     ` Andrew Price
  2013-06-06 12:53       ` Steven Whitehouse
  0 siblings, 1 reply; 19+ messages in thread
From: Andrew Price @ 2013-06-06 12:45 UTC (permalink / raw)
  To: cluster-devel.redhat.com

On 06/06/13 13:15, Steven Whitehouse wrote:
> Hi,
>
> On Thu, 2013-06-06 at 13:03 +0100, Andrew Price wrote:
>> Add an 'align' extended option for enabling and disabling resource group
>> alignment and update the mkfs.gfs2 man page to document the new extended
>> options.
>>
>> Signed-off-by: Andrew Price <anprice@redhat.com>
>> ---
>>   gfs2/man/mkfs.gfs2.8  | 31 +++++++++++++++++++++++++++++++
>>   gfs2/mkfs/main_mkfs.c | 47 +++++++++++++++++++++++++++++++++++++++++++----
>>   2 files changed, 74 insertions(+), 4 deletions(-)
>>
>> diff --git a/gfs2/man/mkfs.gfs2.8 b/gfs2/man/mkfs.gfs2.8
>> index 4613305..ceb6f38 100644
>> --- a/gfs2/man/mkfs.gfs2.8
>> +++ b/gfs2/man/mkfs.gfs2.8
>> @@ -48,6 +48,37 @@ storage).
>>   This option prevents gfs2_mkfs from asking for confirmation before writing
>>   the filesystem.
>>   .TP
>> +\fB-o\fP
>> +Specify extended options. Multiple options can be separated by commas. Valid extended options are:
>> +.RS 1.0i
>> +.TP
>> +.BI help
>> +Display an extended options help summary, then exit.
>> +.TP
>> +.BI sunit= bytes
>> +This is used to specify the stripe unit for a RAID device or striped logical
>> +volume.  This option ensures that resource groups will be stripe unit aligned
>> +and overrides the stripe unit value obtained by probing the device. This value
>> +must be a multiple of the file system block size and must be specified with the
>> +.I swidth
>> +option.
>> +.TP
>> +.BI swidth= bytes
>> +This is used to specify the stripe width for a RAID device or striped logical
>> +volume.  This option ensures that resource groups will be stripe aligned and overrides the stripe width value obtained by probing the device. This value must be a multiple of the
>> +.I sunit
>> +option and must also be specified with it.
>> +.TP
>> +.BI align= [0|1]
>> +Disable or enable the alignment of resource groups. The default behaviour is to
>> +align resource groups to the stripe width and stripe unit values obtained from
>> +probing the device or specified with the
>> +.I swidth
>> +and
>> +.I sunit
>> +extended options.
>> +.RE
>> +.TP
>>   \fB-p\fP \fILockProtoName\fR
>>   LockProtoName is the name of the  locking  protocol to use.  Acceptable
>>   locking protocols are \fIlock_dlm\fR (for shared storage) or if you are
>> diff --git a/gfs2/mkfs/main_mkfs.c b/gfs2/mkfs/main_mkfs.c
>> index dcfb032..fb4b5ca 100644
>> --- a/gfs2/mkfs/main_mkfs.c
>> +++ b/gfs2/mkfs/main_mkfs.c
>> @@ -57,7 +57,7 @@ static void print_usage(const char *prog_name)
>>   	    "-j", _("<number>"), _("Number of journals"),
>>   	    "-K", NULL,          _("Don't try to discard unused blocks"),
>>   	    "-O", NULL,          _("Don't ask for confirmation"),
>> -	    "-o", _("<key>[=<value>][,...]"), _("Specify extended options"),
>> +	    "-o", _("<key>[=<value>][,...]"), _("Specify extended options. See '-o help'."),
>>   	    "-p", _("<name>"),   _("Name of the locking protocol"),
>>   	    "-q", NULL,          _("Don't print anything"),
>>   	    "-r", _("<size>"),   _("Size of resource groups, in megabytes"),
>> @@ -80,6 +80,22 @@ static void print_usage(const char *prog_name)
>>   	}
>>   }
>>
>> +static void print_ext_opts(void)
>> +{
>> +	int i;
>> +	const char *options[] = {
>> +		"help", _("Display this help, then exit"),
>> +		"swidth=N",  _("Specify the stripe width of the device, overriding probed values"),
>> +		"sunit=N", _("Specify the stripe unit of the device, overriding probed values"),
>> +		"align=[0|1]", _("Disable or enable alignment of resource groups"),
> I don't think that this is going to work with gettext since the _()
> implies a function call,

_() is #defined as gettext() but I'm not sure why it shouldn't work. 
gettext() is being called as the array is initialised, returning char 
pointers which get stored in the array.

Andy

>
> Steve.
>
>> +		NULL, NULL
>> +	};
>> +	printf(_("Extended options:\n"));
>> +	for (i = 0; options[i] != NULL; i+=2) {
>> +		printf("%15s  %-22s\n", options[i], options[i+1]);
>> +	}
>> +}
>> +
>>   struct mkfs_opts {
>>   	unsigned bsize;
>>   	unsigned qcsize;
>> @@ -111,6 +127,7 @@ struct mkfs_opts {
>>   	unsigned expert:1;
>>   	unsigned debug:1;
>>   	unsigned confirm:1;
>> +	unsigned align:1;
>>   };
>>
>>   /**
>> @@ -165,6 +182,7 @@ static void opts_init(struct mkfs_opts *opts)
>>   	opts->lockproto = "lock_dlm";
>>   	opts->locktable = "";
>>   	opts->confirm = 1;
>> +	opts->align = 1;
>>   }
>>
>>   #ifndef BLKDISCARD
>> @@ -250,6 +268,18 @@ static unsigned long parse_ulong(struct mkfs_opts *opts, const char *key, const
>>   	return (unsigned long)l;
>>   }
>>
>> +static unsigned parse_bool(struct mkfs_opts *opts, const char *key, const char *val)
>> +{
>> +	if (strnlen(val, 2) == 1) {
>> +		if (*val == '0')
>> +			return 0;
>> +		if (*val == '1')
>> +			return 1;
>> +	}
>> +	fprintf(stderr, _("Option '%s' must be either 1 or 0\n"), key);
>> +	exit(-1);
>> +}
>> +
>>   static void opt_parse_extended(char *str, struct mkfs_opts *opts)
>>   {
>>   	char *opt;
>> @@ -266,6 +296,11 @@ static void opt_parse_extended(char *str, struct mkfs_opts *opts)
>>   		} else if (strcmp("swidth", key) == 0) {
>>   			opts->swidth = parse_ulong(opts, "swidth", val);
>>   			opts->got_swidth = 1;
>> +		} else if (strcmp("align", key) == 0) {
>> +			opts->align = parse_bool(opts, "align", val);
>> +		} else if (strcmp("help", key) == 0) {
>> +			print_ext_opts();
>> +			exit(0);
>>   		} else {
>>   			fprintf(stderr, _("Invalid option '%s'\n"), key);
>>   			exit(-1);
>> @@ -603,7 +638,7 @@ static uint64_t align_block(const uint64_t base, const uint64_t align, const uin
>>   static void rgs_init(struct mkfs_rgs *rgs, struct mkfs_opts *opts, struct mkfs_dev *dev, struct gfs2_sbd *sdp)
>>   {
>>   	memset(rgs, 0, sizeof(*rgs));
>> -	if (opts->got_sunit) {
>> +	if (opts->align && opts->got_sunit) {
>>   		if ((opts->sunit % sdp->bsize) != 0) {
>>   			fprintf(stderr, _("Stripe unit (%lu) must be a multiple of block size (%u)\n"),
>>   			        opts->sunit, sdp->bsize);
>> @@ -616,7 +651,7 @@ static void rgs_init(struct mkfs_rgs *rgs, struct mkfs_opts *opts, struct mkfs_d
>>   			rgs->align = opts->swidth / sdp->bsize;
>>   			rgs->align_off = opts->sunit / sdp->bsize;
>>   		}
>> -	} else {
>> +	} else if (opts->align) {
>>   		if ((dev->minimum_io_size > dev->physical_sector_size) &&
>>   		    (dev->optimal_io_size > dev->physical_sector_size)) {
>>   			rgs->align = dev->optimal_io_size / sdp->bsize;
>> @@ -871,7 +906,11 @@ void main_mkfs(int argc, char *argv[])
>>   		printf("  fssize = %"PRIu64"\n", opts.fssize);
>>   		printf("  sunit = %lu\n", opts.sunit);
>>   		printf("  swidth = %lu\n", opts.swidth);
>> -		printf("  rgrp align = %lu+%lu blocks\n", rgs.align, rgs.align_off);
>> +		printf("  rgrp align = ");
>> +		if (opts.align)
>> +			printf("%lu+%lu blocks\n", rgs.align, rgs.align_off);
>> +		else
>> +			printf("(disabled)\n");
>>   	}
>>
>>   	warn_of_destruction(opts.device);
>
>



^ permalink raw reply	[flat|nested] 19+ messages in thread

* [Cluster-devel] [PATCH 4/4] mkfs.gfs2: Add align option and update docs
  2013-06-06 12:45     ` Andrew Price
@ 2013-06-06 12:53       ` Steven Whitehouse
  0 siblings, 0 replies; 19+ messages in thread
From: Steven Whitehouse @ 2013-06-06 12:53 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

On Thu, 2013-06-06 at 13:45 +0100, Andrew Price wrote:
> On 06/06/13 13:15, Steven Whitehouse wrote:
> > Hi,
> >
> > On Thu, 2013-06-06 at 13:03 +0100, Andrew Price wrote:
> >> Add an 'align' extended option for enabling and disabling resource group
> >> alignment and update the mkfs.gfs2 man page to document the new extended
> >> options.
> >>
> >> Signed-off-by: Andrew Price <anprice@redhat.com>
> >> ---
> >>   gfs2/man/mkfs.gfs2.8  | 31 +++++++++++++++++++++++++++++++
> >>   gfs2/mkfs/main_mkfs.c | 47 +++++++++++++++++++++++++++++++++++++++++++----
> >>   2 files changed, 74 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/gfs2/man/mkfs.gfs2.8 b/gfs2/man/mkfs.gfs2.8
> >> index 4613305..ceb6f38 100644
> >> --- a/gfs2/man/mkfs.gfs2.8
> >> +++ b/gfs2/man/mkfs.gfs2.8
> >> @@ -48,6 +48,37 @@ storage).
> >>   This option prevents gfs2_mkfs from asking for confirmation before writing
> >>   the filesystem.
> >>   .TP
> >> +\fB-o\fP
> >> +Specify extended options. Multiple options can be separated by commas. Valid extended options are:
> >> +.RS 1.0i
> >> +.TP
> >> +.BI help
> >> +Display an extended options help summary, then exit.
> >> +.TP
> >> +.BI sunit= bytes
> >> +This is used to specify the stripe unit for a RAID device or striped logical
> >> +volume.  This option ensures that resource groups will be stripe unit aligned
> >> +and overrides the stripe unit value obtained by probing the device. This value
> >> +must be a multiple of the file system block size and must be specified with the
> >> +.I swidth
> >> +option.
> >> +.TP
> >> +.BI swidth= bytes
> >> +This is used to specify the stripe width for a RAID device or striped logical
> >> +volume.  This option ensures that resource groups will be stripe aligned and overrides the stripe width value obtained by probing the device. This value must be a multiple of the
> >> +.I sunit
> >> +option and must also be specified with it.
> >> +.TP
> >> +.BI align= [0|1]
> >> +Disable or enable the alignment of resource groups. The default behaviour is to
> >> +align resource groups to the stripe width and stripe unit values obtained from
> >> +probing the device or specified with the
> >> +.I swidth
> >> +and
> >> +.I sunit
> >> +extended options.
> >> +.RE
> >> +.TP
> >>   \fB-p\fP \fILockProtoName\fR
> >>   LockProtoName is the name of the  locking  protocol to use.  Acceptable
> >>   locking protocols are \fIlock_dlm\fR (for shared storage) or if you are
> >> diff --git a/gfs2/mkfs/main_mkfs.c b/gfs2/mkfs/main_mkfs.c
> >> index dcfb032..fb4b5ca 100644
> >> --- a/gfs2/mkfs/main_mkfs.c
> >> +++ b/gfs2/mkfs/main_mkfs.c
> >> @@ -57,7 +57,7 @@ static void print_usage(const char *prog_name)
> >>   	    "-j", _("<number>"), _("Number of journals"),
> >>   	    "-K", NULL,          _("Don't try to discard unused blocks"),
> >>   	    "-O", NULL,          _("Don't ask for confirmation"),
> >> -	    "-o", _("<key>[=<value>][,...]"), _("Specify extended options"),
> >> +	    "-o", _("<key>[=<value>][,...]"), _("Specify extended options. See '-o help'."),
> >>   	    "-p", _("<name>"),   _("Name of the locking protocol"),
> >>   	    "-q", NULL,          _("Don't print anything"),
> >>   	    "-r", _("<size>"),   _("Size of resource groups, in megabytes"),
> >> @@ -80,6 +80,22 @@ static void print_usage(const char *prog_name)
> >>   	}
> >>   }
> >>
> >> +static void print_ext_opts(void)
> >> +{
> >> +	int i;
> >> +	const char *options[] = {
> >> +		"help", _("Display this help, then exit"),
> >> +		"swidth=N",  _("Specify the stripe width of the device, overriding probed values"),
> >> +		"sunit=N", _("Specify the stripe unit of the device, overriding probed values"),
> >> +		"align=[0|1]", _("Disable or enable alignment of resource groups"),
> > I don't think that this is going to work with gettext since the _()
> > implies a function call,
> 
> _() is #defined as gettext() but I'm not sure why it shouldn't work. 
> gettext() is being called as the array is initialised, returning char 
> pointers which get stored in the array.
> 
> Andy
> 
Ok, I see now, that should work in that case. Sorry, obviously not
thinking straight today!

Steve.

> >
> > Steve.
> >
> >> +		NULL, NULL
> >> +	};
> >> +	printf(_("Extended options:\n"));
> >> +	for (i = 0; options[i] != NULL; i+=2) {
> >> +		printf("%15s  %-22s\n", options[i], options[i+1]);
> >> +	}
> >> +}
> >> +
> >>   struct mkfs_opts {
> >>   	unsigned bsize;
> >>   	unsigned qcsize;
> >> @@ -111,6 +127,7 @@ struct mkfs_opts {
> >>   	unsigned expert:1;
> >>   	unsigned debug:1;
> >>   	unsigned confirm:1;
> >> +	unsigned align:1;
> >>   };
> >>
> >>   /**
> >> @@ -165,6 +182,7 @@ static void opts_init(struct mkfs_opts *opts)
> >>   	opts->lockproto = "lock_dlm";
> >>   	opts->locktable = "";
> >>   	opts->confirm = 1;
> >> +	opts->align = 1;
> >>   }
> >>
> >>   #ifndef BLKDISCARD
> >> @@ -250,6 +268,18 @@ static unsigned long parse_ulong(struct mkfs_opts *opts, const char *key, const
> >>   	return (unsigned long)l;
> >>   }
> >>
> >> +static unsigned parse_bool(struct mkfs_opts *opts, const char *key, const char *val)
> >> +{
> >> +	if (strnlen(val, 2) == 1) {
> >> +		if (*val == '0')
> >> +			return 0;
> >> +		if (*val == '1')
> >> +			return 1;
> >> +	}
> >> +	fprintf(stderr, _("Option '%s' must be either 1 or 0\n"), key);
> >> +	exit(-1);
> >> +}
> >> +
> >>   static void opt_parse_extended(char *str, struct mkfs_opts *opts)
> >>   {
> >>   	char *opt;
> >> @@ -266,6 +296,11 @@ static void opt_parse_extended(char *str, struct mkfs_opts *opts)
> >>   		} else if (strcmp("swidth", key) == 0) {
> >>   			opts->swidth = parse_ulong(opts, "swidth", val);
> >>   			opts->got_swidth = 1;
> >> +		} else if (strcmp("align", key) == 0) {
> >> +			opts->align = parse_bool(opts, "align", val);
> >> +		} else if (strcmp("help", key) == 0) {
> >> +			print_ext_opts();
> >> +			exit(0);
> >>   		} else {
> >>   			fprintf(stderr, _("Invalid option '%s'\n"), key);
> >>   			exit(-1);
> >> @@ -603,7 +638,7 @@ static uint64_t align_block(const uint64_t base, const uint64_t align, const uin
> >>   static void rgs_init(struct mkfs_rgs *rgs, struct mkfs_opts *opts, struct mkfs_dev *dev, struct gfs2_sbd *sdp)
> >>   {
> >>   	memset(rgs, 0, sizeof(*rgs));
> >> -	if (opts->got_sunit) {
> >> +	if (opts->align && opts->got_sunit) {
> >>   		if ((opts->sunit % sdp->bsize) != 0) {
> >>   			fprintf(stderr, _("Stripe unit (%lu) must be a multiple of block size (%u)\n"),
> >>   			        opts->sunit, sdp->bsize);
> >> @@ -616,7 +651,7 @@ static void rgs_init(struct mkfs_rgs *rgs, struct mkfs_opts *opts, struct mkfs_d
> >>   			rgs->align = opts->swidth / sdp->bsize;
> >>   			rgs->align_off = opts->sunit / sdp->bsize;
> >>   		}
> >> -	} else {
> >> +	} else if (opts->align) {
> >>   		if ((dev->minimum_io_size > dev->physical_sector_size) &&
> >>   		    (dev->optimal_io_size > dev->physical_sector_size)) {
> >>   			rgs->align = dev->optimal_io_size / sdp->bsize;
> >> @@ -871,7 +906,11 @@ void main_mkfs(int argc, char *argv[])
> >>   		printf("  fssize = %"PRIu64"\n", opts.fssize);
> >>   		printf("  sunit = %lu\n", opts.sunit);
> >>   		printf("  swidth = %lu\n", opts.swidth);
> >> -		printf("  rgrp align = %lu+%lu blocks\n", rgs.align, rgs.align_off);
> >> +		printf("  rgrp align = ");
> >> +		if (opts.align)
> >> +			printf("%lu+%lu blocks\n", rgs.align, rgs.align_off);
> >> +		else
> >> +			printf("(disabled)\n");
> >>   	}
> >>
> >>   	warn_of_destruction(opts.device);
> >
> >
> 




^ permalink raw reply	[flat|nested] 19+ messages in thread

* [Cluster-devel] [PATCH 4/4] mkfs.gfs2: Add align option and update docs
  2013-06-06 12:03 ` [Cluster-devel] [PATCH 4/4] mkfs.gfs2: Add align option and update docs Andrew Price
  2013-06-06 12:15   ` Steven Whitehouse
@ 2013-06-06 13:13   ` Bob Peterson
  2013-06-06 13:53     ` Andrew Price
  1 sibling, 1 reply; 19+ messages in thread
From: Bob Peterson @ 2013-06-06 13:13 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,

----- Original Message -----
| +	for (i = 0; options[i] != NULL; i+=2) {

Just a nit here, but I'd recommend adding spaces, as per our standard: "i += 2"

| +		printf("%15s  %-22s\n", options[i], options[i+1]);

Same here: "i + 1"

Regards,

Bob Peterson
Red Hat File Systems



^ permalink raw reply	[flat|nested] 19+ messages in thread

* [Cluster-devel] [PATCH 4/4] mkfs.gfs2: Add align option and update docs
  2013-06-06 13:13   ` Bob Peterson
@ 2013-06-06 13:53     ` Andrew Price
  0 siblings, 0 replies; 19+ messages in thread
From: Andrew Price @ 2013-06-06 13:53 UTC (permalink / raw)
  To: cluster-devel.redhat.com

On 06/06/13 14:13, Bob Peterson wrote:
> Hi,
>
> ----- Original Message -----
> | +	for (i = 0; options[i] != NULL; i+=2) {
>
> Just a nit here, but I'd recommend adding spaces, as per our standard: "i += 2"

Agreed - thanks.

Andy

>
> | +		printf("%15s  %-22s\n", options[i], options[i+1]);
>
> Same here: "i + 1"
>
> Regards,
>
> Bob Peterson
> Red Hat File Systems
>



^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2013-06-06 15:17 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-06-06 12:03 [Cluster-devel] [PATCH 1/4] mkfs.gfs2: Set sunit and swidth from probed io limits Andrew Price
2013-06-06 12:03 ` [Cluster-devel] [PATCH 2/4] mkfs.gfs2: Align resource groups to RAID stripes Andrew Price
2013-06-06 12:06   ` Steven Whitehouse
2013-06-06 12:19     ` Andrew Price
2013-06-06 12:57   ` Bob Peterson
2013-06-06 13:04     ` Andrew Price
2013-06-06 13:17       ` Bob Peterson
2013-06-06 13:11     ` Steven Whitehouse
2013-06-06 13:30       ` Bob Peterson
2013-06-06 15:17       ` Andrew Price
2013-06-06 12:03 ` [Cluster-devel] [PATCH 3/4] mkfs.gfs2: Create new resource groups on-demand Andrew Price
2013-06-06 13:07   ` Bob Peterson
2013-06-06 13:50     ` Andrew Price
2013-06-06 12:03 ` [Cluster-devel] [PATCH 4/4] mkfs.gfs2: Add align option and update docs Andrew Price
2013-06-06 12:15   ` Steven Whitehouse
2013-06-06 12:45     ` Andrew Price
2013-06-06 12:53       ` Steven Whitehouse
2013-06-06 13:13   ` Bob Peterson
2013-06-06 13:53     ` Andrew Price

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).