Linux RAID subsystem development
 help / color / mirror / Atom feed
* reshaping raid6 in-place
@ 2010-05-12 23:10 Frank Corrao
  2010-05-13  2:08 ` Neil Brown
  0 siblings, 1 reply; 5+ messages in thread
From: Frank Corrao @ 2010-05-12 23:10 UTC (permalink / raw)
  To: linux-raid

I have a 5-disk raid5 array which I recently reshaped to raid6 while 
simultaneously adding a 6th disk.  mdadm (3.1.1) reported that the 
kernel I was running at the time (2.6.30) wasn't safe to perform the 
in-place portion of the reshaping.  The array was partially 
converted to raid6, but I believe the Q blocks are all on the last 
device I added.  mdadm seems to report the algorithm as 
left-symmetric-6 as opposed to left-symmetric.  Today I upgraded to 
2.6.32 and mdadm 3.1.2 and attempted to complete the re-shaping but ran 
into a snag:

$ sudo ./mdadm -v --grow /dev/md2 -l 6 -n 6 -p left-symmetric
mdadm: /dev/md2: Something wrong - reshape aborted

Is my syntax correct?

Frank





Relevant portion of /proc/mdstat:

md2 : active raid6 sda3[0] sde3[5] sdd3[4] sdf3[3] sdc3[2] sdb3[1]
       5721706496 blocks level 6, 1024k chunk, algorithm 18 [6/6] [UUUUUU]



$ sudo ./mdadm --detail /dev/md2
/dev/md2:
         Version : 0.90
   Creation Time : Mon Oct 19 14:15:33 2009
      Raid Level : raid6
      Array Size : 5721706496 (5456.64 GiB 5859.03 GB)
   Used Dev Size : 1430426624 (1364.16 GiB 1464.76 GB)
    Raid Devices : 6
   Total Devices : 6
Preferred Minor : 2
     Persistence : Superblock is persistent

     Update Time : Wed May 12 19:05:51 2010
           State : clean
  Active Devices : 6
Working Devices : 6
  Failed Devices : 0
   Spare Devices : 0

          Layout : left-symmetric-6
      Chunk Size : 1024K

            UUID : 677f0016:b447937b:afdaf140:4080c53a
          Events : 0.16148

     Number   Major   Minor   RaidDevice State
        0       8        3        0      active sync   /dev/sda3
        1       8       19        1      active sync   /dev/sdb3
        2       8       35        2      active sync   /dev/sdc3
        3       8       83        3      active sync   /dev/sdf3
        4       8       51        4      active sync   /dev/sdd3
        5       8       67        5      active sync   /dev/sde3

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: reshaping raid6 in-place
  2010-05-12 23:10 reshaping raid6 in-place Frank Corrao
@ 2010-05-13  2:08 ` Neil Brown
  2010-05-13  2:41   ` Frank Corrao
  0 siblings, 1 reply; 5+ messages in thread
From: Neil Brown @ 2010-05-13  2:08 UTC (permalink / raw)
  To: Frank Corrao; +Cc: linux-raid

On Wed, 12 May 2010 19:10:35 -0400 (EDT)
Frank Corrao <fcorrao@temp555.com> wrote:

> I have a 5-disk raid5 array which I recently reshaped to raid6 while 
> simultaneously adding a 6th disk.  mdadm (3.1.1) reported that the 
> kernel I was running at the time (2.6.30) wasn't safe to perform the 
> in-place portion of the reshaping.  The array was partitionally 
> converted to raid6, but I believe the Q blocks are all on the last 
> device I added.  mdadm seems to report the algorithm as 
> left-symmetric-6 as opposed to left-symmetric.  Today I upgraded to 
> 2.6.32 and mdadm 3.1.2 and attempted to complete the re-shaping but ran 
> into a snag:
> 
> $ sudo ./mdadm -v --grow /dev/md2 -l 6 -n 6 -p left-symmetric
> mdadm: /dev/md2: Something wrong - reshape aborted
> 
> Is my syntax correct?
> 
> Frank

This is a 32bit host isn't it???  You would have thought I would have learned
by now, but it seems not.  'blocks' in Grow.c is "unsigned long" and should
really be "unsigned long long".

Also your syntax is incomplete.  You need a backup-file for that reshape.
mdadm would have told you that if it hadn't confused itself first.

This patch should help you.

NeilBrown

diff --git a/Grow.c b/Grow.c
index 0916c5d..d1a7b63 100644
--- a/Grow.c
+++ b/Grow.c
@@ -418,15 +418,15 @@ int bsb_csum(char *buf, int len)
 	return __cpu_to_le32(csum);
 }
 
-static int child_grow(int afd, struct mdinfo *sra, unsigned long blocks,
+static int child_grow(int afd, struct mdinfo *sra, unsigned long long blocks,
 		      int *fds, unsigned long long *offsets,
 		      int disks, int chunk, int level, int layout, int data,
 		      int dests, int *destfd, unsigned long long *destoffsets);
-static int child_shrink(int afd, struct mdinfo *sra, unsigned long blocks,
+static int child_shrink(int afd, struct mdinfo *sra, unsigned long long blocks,
 			int *fds, unsigned long long *offsets,
 			int disks, int chunk, int level, int layout, int data,
 			int dests, int *destfd, unsigned long long *destoffsets);
-static int child_same_size(int afd, struct mdinfo *sra, unsigned long blocks,
+static int child_same_size(int afd, struct mdinfo *sra, unsigned long long blocks,
 			   int *fds, unsigned long long *offsets,
 			   unsigned long long start,
 			   int disks, int chunk, int level, int layout, int data,
@@ -514,7 +514,7 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
 	int nrdisks;
 	int err;
 	int frozen;
-	unsigned long a,b, blocks, stripes;
+	unsigned long long a,b, blocks, stripes;
 	int cache;
 	unsigned long long array_size;
 	int changed = 0;
@@ -1262,7 +1262,7 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
 /* FIXME return status is never checked */
 int grow_backup(struct mdinfo *sra,
 		unsigned long long offset, /* per device */
-		unsigned long stripes, /* per device */
+		unsigned long long stripes, /* per device */
 		int *sources, unsigned long long *offsets,
 		int disks, int chunk, int level, int layout,
 		int dests, int *destfd, unsigned long long *destoffsets,
@@ -1522,7 +1522,7 @@ static void validate(int afd, int bfd, unsigned long long offset)
 	}
 }
 
-static int child_grow(int afd, struct mdinfo *sra, unsigned long stripes,
+static int child_grow(int afd, struct mdinfo *sra, unsigned long long stripes,
 		      int *fds, unsigned long long *offsets,
 		      int disks, int chunk, int level, int layout, int data,
 		      int dests, int *destfd, unsigned long long *destoffsets)
@@ -1550,7 +1550,7 @@ static int child_grow(int afd, struct mdinfo *sra, unsigned long stripes,
 	return 1;
 }
 
-static int child_shrink(int afd, struct mdinfo *sra, unsigned long stripes,
+static int child_shrink(int afd, struct mdinfo *sra, unsigned long long stripes,
 			int *fds, unsigned long long *offsets,
 			int disks, int chunk, int level, int layout, int data,
 			int dests, int *destfd, unsigned long long *destoffsets)
@@ -1586,14 +1586,14 @@ static int child_shrink(int afd, struct mdinfo *sra, unsigned long stripes,
 	return 1;
 }
 
-static int child_same_size(int afd, struct mdinfo *sra, unsigned long stripes,
+static int child_same_size(int afd, struct mdinfo *sra, unsigned long long stripes,
 			   int *fds, unsigned long long *offsets,
 			   unsigned long long start,
 			   int disks, int chunk, int level, int layout, int data,
 			   int dests, int *destfd, unsigned long long *destoffsets)
 {
 	unsigned long long size;
-	unsigned long tailstripes = stripes;
+	unsigned long long tailstripes = stripes;
 	int part;
 	char *buf;
 	unsigned long long speed;
@@ -1960,7 +1960,7 @@ int Grow_continue(int mdfd, struct supertype *st, struct mdinfo *info,
 	int backup_list[1];
 	unsigned long long backup_offsets[1];
 	int odisks, ndisks, ochunk, nchunk,odata,ndata;
-	unsigned long a,b,blocks,stripes;
+	unsigned long long a,b,blocks,stripes;
 	int backup_fd;
 	int *fds;
 	unsigned long long *offsets;


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: reshaping raid6 in-place
  2010-05-13  2:08 ` Neil Brown
@ 2010-05-13  2:41   ` Frank Corrao
  2010-05-13  3:18     ` Neil Brown
  0 siblings, 1 reply; 5+ messages in thread
From: Frank Corrao @ 2010-05-13  2:41 UTC (permalink / raw)
  To: Neil Brown; +Cc: linux-raid

On Thu, 13 May 2010, Neil Brown wrote:

> On Wed, 12 May 2010 19:10:35 -0400 (EDT)
> Frank Corrao <fcorrao@temp555.com> wrote:
>
>> I have a 5-disk raid5 array which I recently reshaped to raid6 while
>> simultaneously adding a 6th disk.  mdadm (3.1.1) reported that the
>> kernel I was running at the time (2.6.30) wasn't safe to perform the
>> in-place portion of the reshaping.  The array was partitionally
>> converted to raid6, but I believe the Q blocks are all on the last
>> device I added.  mdadm seems to report the algorithm as
>> left-symmetric-6 as opposed to left-symmetric.  Today I upgraded to
>> 2.6.32 and mdadm 3.1.2 and attempted to complete the re-shaping but ran
>> into a snag:
>>
>> $ sudo ./mdadm -v --grow /dev/md2 -l 6 -n 6 -p left-symmetric
>> mdadm: /dev/md2: Something wrong - reshape aborted
>>
>> Is my syntax correct?
>>
>> Frank
>
> This is a 32bit host isn't it???  You would have thought I would have learned
> by now, but it seems not.  'blocks' in Grow.c is "unsigned long" and should
> really be "unsigned long long".

Actually no - it's x86_64.  I applied the patch, corrected the fprintf() 
call on line 961 and recompiled, but I still get the same error.


> Also your syntax is incomplete.  You need a backup-file for that reshape.
> mdadm would have told you that if it hadn't confused itself first.

No change when adding the --backup-file option:

$ sudo ./mdadm -v --grow /dev/md2 -l 6 -n 6 -p left-symmetric --backup-file=/root/md2.backup
mdadm: /dev/md2: Something wrong - reshape aborted


Frank




> This patch should help you.
>
> NeilBrown
>
> diff --git a/Grow.c b/Grow.c
> index 0916c5d..d1a7b63 100644
> --- a/Grow.c
> +++ b/Grow.c
> @@ -418,15 +418,15 @@ int bsb_csum(char *buf, int len)
> 	return __cpu_to_le32(csum);
> }
>
> -static int child_grow(int afd, struct mdinfo *sra, unsigned long blocks,
> +static int child_grow(int afd, struct mdinfo *sra, unsigned long long blocks,
> 		      int *fds, unsigned long long *offsets,
> 		      int disks, int chunk, int level, int layout, int data,
> 		      int dests, int *destfd, unsigned long long *destoffsets);
> -static int child_shrink(int afd, struct mdinfo *sra, unsigned long blocks,
> +static int child_shrink(int afd, struct mdinfo *sra, unsigned long long blocks,
> 			int *fds, unsigned long long *offsets,
> 			int disks, int chunk, int level, int layout, int data,
> 			int dests, int *destfd, unsigned long long *destoffsets);
> -static int child_same_size(int afd, struct mdinfo *sra, unsigned long blocks,
> +static int child_same_size(int afd, struct mdinfo *sra, unsigned long long blocks,
> 			   int *fds, unsigned long long *offsets,
> 			   unsigned long long start,
> 			   int disks, int chunk, int level, int layout, int data,
> @@ -514,7 +514,7 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
> 	int nrdisks;
> 	int err;
> 	int frozen;
> -	unsigned long a,b, blocks, stripes;
> +	unsigned long long a,b, blocks, stripes;
> 	int cache;
> 	unsigned long long array_size;
> 	int changed = 0;
> @@ -1262,7 +1262,7 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
> /* FIXME return status is never checked */
> int grow_backup(struct mdinfo *sra,
> 		unsigned long long offset, /* per device */
> -		unsigned long stripes, /* per device */
> +		unsigned long long stripes, /* per device */
> 		int *sources, unsigned long long *offsets,
> 		int disks, int chunk, int level, int layout,
> 		int dests, int *destfd, unsigned long long *destoffsets,
> @@ -1522,7 +1522,7 @@ static void validate(int afd, int bfd, unsigned long long offset)
> 	}
> }
>
> -static int child_grow(int afd, struct mdinfo *sra, unsigned long stripes,
> +static int child_grow(int afd, struct mdinfo *sra, unsigned long long stripes,
> 		      int *fds, unsigned long long *offsets,
> 		      int disks, int chunk, int level, int layout, int data,
> 		      int dests, int *destfd, unsigned long long *destoffsets)
> @@ -1550,7 +1550,7 @@ static int child_grow(int afd, struct mdinfo *sra, unsigned long stripes,
> 	return 1;
> }
>
> -static int child_shrink(int afd, struct mdinfo *sra, unsigned long stripes,
> +static int child_shrink(int afd, struct mdinfo *sra, unsigned long long stripes,
> 			int *fds, unsigned long long *offsets,
> 			int disks, int chunk, int level, int layout, int data,
> 			int dests, int *destfd, unsigned long long *destoffsets)
> @@ -1586,14 +1586,14 @@ static int child_shrink(int afd, struct mdinfo *sra, unsigned long stripes,
> 	return 1;
> }
>
> -static int child_same_size(int afd, struct mdinfo *sra, unsigned long stripes,
> +static int child_same_size(int afd, struct mdinfo *sra, unsigned long long stripes,
> 			   int *fds, unsigned long long *offsets,
> 			   unsigned long long start,
> 			   int disks, int chunk, int level, int layout, int data,
> 			   int dests, int *destfd, unsigned long long *destoffsets)
> {
> 	unsigned long long size;
> -	unsigned long tailstripes = stripes;
> +	unsigned long long tailstripes = stripes;
> 	int part;
> 	char *buf;
> 	unsigned long long speed;
> @@ -1960,7 +1960,7 @@ int Grow_continue(int mdfd, struct supertype *st, struct mdinfo *info,
> 	int backup_list[1];
> 	unsigned long long backup_offsets[1];
> 	int odisks, ndisks, ochunk, nchunk,odata,ndata;
> -	unsigned long a,b,blocks,stripes;
> +	unsigned long long a,b,blocks,stripes;
> 	int backup_fd;
> 	int *fds;
> 	unsigned long long *offsets;
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-raid" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: reshaping raid6 in-place
  2010-05-13  2:41   ` Frank Corrao
@ 2010-05-13  3:18     ` Neil Brown
  2010-05-13  4:04       ` Frank Corrao
  0 siblings, 1 reply; 5+ messages in thread
From: Neil Brown @ 2010-05-13  3:18 UTC (permalink / raw)
  To: Frank Corrao; +Cc: linux-raid

On Wed, 12 May 2010 22:41:52 -0400 (EDT)
Frank Corrao <fcorrao@temp555.com> wrote:

> On Thu, 13 May 2010, Neil Brown wrote:
> 
> > On Wed, 12 May 2010 19:10:35 -0400 (EDT)
> > Frank Corrao <fcorrao@temp555.com> wrote:
> >
> >> I have a 5-disk raid5 array which I recently reshaped to raid6 while
> >> simultaneously adding a 6th disk.  mdadm (3.1.1) reported that the
> >> kernel I was running at the time (2.6.30) wasn't safe to perform the
> >> in-place portion of the reshaping.  The array was partitionally
> >> converted to raid6, but I believe the Q blocks are all on the last
> >> device I added.  mdadm seems to report the algorithm as
> >> left-symmetric-6 as opposed to left-symmetric.  Today I upgraded to
> >> 2.6.32 and mdadm 3.1.2 and attempted to complete the re-shaping but ran
> >> into a snag:
> >>
> >> $ sudo ./mdadm -v --grow /dev/md2 -l 6 -n 6 -p left-symmetric
> >> mdadm: /dev/md2: Something wrong - reshape aborted
> >>
> >> Is my syntax correct?
> >>
> >> Frank
> >
> > This is a 32bit host isn't it???  You would have thought I would have learned
> > by now, but it seems not.  'blocks' in Grow.c is "unsigned long" and should
> > really be "unsigned long long".
> 
> Actually no - its x86_64.  I applied the patch, corrected the fprintf() 
> call on line 961 and recompiled but I still get the same error.
> 

Strange....
The error comes from

		if (blocks >= sra->component_size/2) {
			fprintf(stderr, Name ": %s: Something wrong - reshape aborted\n",
				devname);
			rv = 1;
			break;
		}

as odata == ndata (==4), blocks should be 32768.

sra->component_size should be 2860853248 (it is in sectors, so double the
number of kilobytes).

So that wouldn't overflow even on 32bit...  so my original analysis was
obviously wrong, as you discovered.

Could you add some printfs in there to find out what the values of
  blocks, sra->component_size, a, b, ochunk, nchunk, odata, ndata
are?

Also, while it isn't wrong to give "-l 6 -n 6" it is redundant because the
array is already that size/shape.
I would just use:
  mdadm -v --grow /dev/md2 --layout=normalise  --backup-file=/root/md2.backup

once we sort out what is wrong with 'blocks'.

NeilBrown

> 
> > Also your syntax is incomplete.  You need a backup-file for that reshape.
> > mdadm would have told you that if it hadn't confused itself first.
> 
> No change when adding the --backup-file option:
> 
> $ sudo ./mdadm -v --grow /dev/md2 -l 6 -n 6 -p left-symmetric --backup-file=/root/md2.backup
> mdadm: /dev/md2: Something wrong - reshape aborted
> 
> 
> Frank
> 
> 
> 
> 
> > This patch should help you.
> >
> > NeilBrown
> >
> > diff --git a/Grow.c b/Grow.c
> > index 0916c5d..d1a7b63 100644
> > --- a/Grow.c
> > +++ b/Grow.c
> > @@ -418,15 +418,15 @@ int bsb_csum(char *buf, int len)
> > 	return __cpu_to_le32(csum);
> > }
> >
> > -static int child_grow(int afd, struct mdinfo *sra, unsigned long blocks,
> > +static int child_grow(int afd, struct mdinfo *sra, unsigned long long blocks,
> > 		      int *fds, unsigned long long *offsets,
> > 		      int disks, int chunk, int level, int layout, int data,
> > 		      int dests, int *destfd, unsigned long long *destoffsets);
> > -static int child_shrink(int afd, struct mdinfo *sra, unsigned long blocks,
> > +static int child_shrink(int afd, struct mdinfo *sra, unsigned long long blocks,
> > 			int *fds, unsigned long long *offsets,
> > 			int disks, int chunk, int level, int layout, int data,
> > 			int dests, int *destfd, unsigned long long *destoffsets);
> > -static int child_same_size(int afd, struct mdinfo *sra, unsigned long blocks,
> > +static int child_same_size(int afd, struct mdinfo *sra, unsigned long long blocks,
> > 			   int *fds, unsigned long long *offsets,
> > 			   unsigned long long start,
> > 			   int disks, int chunk, int level, int layout, int data,
> > @@ -514,7 +514,7 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
> > 	int nrdisks;
> > 	int err;
> > 	int frozen;
> > -	unsigned long a,b, blocks, stripes;
> > +	unsigned long long a,b, blocks, stripes;
> > 	int cache;
> > 	unsigned long long array_size;
> > 	int changed = 0;
> > @@ -1262,7 +1262,7 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
> > /* FIXME return status is never checked */
> > int grow_backup(struct mdinfo *sra,
> > 		unsigned long long offset, /* per device */
> > -		unsigned long stripes, /* per device */
> > +		unsigned long long stripes, /* per device */
> > 		int *sources, unsigned long long *offsets,
> > 		int disks, int chunk, int level, int layout,
> > 		int dests, int *destfd, unsigned long long *destoffsets,
> > @@ -1522,7 +1522,7 @@ static void validate(int afd, int bfd, unsigned long long offset)
> > 	}
> > }
> >
> > -static int child_grow(int afd, struct mdinfo *sra, unsigned long stripes,
> > +static int child_grow(int afd, struct mdinfo *sra, unsigned long long stripes,
> > 		      int *fds, unsigned long long *offsets,
> > 		      int disks, int chunk, int level, int layout, int data,
> > 		      int dests, int *destfd, unsigned long long *destoffsets)
> > @@ -1550,7 +1550,7 @@ static int child_grow(int afd, struct mdinfo *sra, unsigned long stripes,
> > 	return 1;
> > }
> >
> > -static int child_shrink(int afd, struct mdinfo *sra, unsigned long stripes,
> > +static int child_shrink(int afd, struct mdinfo *sra, unsigned long long stripes,
> > 			int *fds, unsigned long long *offsets,
> > 			int disks, int chunk, int level, int layout, int data,
> > 			int dests, int *destfd, unsigned long long *destoffsets)
> > @@ -1586,14 +1586,14 @@ static int child_shrink(int afd, struct mdinfo *sra, unsigned long stripes,
> > 	return 1;
> > }
> >
> > -static int child_same_size(int afd, struct mdinfo *sra, unsigned long stripes,
> > +static int child_same_size(int afd, struct mdinfo *sra, unsigned long long stripes,
> > 			   int *fds, unsigned long long *offsets,
> > 			   unsigned long long start,
> > 			   int disks, int chunk, int level, int layout, int data,
> > 			   int dests, int *destfd, unsigned long long *destoffsets)
> > {
> > 	unsigned long long size;
> > -	unsigned long tailstripes = stripes;
> > +	unsigned long long tailstripes = stripes;
> > 	int part;
> > 	char *buf;
> > 	unsigned long long speed;
> > @@ -1960,7 +1960,7 @@ int Grow_continue(int mdfd, struct supertype *st, struct mdinfo *info,
> > 	int backup_list[1];
> > 	unsigned long long backup_offsets[1];
> > 	int odisks, ndisks, ochunk, nchunk,odata,ndata;
> > -	unsigned long a,b,blocks,stripes;
> > +	unsigned long long a,b,blocks,stripes;
> > 	int backup_fd;
> > 	int *fds;
> > 	unsigned long long *offsets;
> >
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-raid" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> >
> --
> To unsubscribe from this list: send the line "unsubscribe linux-raid" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: reshaping raid6 in-place
  2010-05-13  3:18     ` Neil Brown
@ 2010-05-13  4:04       ` Frank Corrao
  0 siblings, 0 replies; 5+ messages in thread
From: Frank Corrao @ 2010-05-13  4:04 UTC (permalink / raw)
  To: Neil Brown; +Cc: linux-raid

On Thu, 13 May 2010, Neil Brown wrote:

> On Wed, 12 May 2010 22:41:52 -0400 (EDT)
> Frank Corrao <fcorrao@temp555.com> wrote:
>
>> On Thu, 13 May 2010, Neil Brown wrote:
>>
>>> On Wed, 12 May 2010 19:10:35 -0400 (EDT)
>>> Frank Corrao <fcorrao@temp555.com> wrote:
>>>
>>>> I have a 5-disk raid5 array which I recently reshaped to raid6 while
>>>> simultaneously adding a 6th disk.  mdadm (3.1.1) reported that the
>>>> kernel I was running at the time (2.6.30) wasn't safe to perform the
>>>> in-place portion of the reshaping.  The array was partitionally
>>>> converted to raid6, but I believe the Q blocks are all on the last
>>>> device I added.  mdadm seems to report the algorithm as
>>>> left-symmetric-6 as opposed to left-symmetric.  Today I upgraded to
>>>> 2.6.32 and mdadm 3.1.2 and attempted to complete the re-shaping but ran
>>>> into a snag:
>>>>
>>>> $ sudo ./mdadm -v --grow /dev/md2 -l 6 -n 6 -p left-symmetric
>>>> mdadm: /dev/md2: Something wrong - reshape aborted
>>>>
>>>> Is my syntax correct?
>>>>
>>>> Frank
>>>
>>> This is a 32bit host isn't it???  You would have thought I would have learned
>>> by now, but it seems not.  'blocks' in Grow.c is "unsigned long" and should
>>> really be "unsigned long long".
>>
>> Actually no - its x86_64.  I applied the patch, corrected the fprintf()
>> call on line 961 and recompiled but I still get the same error.
>>
>
> Strange....
> The error comes from
>
> 		if (blocks >= sra->component_size/2) {
> 			fprintf(stderr, Name ": %s: Something wrong - reshape aborted\n",
> 				devname);
> 			rv = 1;
> 			break;
> 		}
>
> as odata == ndate (==4), blocks should be 32768.
>
> sra->component_size should be 2860853248 (it is in sectors, so double the
> number of kilobytes).
>
> So that wouldn't overflow even on 32bit...  so my original analysis was
> obviously wrong, as you discovered.
>
> Could you print some printfs in there to find out what the values of
>  blocks, sra->component_size, a, b, ochunk, nchuck, odata, ndata
> are?
>
> Also, while it isn't wrong to give "-l 6 -n 6" it is redundant because the
> array is already that size/shape.
> I would just use:
>  mdadm -v --grow /dev/md0 --layout=normalise  --backup-file=/root/md2.backup
>
> once we sort out why is wrong with 'blocks'.
>
> NeilBrown

Okay, here is the output with the values of all the variables that go 
into the calculation of blocks.  Based on the input values I believe 
the LCM should have been calculated to be 8192.


sudo ./mdadm -v --grow /dev/md2 --layout=normalise --backup-file=/root/md2.backup
before GCD calculation
         a: 8192
         b: 8192

before LCM calculation
         a: 8192
         b: 8192
         ochunk: 1048576
         nchunk: 1048576
         odata: 4
         ndata: 4

after LCM calculation
         blocks: 2251799813677056

after sysfs_read
         sra->component_size: 2860853248
         blocks: 2251799813677056
         a: 8192
         b: 8192
         ochunk: 1048576
         nchunk: 1048576
         odata: 4
         ndata: 4

mdadm: /dev/md2: Something wrong - reshape aborted




















>>> Also your syntax is incomplete.  You need a backup-file for that reshape.
>>> mdadm would have told you that if it hadn't confused itself first.
>>
>> No change when adding the --backup-file option:
>>
>> $ sudo ./mdadm -v --grow /dev/md2 -l 6 -n 6 -p left-symmetric --backup-file=/root/md2.backup
>> mdadm: /dev/md2: Something wrong - reshape aborted
>>
>>
>> Frank
>>
>>
>>
>>
>>> This patch should help you.
>>>
>>> NeilBrown
>>>
>>> diff --git a/Grow.c b/Grow.c
>>> index 0916c5d..d1a7b63 100644
>>> --- a/Grow.c
>>> +++ b/Grow.c
>>> @@ -418,15 +418,15 @@ int bsb_csum(char *buf, int len)
>>> 	return __cpu_to_le32(csum);
>>> }
>>>
>>> -static int child_grow(int afd, struct mdinfo *sra, unsigned long blocks,
>>> +static int child_grow(int afd, struct mdinfo *sra, unsigned long long blocks,
>>> 		      int *fds, unsigned long long *offsets,
>>> 		      int disks, int chunk, int level, int layout, int data,
>>> 		      int dests, int *destfd, unsigned long long *destoffsets);
>>> -static int child_shrink(int afd, struct mdinfo *sra, unsigned long blocks,
>>> +static int child_shrink(int afd, struct mdinfo *sra, unsigned long long blocks,
>>> 			int *fds, unsigned long long *offsets,
>>> 			int disks, int chunk, int level, int layout, int data,
>>> 			int dests, int *destfd, unsigned long long *destoffsets);
>>> -static int child_same_size(int afd, struct mdinfo *sra, unsigned long blocks,
>>> +static int child_same_size(int afd, struct mdinfo *sra, unsigned long long blocks,
>>> 			   int *fds, unsigned long long *offsets,
>>> 			   unsigned long long start,
>>> 			   int disks, int chunk, int level, int layout, int data,
>>> @@ -514,7 +514,7 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
>>> 	int nrdisks;
>>> 	int err;
>>> 	int frozen;
>>> -	unsigned long a,b, blocks, stripes;
>>> +	unsigned long long a,b, blocks, stripes;
>>> 	int cache;
>>> 	unsigned long long array_size;
>>> 	int changed = 0;
>>> @@ -1262,7 +1262,7 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
>>> /* FIXME return status is never checked */
>>> int grow_backup(struct mdinfo *sra,
>>> 		unsigned long long offset, /* per device */
>>> -		unsigned long stripes, /* per device */
>>> +		unsigned long long stripes, /* per device */
>>> 		int *sources, unsigned long long *offsets,
>>> 		int disks, int chunk, int level, int layout,
>>> 		int dests, int *destfd, unsigned long long *destoffsets,
>>> @@ -1522,7 +1522,7 @@ static void validate(int afd, int bfd, unsigned long long offset)
>>> 	}
>>> }
>>>
>>> -static int child_grow(int afd, struct mdinfo *sra, unsigned long stripes,
>>> +static int child_grow(int afd, struct mdinfo *sra, unsigned long long stripes,
>>> 		      int *fds, unsigned long long *offsets,
>>> 		      int disks, int chunk, int level, int layout, int data,
>>> 		      int dests, int *destfd, unsigned long long *destoffsets)
>>> @@ -1550,7 +1550,7 @@ static int child_grow(int afd, struct mdinfo *sra, unsigned long stripes,
>>> 	return 1;
>>> }
>>>
>>> -static int child_shrink(int afd, struct mdinfo *sra, unsigned long stripes,
>>> +static int child_shrink(int afd, struct mdinfo *sra, unsigned long long stripes,
>>> 			int *fds, unsigned long long *offsets,
>>> 			int disks, int chunk, int level, int layout, int data,
>>> 			int dests, int *destfd, unsigned long long *destoffsets)
>>> @@ -1586,14 +1586,14 @@ static int child_shrink(int afd, struct mdinfo *sra, unsigned long stripes,
>>> 	return 1;
>>> }
>>>
>>> -static int child_same_size(int afd, struct mdinfo *sra, unsigned long stripes,
>>> +static int child_same_size(int afd, struct mdinfo *sra, unsigned long long stripes,
>>> 			   int *fds, unsigned long long *offsets,
>>> 			   unsigned long long start,
>>> 			   int disks, int chunk, int level, int layout, int data,
>>> 			   int dests, int *destfd, unsigned long long *destoffsets)
>>> {
>>> 	unsigned long long size;
>>> -	unsigned long tailstripes = stripes;
>>> +	unsigned long long tailstripes = stripes;
>>> 	int part;
>>> 	char *buf;
>>> 	unsigned long long speed;
>>> @@ -1960,7 +1960,7 @@ int Grow_continue(int mdfd, struct supertype *st, struct mdinfo *info,
>>> 	int backup_list[1];
>>> 	unsigned long long backup_offsets[1];
>>> 	int odisks, ndisks, ochunk, nchunk,odata,ndata;
>>> -	unsigned long a,b,blocks,stripes;
>>> +	unsigned long long a,b,blocks,stripes;
>>> 	int backup_fd;
>>> 	int *fds;
>>> 	unsigned long long *offsets;
>>>
>>> --
>>> To unsubscribe from this list: send the line "unsubscribe linux-raid" in
>>> the body of a message to majordomo@vger.kernel.org
>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-raid" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2010-05-13  4:04 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-05-12 23:10 reshaping raid6 in-place Frank Corrao
2010-05-13  2:08 ` Neil Brown
2010-05-13  2:41   ` Frank Corrao
2010-05-13  3:18     ` Neil Brown
2010-05-13  4:04       ` Frank Corrao

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox