* [PATCH 7 of 8 - v2] MD: add bitmap support
From: Jonathan Brassow @ 2011-06-08 17:59 UTC (permalink / raw)
To: linux-raid
Neil, fixed power-of-two check and switched read_sb_page to just alloc_page. Also found another
place in bitmap.c that needs to switch to is_power_of_2, but I'll put that in another patch.
brassow
==========
Add bitmap support to the device-mapper specific metadata area.
This patch allows the creation of the bitmap metadata area upon initial array
creation via device-mapper.
Signed-off-by: Jonathan Brassow <jbrassow@redhat.com>
Index: linux-2.6/drivers/md/bitmap.c
===================================================================
--- linux-2.6.orig/drivers/md/bitmap.c
+++ linux-2.6/drivers/md/bitmap.c
@@ -534,6 +534,82 @@ void bitmap_print_sb(struct bitmap *bitm
kunmap_atomic(sb, KM_USER0);
}
+/*
+ * bitmap_new_disk_sb - build a new on-disk bitmap superblock in memory
+ * @bitmap: bitmap to set up; parameters come from bitmap->mddev->bitmap_info
+ *
+ * This function is somewhat the reverse of bitmap_read_sb.  bitmap_read_sb
+ * reads and verifies the on-disk bitmap superblock and populates bitmap_info.
+ * This function verifies 'bitmap_info' and populates the on-disk bitmap
+ * structure (held in bitmap->sb_page), which is to be written to disk.
+ *
+ * Returns: 0 on success, -ENOMEM or -EINVAL on error
+ */
+static int bitmap_new_disk_sb(struct bitmap *bitmap)
+{
+	bitmap_super_t *sb;
+	unsigned long chunksize, daemon_sleep, write_behind;
+
+	/*
+	 * alloc_page() returns NULL, never an ERR_PTR, on failure.  Zero the
+	 * page: sb->state is OR-ed into below and the page is bound for disk.
+	 */
+	bitmap->sb_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	if (!bitmap->sb_page)
+		return -ENOMEM;
+	bitmap->sb_page->index = 0;
+
+	sb = kmap_atomic(bitmap->sb_page, KM_USER0);
+
+	sb->magic = cpu_to_le32(BITMAP_MAGIC);
+	sb->version = cpu_to_le32(BITMAP_MAJOR_HI);
+
+	chunksize = bitmap->mddev->bitmap_info.chunksize;
+	BUG_ON(!chunksize);
+	if (!is_power_of_2(chunksize)) {
+		kunmap_atomic(sb, KM_USER0);
+		printk(KERN_ERR "bitmap chunksize not a power of 2\n");
+		return -EINVAL;
+	}
+	sb->chunksize = cpu_to_le32(chunksize);
+
+	daemon_sleep = bitmap->mddev->bitmap_info.daemon_sleep;
+	if (!daemon_sleep || daemon_sleep > MAX_SCHEDULE_TIMEOUT) {
+		printk(KERN_INFO "Choosing daemon_sleep default (5 sec)\n");
+		daemon_sleep = 5 * HZ;
+	}
+	sb->daemon_sleep = cpu_to_le32(daemon_sleep);
+	bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
+
+	/*
+	 * FIXME: write_behind for RAID1.  If not specified, what
+	 * is a good choice?  We choose COUNTER_MAX / 2 arbitrarily.
+	 */
+	write_behind = bitmap->mddev->bitmap_info.max_write_behind;
+	if (write_behind > COUNTER_MAX)
+		write_behind = COUNTER_MAX / 2;
+	sb->write_behind = cpu_to_le32(write_behind);
+	bitmap->mddev->bitmap_info.max_write_behind = write_behind;
+
+	/* keep the array size field of the bitmap superblock up to date */
+	sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
+
+	memcpy(sb->uuid, bitmap->mddev->uuid, 16);
+
+	bitmap->flags |= BITMAP_STALE;
+	sb->state |= cpu_to_le32(BITMAP_STALE);
+	bitmap->events_cleared = bitmap->mddev->events;
+	sb->events_cleared = cpu_to_le64(bitmap->mddev->events);
+
+	/* NOTE(review): this overrides BITMAP_MAJOR_HI set above - confirm */
+	bitmap->flags |= BITMAP_HOSTENDIAN;
+	sb->version = cpu_to_le32(BITMAP_MAJOR_HOSTENDIAN);
+
+	kunmap_atomic(sb, KM_USER0);
+
+	return 0;
+}
+
/* read the superblock from the bitmap file and initialize some bitmap fields */
static int bitmap_read_sb(struct bitmap *bitmap)
{
@@ -1076,8 +1152,8 @@ static int bitmap_init_from_disk(struct
}
printk(KERN_INFO "%s: bitmap initialized from disk: "
- "read %lu/%lu pages, set %lu bits\n",
- bmname(bitmap), bitmap->file_pages, num_pages, bit_cnt);
+ "read %lu/%lu pages, set %lu of %lu bits\n",
+ bmname(bitmap), bitmap->file_pages, num_pages, bit_cnt, chunks);
return 0;
@@ -1728,9 +1804,16 @@ int bitmap_create(mddev_t *mddev)
vfs_fsync(file, 1);
}
/* read superblock from bitmap file (this sets mddev->bitmap_info.chunksize) */
- if (!mddev->bitmap_info.external)
- err = bitmap_read_sb(bitmap);
- else {
+ if (!mddev->bitmap_info.external) {
+ /*
+ * If 'MD_ARRAY_FIRST_USE' is set, then device-mapper is
+ * instructing us to create a new on-disk bitmap instance.
+ */
+ if (test_and_clear_bit(MD_ARRAY_FIRST_USE, &mddev->flags))
+ err = bitmap_new_disk_sb(bitmap);
+ else
+ err = bitmap_read_sb(bitmap);
+ } else {
err = 0;
if (mddev->bitmap_info.chunksize == 0 ||
mddev->bitmap_info.daemon_sleep == 0)
Index: linux-2.6/drivers/md/md.h
===================================================================
--- linux-2.6.orig/drivers/md/md.h
+++ linux-2.6/drivers/md/md.h
@@ -124,6 +124,7 @@ struct mddev_s
#define MD_CHANGE_DEVS 0 /* Some device status has changed */
#define MD_CHANGE_CLEAN 1 /* transition to or from 'clean' */
#define MD_CHANGE_PENDING 2 /* switch from 'clean' to 'active' in progress */
+#define MD_ARRAY_FIRST_USE 3 /* First use of array, needs initialization */
int suspended;
atomic_t active_io;
^ permalink raw reply
* [PATCH 21/21] MAN: Man update for check-pointing
From: Adam Kwolek @ 2011-06-08 16:12 UTC (permalink / raw)
To: neilb; +Cc: linux-raid, dan.j.williams, ed.ciechanowski, wojciech.neubauer
In-Reply-To: <20110608160222.24327.71439.stgit@gklab-128-013.igk.intel.com>
Signed-off-by: Adam Kwolek <adam.kwolek@intel.com>
---
mdadm.8.in | 9 +++++++--
1 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/mdadm.8.in b/mdadm.8.in
index e1d5651..f549dbf 100644
--- a/mdadm.8.in
+++ b/mdadm.8.in
@@ -2225,8 +2225,8 @@ succeed.
This is for the following reasons:
.IP 1.
-Intel's native IMSM check-pointing is not fully implemented yet.
-This causes IMSM incompatibility during the grow process: an array
+Intel's native IMSM check-pointing is not fully tested yet.
+This can cause IMSM incompatibility during the grow process: an array
which is growing cannot roam between Microsoft Windows(R) and Linux
systems.
@@ -2234,6 +2234,11 @@ systems.
Interrupting a grow operation is not recommended, because it
has not been fully tested for Intel's IMSM container format yet.
+.PP
+Note: Intel's native checkpointing doesn't use
+.B --backup-file
+option and it is transparent to the assembly feature.
+
.SS SIZE CHANGES
Normally when an array is built the "size" is taken from the smallest
of the drives. If all the small drives in an arrays are, one at a
^ permalink raw reply related
* [PATCH 20/21] imsm: Optimize expansion speed when no backup is required
From: Adam Kwolek @ 2011-06-08 16:12 UTC (permalink / raw)
To: neilb; +Cc: linux-raid, dan.j.williams, ed.ciechanowski, wojciech.neubauer
In-Reply-To: <20110608160222.24327.71439.stgit@gklab-128-013.igk.intel.com>
When no reshape backup is required (e.g. OLCE after critical section),
check-pointing can use bigger steps than backup space allows for.
Signed-off-by: Adam Kwolek <adam.kwolek@intel.com>
---
super-intel.c | 8 ++++++++
1 files changed, 8 insertions(+), 0 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index fae1218..71a1189 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -8855,11 +8855,19 @@ static int imsm_manage_reshape(
"migration record (UNIT_SRC_IN_CP_AREA)\n");
goto abort;
}
+ } else {
+ /* set next step to use whole border area */
+ border /= next_step;
+ if (border > 1)
+ next_step *= border;
}
/* When data backed up, checkpoint stored,
* kick the kernel to reshape unit of data
*/
next_step = next_step + sra->reshape_progress;
+ /* limit next step to array max position */
+ if (next_step > max_position)
+ next_step = max_position;
sysfs_set_num(sra, NULL, "suspend_lo", sra->reshape_progress);
sysfs_set_num(sra, NULL, "suspend_hi", next_step);
sra->reshape_progress = next_step;
^ permalink raw reply related
* [PATCH 19/21] imsm: FIX: Remove timeout from wait_for_reshape_imsm()
From: Adam Kwolek @ 2011-06-08 16:12 UTC (permalink / raw)
To: neilb; +Cc: linux-raid, dan.j.williams, ed.ciechanowski, wojciech.neubauer
In-Reply-To: <20110608160222.24327.71439.stgit@gklab-128-013.igk.intel.com>
Timeout should not be used for select function in wait_for_reshape_imsm().
Signed-off-by: Adam Kwolek <adam.kwolek@intel.com>
---
super-intel.c | 15 +++++----------
1 files changed, 5 insertions(+), 10 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index 8806339..fae1218 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -8574,8 +8574,6 @@ int wait_for_reshape_imsm(struct mdinfo *sra, int ndata)
unsigned long long to_complete = sra->reshape_progress;
unsigned long long position_to_set = to_complete / ndata;
- struct timeval timeout;
-
if (fd < 0) {
dprintf("imsm: wait_for_reshape_imsm() "
"cannot open reshape_position\n");
@@ -8606,25 +8604,22 @@ int wait_for_reshape_imsm(struct mdinfo *sra, int ndata)
return -1;
}
- /* FIXME should not need a timeout at all */
- timeout.tv_sec = 30;
- timeout.tv_usec = 0;
do {
char action[20];
fd_set rfds;
FD_ZERO(&rfds);
FD_SET(fd, &rfds);
- select(fd+1, NULL, NULL, &rfds, &timeout);
+ select(fd+1, &rfds, NULL, NULL, NULL);
+ if (sysfs_get_str(sra, NULL, "sync_action",
+ action, 20) > 0 &&
+ strncmp(action, "reshape", 7) != 0)
+ break;
if (sysfs_fd_get_ll(fd, &completed) < 0) {
dprintf("imsm: wait_for_reshape_imsm() "
"cannot read reshape_position (in loop)\n");
close(fd);
return 1;
}
- if (sysfs_get_str(sra, NULL, "sync_action",
- action, 20) > 0 &&
- strncmp(action, "reshape", 7) != 0)
- break;
} while (completed < to_complete);
close(fd);
return 0;
^ permalink raw reply related
* [PATCH 18/21] imsm: FIX: wait_for_reshape_imsm() cleanup
From: Adam Kwolek @ 2011-06-08 16:11 UTC (permalink / raw)
To: neilb; +Cc: linux-raid, dan.j.williams, ed.ciechanowski, wojciech.neubauer
In-Reply-To: <20110608160222.24327.71439.stgit@gklab-128-013.igk.intel.com>
This function needs to be corrected.
It should check the status of sysfs operations and it should not give
a reshape position of 0 a special meaning.
The unused input parameter is also removed.
Signed-off-by: Adam Kwolek <adam.kwolek@intel.com>
---
super-intel.c | 54 +++++++++++++++++++++++++++++++++---------------------
1 files changed, 33 insertions(+), 21 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index 25e706f..8806339 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -8560,39 +8560,50 @@ exit_imsm_reshape_super:
* reshape process reach new position
* Parameters:
* sra : general array info
- * to_complete : new sync_max position
* ndata : number of disks in new array's layout
* Returns:
* 0 : success,
* 1 : there is no reshape in progress,
* -1 : fail
******************************************************************************/
-int wait_for_reshape_imsm(struct mdinfo *sra, unsigned long long to_complete,
- int ndata)
+int wait_for_reshape_imsm(struct mdinfo *sra, int ndata)
{
int fd = sysfs_get_fd(sra, NULL, "reshape_position");
unsigned long long completed;
+ /* to_complete : new sync_max position */
+ unsigned long long to_complete = sra->reshape_progress;
+ unsigned long long position_to_set = to_complete / ndata;
struct timeval timeout;
- if (fd < 0)
+ if (fd < 0) {
+ dprintf("imsm: wait_for_reshape_imsm() "
+ "cannot open reshape_position\n");
return 1;
+ }
- sysfs_fd_get_ll(fd, &completed);
+ if (sysfs_fd_get_ll(fd, &completed) < 0) {
+ dprintf("imsm: wait_for_reshape_imsm() "
+ "cannot read reshape_position (no reshape in progres)\n");
+ close(fd);
+ return 0;
+ }
- if (to_complete == 0) {/* reshape till the end of array */
- sysfs_set_str(sra, NULL, "sync_max", "max");
- to_complete = MaxSector;
- } else {
- if (completed > to_complete) {
- close(fd);
- return -1;
- }
- if (sysfs_set_num(sra, NULL, "sync_max",
- to_complete / ndata) != 0) {
- close(fd);
- return -1;
- }
+ if (completed > to_complete) {
+ dprintf("imsm: wait_for_reshape_imsm() "
+ "wrong next position to set %llu (%llu)\n",
+ to_complete, completed);
+ close(fd);
+ return -1;
+ }
+ dprintf("Position set: %llu\n", position_to_set);
+ if (sysfs_set_num(sra, NULL, "sync_max",
+ position_to_set) != 0) {
+ dprintf("imsm: wait_for_reshape_imsm() "
+ "cannot set reshape position to %llu\n",
+ position_to_set);
+ close(fd);
+ return -1;
}
/* FIXME should not need a timeout at all */
@@ -8605,6 +8616,8 @@ int wait_for_reshape_imsm(struct mdinfo *sra, unsigned long long to_complete,
FD_SET(fd, &rfds);
select(fd+1, NULL, NULL, &rfds, &timeout);
if (sysfs_fd_get_ll(fd, &completed) < 0) {
+ dprintf("imsm: wait_for_reshape_imsm() "
+ "cannot read reshape_position (in loop)\n");
close(fd);
return 1;
}
@@ -8854,15 +8867,14 @@ static int imsm_manage_reshape(
next_step = next_step + sra->reshape_progress;
sysfs_set_num(sra, NULL, "suspend_lo", sra->reshape_progress);
sysfs_set_num(sra, NULL, "suspend_hi", next_step);
+ sra->reshape_progress = next_step;
/* wait until reshape finish */
- if (wait_for_reshape_imsm(sra, next_step, ndata) < 0) {
+ if (wait_for_reshape_imsm(sra, ndata) < 0) {
dprintf("wait_for_reshape_imsm returned error!\n");
goto abort;
}
- sra->reshape_progress = next_step;
-
if (save_checkpoint_imsm(st, sra, UNIT_SRC_NORMAL) == 1) {
/* ignore error == 2, this can mean end of reshape here
*/
^ permalink raw reply related
* [PATCH 17/21] imsm: FIX: Do not continue reshape when backup exists
From: Adam Kwolek @ 2011-06-08 16:11 UTC (permalink / raw)
To: neilb; +Cc: linux-raid, dan.j.williams, ed.ciechanowski, wojciech.neubauer
In-Reply-To: <20110608160222.24327.71439.stgit@gklab-128-013.igk.intel.com>
When a backup exists in the copy area, the reshape cannot be continued.
In such a situation, the array is in an unstable state.
Signed-off-by: Adam Kwolek <adam.kwolek@intel.com>
---
super-intel.c | 7 +++++++
1 files changed, 7 insertions(+), 0 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index 2dd73c0..25e706f 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -8745,6 +8745,13 @@ static int imsm_manage_reshape(
/* initialize migration record for start condition */
if (sra->reshape_progress == 0)
init_migr_record_imsm(st, dev, sra);
+ else {
+ if (__le32_to_cpu(migr_rec->rec_status) != UNIT_SRC_NORMAL) {
+ dprintf("imsm: cannot restart migration when data "
+ "are present in copy area.\n");
+ goto abort;
+ }
+ }
/* size for data */
buf_size = __le32_to_cpu(migr_rec->blocks_per_unit) * 512;
^ permalink raw reply related
* [PATCH 16/21] FIX: Move buffer to next location
From: Adam Kwolek @ 2011-06-08 16:11 UTC (permalink / raw)
To: neilb; +Cc: linux-raid, dan.j.williams, ed.ciechanowski, wojciech.neubauer
In-Reply-To: <20110608160222.24327.71439.stgit@gklab-128-013.igk.intel.com>
When no output file is given, save_stripes() should collect the requested
number of stripes in the passed buffer. Currently all stripes are saved to the
same area of the passed buffer, so only the last stripe is returned, at the
start of the buffer. Advance the buffer (buf) pointer when save_stripes() is
about to move on to the next stripe. This lets the buffer be filled correctly,
as directed by the length input parameter.
Signed-off-by: Adam Kwolek <adam.kwolek@intel.com>
Signed-off-by: Krzysztof Wojcik <krzysztof.wojcik@intel.com>
---
restripe.c | 6 +++++-
1 files changed, 5 insertions(+), 1 deletions(-)
diff --git a/restripe.c b/restripe.c
index 1c42b60..79c695d 100644
--- a/restripe.c
+++ b/restripe.c
@@ -652,10 +652,14 @@ int save_stripes(int *source, unsigned long long *offsets,
fdisk[0], fdisk[1], bufs);
}
}
- if (dest)
+ if (dest) {
for (i = 0; i < nwrites; i++)
if (write(dest[i], buf, len) != len)
return -1;
+ } else {
+ /* build next stripe in buffer */
+ buf += len;
+ }
length -= len;
start += len;
}
^ permalink raw reply related
* [PATCH 15/21] imsm: FIX: Remove unused variables and code
From: Adam Kwolek @ 2011-06-08 16:11 UTC (permalink / raw)
To: neilb; +Cc: linux-raid, dan.j.williams, ed.ciechanowski, wojciech.neubauer
In-Reply-To: <20110608160222.24327.71439.stgit@gklab-128-013.igk.intel.com>
Unused variables and code can be removed from imsm_manage_reshape()
Signed-off-by: Adam Kwolek <adam.kwolek@intel.com>
---
super-intel.c | 14 +-------------
1 files changed, 1 insertions(+), 13 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index 55829cf..2dd73c0 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -8695,7 +8695,7 @@ static int imsm_manage_reshape(
struct intel_super *super = st->sb;
struct intel_dev *dv = NULL;
struct imsm_dev *dev = NULL;
- struct imsm_map *map_src, *map_dest;
+ struct imsm_map *map_src;
int migr_vol_qan = 0;
int ndata, odata; /* [bytes] */
int chunk; /* [bytes] */
@@ -8705,7 +8705,6 @@ static int imsm_manage_reshape(
unsigned long long max_position; /* array size [bytes] */
unsigned long long next_step; /* [blocks]/[bytes] */
unsigned long long old_data_stripe_length;
- unsigned long long new_data_stripe_length;
unsigned long long start_src; /* [bytes] */
unsigned long long start; /* [bytes] */
unsigned long long start_buf_shift; /* [bytes] */
@@ -8734,7 +8733,6 @@ static int imsm_manage_reshape(
map_src = get_imsm_map(dev, 1);
if (map_src == NULL)
goto abort;
- map_dest = get_imsm_map(dev, 0);
ndata = imsm_num_data_members(dev, 0);
odata = imsm_num_data_members(dev, 1);
@@ -8744,11 +8742,6 @@ static int imsm_manage_reshape(
migr_rec = super->migr_rec;
- /* [bytes] */
- sra->new_chunk = __le16_to_cpu(map_dest->blocks_per_strip) * 512;
- sra->new_level = map_dest->raid_level;
- new_data_stripe_length = sra->new_chunk * ndata;
-
/* initialize migration record for start condition */
if (sra->reshape_progress == 0)
init_migr_record_imsm(st, dev, sra);
@@ -8847,11 +8840,6 @@ static int imsm_manage_reshape(
"migration record (UNIT_SRC_IN_CP_AREA)\n");
goto abort;
}
- /* decrease backup_blocks */
- if (backup_blocks > (unsigned long)next_step)
- backup_blocks -= next_step;
- else
- backup_blocks = 0;
}
/* When data backed up, checkpoint stored,
* kick the kernel to reshape unit of data
^ permalink raw reply related
* [PATCH 14/21] imsm: FIX: Move reshape_progress forward
From: Adam Kwolek @ 2011-06-08 16:11 UTC (permalink / raw)
To: neilb; +Cc: linux-raid, dan.j.williams, ed.ciechanowski, wojciech.neubauer
In-Reply-To: <20110608160222.24327.71439.stgit@gklab-128-013.igk.intel.com>
When an array under reshape is assembled, the reshape position used in
sysfs_set_array() should be set to the position just after the area recovered
from the backup area.
This avoids data corruption caused by reshaping the same array area again.
Signed-off-by: Adam Kwolek <adam.kwolek@intel.com>
---
super-intel.c | 23 +++++++++++++----------
1 files changed, 13 insertions(+), 10 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index c19ffac..55829cf 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -2194,6 +2194,13 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info,
unsigned long long array_blocks;
int used_disks;
+ if (__le32_to_cpu(migr_rec->ascending_migr) &&
+ (units <
+ (__le32_to_cpu(migr_rec->num_migr_units)-1)) &&
+ (super->migr_rec->rec_status ==
+ __cpu_to_le32(UNIT_SRC_IN_CP_AREA)))
+ units++;
+
info->reshape_progress = blocks_per_unit * units;
dprintf("IMSM: General Migration checkpoint : %llu "
@@ -7824,7 +7831,6 @@ int recover_backup_imsm(struct supertype *st, struct mdinfo *info)
int retval = 1;
unsigned long curr_migr_unit = __le32_to_cpu(migr_rec->curr_migr_unit);
unsigned long num_migr_units = __le32_to_cpu(migr_rec->num_migr_units);
- int ascending = __le32_to_cpu(migr_rec->ascending_migr);
char buffer[20];
int skipped_disks = 0;
int max_degradation;
@@ -7907,16 +7913,13 @@ int recover_backup_imsm(struct supertype *st, struct mdinfo *info)
goto abort;
}
- if (ascending && curr_migr_unit < (num_migr_units-1))
- curr_migr_unit++;
-
- migr_rec->curr_migr_unit = __le32_to_cpu(curr_migr_unit);
- super->migr_rec->rec_status = __cpu_to_le32(UNIT_SRC_NORMAL);
- if (write_imsm_migr_rec(st) == 0) {
- __u64 blocks_per_unit = blocks_per_migr_unit(super, id->dev);
- info->reshape_progress = curr_migr_unit * blocks_per_unit;
+ if (save_checkpoint_imsm(st, info, UNIT_SRC_NORMAL)) {
+ /* ignore error == 2, this can mean end of reshape here
+ */
+ dprintf("imsm: Cannot write checkpoint to "
+ "migration record (UNIT_SRC_NORMAL) during restart\n");
+ } else
retval = 0;
- }
abort:
if (targets) {
^ permalink raw reply related
* [PATCH 13/21] imsm: FIX: Detect failed devices during recover_backup_imsm()
From: Adam Kwolek @ 2011-06-08 16:11 UTC (permalink / raw)
To: neilb; +Cc: linux-raid, dan.j.williams, ed.ciechanowski, wojciech.neubauer
In-Reply-To: <20110608160222.24327.71439.stgit@gklab-128-013.igk.intel.com>
In recover_backup_imsm(), check that the number of disks that could not be
opened does not exceed the degradation allowed for the given raid level.
This allows a reshape to be restarted on a degraded array.
Signed-off-by: Adam Kwolek <adam.kwolek@intel.com>
---
super-intel.c | 14 ++++++++++++++
1 files changed, 14 insertions(+), 0 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index 26083c3..c19ffac 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -7826,6 +7826,8 @@ int recover_backup_imsm(struct supertype *st, struct mdinfo *info)
unsigned long num_migr_units = __le32_to_cpu(migr_rec->num_migr_units);
int ascending = __le32_to_cpu(migr_rec->ascending_migr);
char buffer[20];
+ int skipped_disks = 0;
+ int max_degradation;
err = sysfs_get_str(info, NULL, "array_state", (char *)buffer, 20);
if (err < 1)
@@ -7849,6 +7851,7 @@ int recover_backup_imsm(struct supertype *st, struct mdinfo *info)
map_dest = get_imsm_map(id->dev, 0);
new_disks = map_dest->num_members;
+ max_degradation = new_disks - imsm_num_data_members(id->dev, 0);
read_offset = (unsigned long long)
__le32_to_cpu(migr_rec->ckpt_area_pba) * 512;
@@ -7867,6 +7870,10 @@ int recover_backup_imsm(struct supertype *st, struct mdinfo *info)
open_backup_targets(info, new_disks, targets);
for (i = 0; i < new_disks; i++) {
+ if (targets[i] < 0) {
+ skipped_disks++;
+ continue;
+ }
if (lseek64(targets[i], read_offset, SEEK_SET) < 0) {
fprintf(stderr,
Name ": Cannot seek to block: %s\n",
@@ -7893,6 +7900,13 @@ int recover_backup_imsm(struct supertype *st, struct mdinfo *info)
}
}
+ if (skipped_disks > max_degradation) {
+ fprintf(stderr,
+ Name ": Cannot restore data from backup."
+ " Too many failed disks\n");
+ goto abort;
+ }
+
if (ascending && curr_migr_unit < (num_migr_units-1))
curr_migr_unit++;
^ permalink raw reply related
* [PATCH 12/21] imsm: FIX: Use metadata information for restore_stripes() and save_stripes()
From: Adam Kwolek @ 2011-06-08 16:11 UTC (permalink / raw)
To: neilb; +Cc: linux-raid, dan.j.williams, ed.ciechanowski, wojciech.neubauer
In-Reply-To: <20110608160222.24327.71439.stgit@gklab-128-013.igk.intel.com>
For a raid0 reshape, imsm uses a degraded raid4 array for the operation.
Using the real raid level (raid0) for stripe calculation removes the need
for parity calculation and can speed up the reshape process.
Signed-off-by: Adam Kwolek <adam.kwolek@intel.com>
---
super-intel.c | 25 +++++++++++++++++--------
1 files changed, 17 insertions(+), 8 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index 708b51d..26083c3 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -7698,6 +7698,8 @@ int save_backup_imsm(struct supertype *st,
int i;
struct imsm_map *map_dest = get_imsm_map(dev, 0);
int new_disks = map_dest->num_members;
+ int dest_layout = 0;
+ int dest_chunk;
targets = malloc(new_disks * sizeof(int));
if (!targets)
@@ -7716,15 +7718,19 @@ int save_backup_imsm(struct supertype *st,
if (open_backup_targets(info, new_disks, targets))
goto abort;
+ if (map_dest->raid_level != 0)
+ dest_layout = ALGORITHM_LEFT_ASYMMETRIC;
+ dest_chunk = __le16_to_cpu(map_dest->blocks_per_strip) * 512;
+
if (restore_stripes(targets, /* list of dest devices */
target_offsets, /* migration record offsets */
new_disks,
- info->new_chunk,
- info->new_level,
- info->new_layout,
- -1, /* source backup file descriptor */
- 0, /* input buf offset
- * always 0 buf is already offset */
+ dest_chunk,
+ map_dest->raid_level,
+ dest_layout,
+ -1, /* source backup file descriptor */
+ 0, /* input buf offset
+ * always 0 buf is already offseted */
0,
length,
buf) != 0) {
@@ -8687,6 +8693,7 @@ static int imsm_manage_reshape(
unsigned long long start; /* [bytes] */
unsigned long long start_buf_shift; /* [bytes] */
int degraded = 0;
+ int source_layout = 0;
if (!fds || !offsets || !sra)
goto abort;
@@ -8741,6 +8748,8 @@ static int imsm_manage_reshape(
}
max_position = sra->component_size * ndata;
+ if (map_src->raid_level != 0)
+ source_layout = ALGORITHM_LEFT_ASYMMETRIC;
while (__le32_to_cpu(migr_rec->curr_migr_unit) <
__le32_to_cpu(migr_rec->num_migr_units)) {
@@ -8797,8 +8806,8 @@ static int imsm_manage_reshape(
start_buf_shift, next_step_filler);
if (save_stripes(fds, offsets, map_src->num_members,
- chunk, sra->array.level,
- sra->array.layout, 0, NULL, start_src,
+ chunk, map_src->raid_level,
+ source_layout, 0, NULL, start_src,
copy_length +
next_step_filler + start_buf_shift,
buf)) {
^ permalink raw reply related
* [PATCH 11/21] imsm: FIX: Remove unused parameter from save_backup_imsm() interface
From: Adam Kwolek @ 2011-06-08 16:11 UTC (permalink / raw)
To: neilb; +Cc: linux-raid, dan.j.williams, ed.ciechanowski, wojciech.neubauer
In-Reply-To: <20110608160222.24327.71439.stgit@gklab-128-013.igk.intel.com>
new_data parameter is not used in save_backup_imsm().
It is removed from function interface.
Signed-off-by: Adam Kwolek <adam.kwolek@intel.com>
---
super-intel.c | 6 ++----
1 files changed, 2 insertions(+), 4 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index b22c7df..708b51d 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -7677,9 +7677,9 @@ void init_migr_record_imsm(struct supertype *st, struct imsm_dev *dev,
* and to write it to the Copy Area.
* Parameters:
* st : supertype information
+ * dev : imsm device that backup is saved for
* info : general array info
* buf : input buffer
- * write_offset : address of data to backup
* length : length of data to backup (blocks_per_unit)
* Returns:
* 0 : success
@@ -7689,7 +7689,6 @@ int save_backup_imsm(struct supertype *st,
struct imsm_dev *dev,
struct mdinfo *info,
void *buf,
- int new_data,
int length)
{
int rv = -1;
@@ -8811,8 +8810,7 @@ static int imsm_manage_reshape(
* in backup general migration area
*/
if (save_backup_imsm(st, dev, sra,
- buf + start_buf_shift,
- ndata, copy_length)) {
+ buf + start_buf_shift, copy_length)) {
dprintf("imsm: Cannot save stripes to "
"target devices\n");
goto abort;
^ permalink raw reply related
* [PATCH 10/21] imsm: FIX: Do not use pba_of_lba0 for copy position calculation
From: Adam Kwolek @ 2011-06-08 16:10 UTC (permalink / raw)
To: neilb; +Cc: linux-raid, dan.j.williams, ed.ciechanowski, wojciech.neubauer
In-Reply-To: <20110608160222.24327.71439.stgit@gklab-128-013.igk.intel.com>
imsm_manage_reshape() should not shift start copy position.
This offset is passed to manage reshape function /and it is used/
as input parameter in offsets table already.
Signed-off-by: Adam Kwolek <adam.kwolek@intel.com>
---
super-intel.c | 4 +---
1 files changed, 1 insertions(+), 3 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index 437975f..b22c7df 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -8765,9 +8765,7 @@ static int imsm_manage_reshape(
if ((current_position + next_step) > max_position)
next_step = max_position - current_position;
- start = (__le32_to_cpu(map_src->pba_of_lba0) +
- __le32_to_cpu(dev->reserved_blocks) +
- current_position) * 512;
+ start = current_position * 512;
/* allign reading start to old geometry */
start_buf_shift = start % old_data_stripe_length;
^ permalink raw reply related
* [PATCH 09/21] imsm: FIX: Do not verify unused parameters
From: Adam Kwolek @ 2011-06-08 16:10 UTC (permalink / raw)
To: neilb; +Cc: linux-raid, dan.j.williams, ed.ciechanowski, wojciech.neubauer
In-Reply-To: <20110608160222.24327.71439.stgit@gklab-128-013.igk.intel.com>
Parameters that are not used by imsm_manage_reshape() should not cause
failure of this function.
Signed-off-by: Adam Kwolek <adam.kwolek@intel.com>
---
super-intel.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index 0d46132..437975f 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -8689,7 +8689,7 @@ static int imsm_manage_reshape(
unsigned long long start_buf_shift; /* [bytes] */
int degraded = 0;
- if (!fds || !offsets || !destfd || !destoffsets || !sra)
+ if (!fds || !offsets || !sra)
goto abort;
/* Find volume during the reshape */
^ permalink raw reply related
* [PATCH 08/21] imsm: FIX: Calculate backup location based on metadata information
From: Adam Kwolek @ 2011-06-08 16:10 UTC (permalink / raw)
To: neilb; +Cc: linux-raid, dan.j.williams, ed.ciechanowski, wojciech.neubauer
In-Reply-To: <20110608160222.24327.71439.stgit@gklab-128-013.igk.intel.com>
Use metadata information to calculate backup write offset.
Signed-off-by: Adam Kwolek <adam.kwolek@intel.com>
---
super-intel.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index fea7a3a..0d46132 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -7850,7 +7850,7 @@ int recover_backup_imsm(struct supertype *st, struct mdinfo *info)
write_offset = ((unsigned long long)
__le32_to_cpu(migr_rec->dest_1st_member_lba) +
- info->data_offset) * 512;
+ __le32_to_cpu(map_dest->pba_of_lba0)) * 512;
unit_len = __le32_to_cpu(migr_rec->dest_depth_per_unit) * 512;
if (posix_memalign((void **)&buf, 512, unit_len) != 0)
^ permalink raw reply related
* [PATCH 07/21] imsm: FIX: Use macros to data access
From: Adam Kwolek @ 2011-06-08 16:10 UTC (permalink / raw)
To: neilb; +Cc: linux-raid, dan.j.williams, ed.ciechanowski, wojciech.neubauer
In-Reply-To: <20110608160222.24327.71439.stgit@gklab-128-013.igk.intel.com>
Metadata fields have to be accessed using the proper macros.
Signed-off-by: Adam Kwolek <adam.kwolek@intel.com>
---
super-intel.c | 7 ++++---
1 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index 3baea6a..fea7a3a 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -1822,7 +1822,7 @@ static __u64 blocks_per_migr_unit(struct intel_super *super,
migr_chunk = migr_strip_blocks_resync(dev);
disks = imsm_num_data_members(dev, 0);
blocks_per_unit = stripes_per_unit * migr_chunk * disks;
- stripe = __le32_to_cpu(map->blocks_per_strip) * disks;
+ stripe = __le16_to_cpu(map->blocks_per_strip) * disks;
segment = blocks_per_unit / stripe;
block_rel = blocks_per_unit - segment * stripe;
parity_depth = parity_segment_depth(dev);
@@ -8716,7 +8716,7 @@ static int imsm_manage_reshape(
ndata = imsm_num_data_members(dev, 0);
odata = imsm_num_data_members(dev, 1);
- chunk = map_src->blocks_per_strip * 512;
+ chunk = __le16_to_cpu(map_src->blocks_per_strip) * 512;
old_data_stripe_length = odata * chunk;
migr_rec = super->migr_rec;
@@ -8765,7 +8765,8 @@ static int imsm_manage_reshape(
if ((current_position + next_step) > max_position)
next_step = max_position - current_position;
- start = (map_src->pba_of_lba0 + dev->reserved_blocks +
+ start = (__le32_to_cpu(map_src->pba_of_lba0) +
+ __le32_to_cpu(dev->reserved_blocks) +
current_position) * 512;
/* allign reading start to old geometry */
^ permalink raw reply related
* [PATCH 06/21] imsm: FIX: Check layout for level migration
From: Adam Kwolek @ 2011-06-08 16:10 UTC (permalink / raw)
To: neilb; +Cc: linux-raid, dan.j.williams, ed.ciechanowski, wojciech.neubauer
In-Reply-To: <20110608160222.24327.71439.stgit@gklab-128-013.igk.intel.com>
When the user doesn't specify a raid5 layout for a raid0->raid5 migration,
the layout structure member is uninitialized. At an earlier stage it cannot be
determined whether it is correct or not.
Proper verification is therefore placed in the metadata handler.
Signed-off-by: Adam Kwolek <adam.kwolek@intel.com>
Signed-off-by: Krzysztof Wojcik <krzysztof.wojcik@intel.com>
---
super-intel.c | 9 ++++++++-
1 files changed, 8 insertions(+), 1 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index 2468968..3baea6a 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -8278,7 +8278,6 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st,
int chunk;
getinfo_super_imsm_volume(st, &info, NULL);
-
if ((geo->level != info.array.level) &&
(geo->level >= 0) &&
(geo->level != UnSet)) {
@@ -8286,6 +8285,14 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st,
case 0:
if (geo->level == 5) {
change = CH_MIGRATION;
+ if (geo->layout != ALGORITHM_LEFT_ASYMMETRIC) {
+ fprintf(stderr,
+ Name " Error. Requested Layout "
+ "not supported (left-asymmetric layout "
+ "is supported only)!\n");
+ change = -1;
+ goto analyse_change_exit;
+ }
check_devs = 1;
}
if (geo->level == 10) {
^ permalink raw reply related
* [PATCH 05/21] imsm: FIX: Max position could not be rounded to MB
From: Adam Kwolek @ 2011-06-08 16:10 UTC (permalink / raw)
To: neilb; +Cc: linux-raid, dan.j.williams, ed.ciechanowski, wojciech.neubauer
In-Reply-To: <20110608160222.24327.71439.stgit@gklab-128-013.igk.intel.com>
When the rounded array size information from metadata is used to calculate
the number of migration units, the resulting number of units can be smaller
(by 1) than required, because the array size used is rounded.
Signed-off-by: Adam Kwolek <adam.kwolek@intel.com>
---
super-intel.c | 12 +++---------
1 files changed, 3 insertions(+), 9 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index 3afa913..2468968 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -7621,10 +7621,7 @@ void init_migr_record_imsm(struct supertype *st, struct imsm_dev *dev,
struct imsm_map *map_dest = get_imsm_map(dev, 0);
struct imsm_map *map_src = get_imsm_map(dev, 1);
unsigned long long num_migr_units;
-
- unsigned long long array_blocks =
- (((unsigned long long)__le32_to_cpu(dev->size_high)) << 32) +
- __le32_to_cpu(dev->size_low);
+ unsigned long long array_blocks;
memset(migr_rec, 0, sizeof(struct migr_record));
migr_rec->family_num = __cpu_to_le32(super->anchor->family_num);
@@ -7640,7 +7637,7 @@ void init_migr_record_imsm(struct supertype *st, struct imsm_dev *dev,
__cpu_to_le32(migr_rec->dest_depth_per_unit * new_data_disks);
migr_rec->dest_depth_per_unit =
__cpu_to_le32(migr_rec->dest_depth_per_unit);
-
+ array_blocks = info->component_size * new_data_disks;
num_migr_units =
array_blocks / __le32_to_cpu(migr_rec->blocks_per_unit);
@@ -8737,10 +8734,7 @@ static int imsm_manage_reshape(
goto abort;
}
- max_position =
- __le32_to_cpu(migr_rec->post_migr_vol_cap) +
- ((unsigned long long)__le32_to_cpu(
- migr_rec->post_migr_vol_cap_hi) << 32);
+ max_position = sra->component_size * ndata;
while (__le32_to_cpu(migr_rec->curr_migr_unit) <
__le32_to_cpu(migr_rec->num_migr_units)) {
^ permalink raw reply related
* [PATCH 04/21] imsm: FIX: Detect migration end during migration record saving
From: Adam Kwolek @ 2011-06-08 16:10 UTC (permalink / raw)
To: neilb; +Cc: linux-raid, dan.j.williams, ed.ciechanowski, wojciech.neubauer
In-Reply-To: <20110608160222.24327.71439.stgit@gklab-128-013.igk.intel.com>
A checkpoint should be saved only while a migration is in progress.
The end of a reshape (based on passes status) should be detected, and it
should not cause the reshape/check-pointing operation to abort.
Signed-off-by: Adam Kwolek <adam.kwolek@intel.com>
---
super-intel.c | 10 +++++++---
1 files changed, 7 insertions(+), 3 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index 269cb0a..3afa913 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -7760,6 +7760,8 @@ abort:
* Returns:
* 0: success
* 1: failure
+ * 2: failure, means no valid migration record
+ * / no general migration in progress /
******************************************************************************/
int save_checkpoint_imsm(struct supertype *st, struct mdinfo *info, int state)
{
@@ -7771,8 +7773,8 @@ int save_checkpoint_imsm(struct supertype *st, struct mdinfo *info, int state)
}
if (__le32_to_cpu(super->migr_rec->blocks_per_unit) == 0) {
- dprintf("ERROR: blocks_per_unit = 0!!!\n");
- return 1;
+ dprintf("imsm: no migration in progress.\n");
+ return 2;
}
super->migr_rec->curr_migr_unit =
@@ -8842,7 +8844,9 @@ static int imsm_manage_reshape(
sra->reshape_progress = next_step;
- if (save_checkpoint_imsm(st, sra, UNIT_SRC_NORMAL)) {
+ if (save_checkpoint_imsm(st, sra, UNIT_SRC_NORMAL) == 1) {
+ /* ignore error == 2, this can mean end of reshape here
+ */
dprintf("imsm: Cannot write checkpoint to "
"migration record (UNIT_SRC_NORMAL)\n");
goto abort;
^ permalink raw reply related
* [PATCH 03/21] imsm: FIX: Verify if migration record is loaded correctly
From: Adam Kwolek @ 2011-06-08 16:09 UTC (permalink / raw)
To: neilb; +Cc: linux-raid, dan.j.williams, ed.ciechanowski, wojciech.neubauer
In-Reply-To: <20110608160222.24327.71439.stgit@gklab-128-013.igk.intel.com>
Migration compatibility can be checked when general migration record
is present.
Signed-off-by: Adam Kwolek <adam.kwolek@intel.com>
---
super-intel.c | 32 +++++++++++++++++++-------------
1 files changed, 19 insertions(+), 13 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index 01ffcc8..269cb0a 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -3038,8 +3038,8 @@ static int load_imsm_mpb(int fd, struct intel_super *super, char *devname)
if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0) {
if (devname)
- fprintf(stderr,
- Name ": Cannot seek to anchor block on %s: %s\n",
+ fprintf(stderr, Name
+ ": Cannot seek to anchor block on %s: %s\n",
devname, strerror(errno));
return 1;
}
@@ -3836,16 +3836,17 @@ static int load_super_imsm(struct supertype *st, int fd, char *devname)
}
/* load migration record */
- load_imsm_migr_rec(super, NULL);
-
- /* Check for unsupported migration features */
- if (check_mpb_migr_compatibility(super) != 0) {
- fprintf(stderr, Name ": Unsupported migration detected");
- if (devname)
- fprintf(stderr, " on %s\n", devname);
- else
- fprintf(stderr, " (IMSM).\n");
- return 3;
+ if (load_imsm_migr_rec(super, NULL) == 0) {
+ /* Check for unsupported migration features */
+ if (check_mpb_migr_compatibility(super) != 0) {
+ fprintf(stderr,
+ Name ": Unsupported migration detected");
+ if (devname)
+ fprintf(stderr, " on %s\n", devname);
+ else
+ fprintf(stderr, " (IMSM).\n");
+ return 3;
+ }
}
return 0;
@@ -7763,7 +7764,12 @@ abort:
int save_checkpoint_imsm(struct supertype *st, struct mdinfo *info, int state)
{
struct intel_super *super = st->sb;
- load_imsm_migr_rec(super, info);
+ if (load_imsm_migr_rec(super, info) != 0) {
+ dprintf("imsm: ERROR: Cannot read migration record "
+ "for checkpoint save.\n");
+ return 1;
+ }
+
if (__le32_to_cpu(super->migr_rec->blocks_per_unit) == 0) {
dprintf("ERROR: blocks_per_unit = 0!!!\n");
return 1;
^ permalink raw reply related
* [PATCH 02/21] imsm: FIX: Opened handle is not closed
From: Adam Kwolek @ 2011-06-08 16:09 UTC (permalink / raw)
To: neilb; +Cc: linux-raid, dan.j.williams, ed.ciechanowski, wojciech.neubauer
In-Reply-To: <20110608160222.24327.71439.stgit@gklab-128-013.igk.intel.com>
Opened file handle should be closed before function exit.
Signed-off-by: Adam Kwolek <adam.kwolek@intel.com>
---
super-intel.c | 4 +++-
1 files changed, 3 insertions(+), 1 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index 471dbd2..01ffcc8 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -8550,8 +8550,10 @@ int wait_for_reshape_imsm(struct mdinfo *sra, unsigned long long to_complete,
sysfs_set_str(sra, NULL, "sync_max", "max");
to_complete = MaxSector;
} else {
- if (completed > to_complete)
+ if (completed > to_complete) {
+ close(fd);
return -1;
+ }
if (sysfs_set_num(sra, NULL, "sync_max",
to_complete / ndata) != 0) {
close(fd);
^ permalink raw reply related
* [PATCH 01/21] imsm: FIX: Cannot create volume
From: Adam Kwolek @ 2011-06-08 16:09 UTC (permalink / raw)
To: neilb; +Cc: linux-raid, dan.j.williams, ed.ciechanowski, wojciech.neubauer
In-Reply-To: <20110608160222.24327.71439.stgit@gklab-128-013.igk.intel.com>
Clearing the info structure leaves mdadm unable to create a workable volume.
During volume creation, the info structure passed to the getinfo() function
already contains some information and must not be cleared.
Signed-off-by: Adam Kwolek <adam.kwolek@intel.com>
---
super-intel.c | 1 -
1 files changed, 0 insertions(+), 1 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index b8d8b4e..471dbd2 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -2075,7 +2075,6 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info,
unsigned int component_size_alligment;
int map_disks = info->array.raid_disks;
- memset(info, 0, sizeof(*info));
if (prev_map)
map_to_analyse = prev_map;
^ permalink raw reply related
* [PATCH 00/21] IMSM Checkpointing Bug Fix Series
From: Adam Kwolek @ 2011-06-08 16:09 UTC (permalink / raw)
To: neilb; +Cc: linux-raid, dan.j.williams, ed.ciechanowski, wojciech.neubauer
The following series fixes problems found in IMSM's checkpointing.
It contains rework based on Neil's comments on the previous/initial
checkpointing series, and it should be applied on the neil_master branch
(on top of the previous checkpointing patches).
BR
Adam
---
Adam Kwolek (21):
MAN: Man update for check-pointing
imsm: Optimize expansion speed when no backup is required
imsm: FIX: Remove timeout from wait_for_reshape_imsm()
imsm: FIX: wait_for_reshape_imsm() cleanup
imsm: FIX: Do not continue reshape when backup exists
FIX: Move buffer to next location
imsm: FIX: Remove unused variables and code
imsm: FIX: Move reshape_progress forward
imsm: FIX: Detect failed devices during recover_backup_imsm()
imsm: FIX: Use metadata information for restore_stripes() and save_stripes()
imsm: FIX: Remove unused parameter from save_backup_imsm() interface
imsm: FIX: Do not use pba_of_lba0 for copy position calculation
imsm: FIX: Do not verify unused parameters
imsm: FIX: Calculate backup location based on metadata information
imsm: FIX: Use macros to data access
imsm: FIX: Check layout for level migration
imsm: FIX: Max position could not be rounded to MB
imsm: FIX: Detect migration end during migration record saving
imsm: FIX: Verify if migration record is loaded correctly
imsm: FIX: Opened handle is not closed
imsm: FIX: Cannot create volume
mdadm.8.in | 9 ++
restripe.c | 6 +
super-intel.c | 237 ++++++++++++++++++++++++++++++++++-----------------------
3 files changed, 153 insertions(+), 99 deletions(-)
--
Signature
^ permalink raw reply
* Re: from 2x RAID1 to 1x RAID6 ?
From: Stefan G. Weichinger @ 2011-06-08 14:42 UTC (permalink / raw)
To: linux-raid@vger.kernel.org
In-Reply-To: <4DEF8537.3090301@turmel.org>
Am 08.06.2011 16:20, schrieb Phil Turmel:
> Hi All,
>
> On 06/08/2011 06:33 AM, David Brown wrote:
>> On 08/06/2011 12:11, John Robinson wrote:
>>> On 08/06/2011 10:38, David Brown wrote:
>>>> On 08/06/2011 01:59, Thomas Harold wrote:
>>>>> On 6/7/2011 4:07 PM, Maurice Hilarius wrote:
>>>>>> On 6/7/2011 12:12 PM, Stefan G. Weichinger wrote:
>>>>>>> Greetings, could you please advise me how to proceed?
>>>>>>>
>>>>>>> On a server I have 2 RAID1-arrays, each consisting of 2
>>>>>>> TB-drives:
>>>>>>>
>>>>>>> ..
>>>>>>>
>>>>>>> Now I would like to move things to a more reliable RAID6
>>>>>>> consisting of all the four TB-drives ...
>>>>>>>
>>>>>>> How to do that with minimum risk?
>>>>>>>
>>>>>>> .. Maybe I overlook a clever alternative?
>>>>>>
>>>>>> RAID 10 is as secure, and risk free, and much faster. And
>>>>>> will cause much less CPU load.
>>>>>>
>>>>>
>>>>> Well, with both a pair of RAID1 arrays and a pair of RAID-10
>>>>> arrays, you can lose 2 disks without losing data, but only if
>>>>> the right 2 disks fail.
>>>>>
>>>>> With RAID6, any two of the four can fail without data loss.
>>>>>
>>>>
>>>> It /sounds/ like RAID6 is more reliable here because it can
>>>> always survive a second disk failure, while with RAID10 you
>>>> have only a 66% chance of surviving a second disk failure.
>>>>
>>>> However, how often does a disk fail? What is the chance of a
>>>> random disk failure in a given space of time? And how long will
>>>> it go between one disk failing, and it being replaced and the
>>>> array rebuilt? If you figure out these numbers, you'll have the
>>>> probability of losing your RAID10 array due to the second
>>>> critical disk failing.
>>>>
>>>> To pick some rough numbers - say you've got low reliability,
>>>> cheap disks with a 500,000 hour MTBF. If it takes you 3 days to
>>>> replace a disk (over the weekend), and 8 hours to rebuild, you
>>>> have a risk period of 80 hours. That gives you a 0.016% chance
>>>> of having the second disk failing. Even if you consider that a
>>>> rebuild is quite stressful on the critical disk, it's not a big
>>>> risk.
>>>
>>> It's not so much that the mirror disc might fail that I'd be
>>> worried about, it's that you might find the odd sector failure
>>> during the rebuild - this is the reason why RAID5 is now so
>>> disliked, and the reasons apply similarly to RAID1 and RAID10
>>> too, even if you're only relying on one disc ('s worth of data)
>>> being perfect rather than two or more.
>>
>> I can see that problem, but it again boils down to probabilities.
>> The chances of seeing an unrecoverable read error are very low,
>> just as with other disk errors.
>
> The chances of any given unrecoverable read error are low, but during
> the rebuild, you are going to read every sector of the remaining
> drive in a mirror pair, or every sector of every remaining drive in a
> degraded raid5. On large drives, you suddenly have a probability of
> uncorrectable error during rebuild that is orders of magnitude larger
> than the risk of a generic drive failure (in the rebuild window).
>
> Since Stefan reported that he does backups to this array, I suspect
> the performance is less important than the redundancy. The
> difference in redundancy is *very* significant.
>
> Here's some stats on disk failures themselves:
> http://www.storagemojo.com/2007/02/19/googles-disk-failure-experience/
>
> Here's some stats on read errors during rebuild:
> http://storagemojo.com/2010/02/27/does-raid-6-stops-working-in-2019/
>
> If I recall correctly, Google switched to exclusive use of
> triple-disk mirrors on its production servers for this very reason.
> (I can't find a link at the moment....)
>
>> The issue with RAID5 is that people often had large arrays with
>> multiple disks, and on a rebuild /every/ sector had to be read. So
>> if you have a ten disk RAID5 and are rebuilding, you are reading
>> from all other 9 disks - you have 9 times as high a chance of
>> having an unrecoverable read error ruin your day.
>>
>> I look forward to the day bad block lists and hot replace are ready
>> in mdraid - it will give us close to another disk's worth of
>> redundancy without the cost. For example, if one half of your
>> raid1 mirror fails but is not totally dead (such as by having too
>> many bad blocks), during rebuild you can keep both the good and bad
>> halves in place. Then if there is a read failure on the "good"
>> half, you can probably still get the data from the "bad" half.
>
> I don't see where either of these actually help the "rebuild after
> disk failure" situation?
phew ... thanks to all of you for your statements ...
I have to read through all this at first ...
;-)
Thanks, Stefan
^ permalink raw reply
* Re: SRaid with 13 Disks crashed
From: Phil Turmel @ 2011-06-08 14:39 UTC (permalink / raw)
To: Dragon; +Cc: linux-raid
In-Reply-To: <20110608142440.139240@gmx.net>
Hi Dragon,
On 06/08/2011 10:24 AM, Dragon wrote:
> SRaid with 13 Disks crashed
> Hello,
>
>
> this seems to be my last chance to get back all of my data from a sw-raid5 with 12-13 disks.
> i use debian ( 2.6.32-bpo.5-amd64) and last i wanted to grow the raid from 12 to 13 disk with a size at all of 18tb. after run mke2fs i must see that the tool on ext4 allow a maximum size of 16tb. after that i wanted to shrink the size back to 12 disk and now the raid is gone.
Did you actually mean "mke2fs" ? It destroys existing data. I hope you meant "resize2fs".
> i tried some assemble and examine things but without success.
>
> here some information:
> cat /proc/mdstat
> Personalities : [raid6] [raid5] [raid4]
> md0 : inactive sdh[0](S) sda[13](S) sdg[12](S) sdf[11](S) sde[10](S) sdd[9](S) sdc[8](S) sdb[6](S) sdm[5](S) sdl[4](S) sdj[3](S) sdi[2](S)
> 17581661952 blocks
>
> unused devices: <none>
>
> mdadm --detail /dev/md0
> mdadm: md device /dev/md0 does not appear to be active.
>
> mdadm --assemble --force -v /dev/md0 /dev/sdh /dev/sda /dev/sdg /dev/sdf /dev/sde /dev/sdd /dev/sdc /dev/sdb /dev/sdm /dev/sdl /dev/sdj /dev/sdi --update=super-minor /dev/sdh
Was /dev/sdk supposed to be in this list?
> mdadm: looking for devices for /dev/md0
> mdadm: updating superblock of /dev/sdh with minor number 0
> mdadm: /dev/sdh is identified as a member of /dev/md0, slot 0.
> mdadm: updating superblock of /dev/sda with minor number 0
> mdadm: /dev/sda is identified as a member of /dev/md0, slot 13.
This is suspicious. Looks like sda was added as a spare?
> mdadm: updating superblock of /dev/sdg with minor number 0
> mdadm: /dev/sdg is identified as a member of /dev/md0, slot 12.
> mdadm: updating superblock of /dev/sdf with minor number 0
> mdadm: /dev/sdf is identified as a member of /dev/md0, slot 11.
> mdadm: updating superblock of /dev/sde with minor number 0
> mdadm: /dev/sde is identified as a member of /dev/md0, slot 10.
> mdadm: updating superblock of /dev/sdd with minor number 0
> mdadm: /dev/sdd is identified as a member of /dev/md0, slot 9.
> mdadm: updating superblock of /dev/sdc with minor number 0
> mdadm: /dev/sdc is identified as a member of /dev/md0, slot 8.
> mdadm: updating superblock of /dev/sdb with minor number 0
> mdadm: /dev/sdb is identified as a member of /dev/md0, slot 6.
> mdadm: updating superblock of /dev/sdm with minor number 0
> mdadm: /dev/sdm is identified as a member of /dev/md0, slot 5.
> mdadm: updating superblock of /dev/sdl with minor number 0
> mdadm: /dev/sdl is identified as a member of /dev/md0, slot 4.
> mdadm: updating superblock of /dev/sdj with minor number 0
> mdadm: /dev/sdj is identified as a member of /dev/md0, slot 3.
> mdadm: updating superblock of /dev/sdi with minor number 0
> mdadm: /dev/sdi is identified as a member of /dev/md0, slot 2.
> mdadm: updating superblock of /dev/sdh with minor number 0
> mdadm: /dev/sdh is identified as a member of /dev/md0, slot 0.
> mdadm: no uptodate device for slot 1 of /dev/md0
> mdadm: added /dev/sdi to /dev/md0 as 2
> mdadm: added /dev/sdj to /dev/md0 as 3
> mdadm: added /dev/sdl to /dev/md0 as 4
> mdadm: added /dev/sdm to /dev/md0 as 5
> mdadm: added /dev/sdb to /dev/md0 as 6
> mdadm: no uptodate device for slot 7 of /dev/md0
> mdadm: added /dev/sdc to /dev/md0 as 8
> mdadm: added /dev/sdd to /dev/md0 as 9
> mdadm: added /dev/sde to /dev/md0 as 10
> mdadm: added /dev/sdf to /dev/md0 as 11
> mdadm: added /dev/sdg to /dev/md0 as 12
> mdadm: added /dev/sda to /dev/md0 as 13
> mdadm: added /dev/sdh to /dev/md0 as 0
> mdadm: /dev/md0 assembled from 11 drives and 1 spare - not enough to start the array.
Indeed. Your problem is likely to be /dev/sda.
> mdadm.conf
> #old=ARRAY /dev/md0 level=raid5 num-devices=13 metadata=0.90 UUID=975d6eb2:285eed11:021df236:c2d05073
> ARRAY /dev/md0 UUID=975d6eb2:285eed11:021df236:c2d05073
>
> Hope some can help. Thx
Please share the output of "mdadm -E /dev/sd[abcdefghijklm]"
Phil
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox