* [PATCH 01/13] imsm: catch attempt to auto-layout zero-length arrays
2009-12-22 23:59 [mdadm PATCH 00/13] rebuild / resync checkpointing and other external metadata fixes Dan Williams
@ 2009-12-22 23:59 ` Dan Williams
2009-12-22 23:59 ` [PATCH 02/13] imsm: honor orom constraints for auto-layout Dan Williams
` (12 subsequent siblings)
13 siblings, 0 replies; 17+ messages in thread
From: Dan Williams @ 2009-12-22 23:59 UTC (permalink / raw)
To: neilb; +Cc: linux-raid, ed.ciechanowski, marcin.labun
When -z is omitted, reserve_space() tries to satisfy a zero-length
allocation which, lo and behold, is equal to the amount of free space on a
full disk. So, catch maxsize == 0 and simplify the return value from
merge_extents() to always equal the amount of free space (there is no
benefit to having a special case ~0ULL == error).
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
super-intel.c | 21 ++++++++++++---------
1 files changed, 12 insertions(+), 9 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index 2e119f8..6fe5e0d 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -3333,7 +3333,7 @@ static unsigned long long merge_extents(struct intel_super *super, int sum_exten
unsigned long reserve;
if (!e)
- return ~0ULL; /* error */
+ return 0;
/* coalesce and sort all extents. also, check to see if we need to
* reserve space between member arrays
@@ -3376,17 +3376,23 @@ static unsigned long long merge_extents(struct intel_super *super, int sum_exten
} while (e[i-1].size);
free(e);
+ if (maxsize == 0)
+ return 0;
+
+ /* FIXME assumes volume at offset 0 is the first volume in a
+ * container
+ */
if (start_extent > 0)
reserve = IMSM_RESERVED_SECTORS; /* gap between raid regions */
else
reserve = 0;
if (maxsize < reserve)
- return ~0ULL;
+ return 0;
super->create_offset = ~((__u32) 0);
if (start + reserve > super->create_offset)
- return ~0ULL; /* start overflows create_offset */
+ return 0; /* start overflows create_offset */
super->create_offset = start + reserve;
return maxsize - reserve;
@@ -3569,15 +3575,11 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level,
i += dl->extent_cnt;
maxsize = merge_extents(super, i);
- if (maxsize < size) {
+ if (maxsize < size || maxsize == 0) {
if (verbose)
fprintf(stderr, Name ": not enough space after merge (%llu < %llu)\n",
maxsize, size);
return 0;
- } else if (maxsize == ~0ULL) {
- if (verbose)
- fprintf(stderr, Name ": failed to merge %d extents\n", i);
- return 0;
}
*freesize = maxsize;
@@ -3634,7 +3636,8 @@ static int reserve_space(struct supertype *st, int raiddisks,
if (cnt < raiddisks ||
(super->orom && used && used != raiddisks) ||
- maxsize < minsize) {
+ maxsize < minsize ||
+ maxsize == 0) {
fprintf(stderr, Name ": not enough devices with space to create array.\n");
return 0; /* No enough free spaces large enough */
}
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH 02/13] imsm: honor orom constraints for auto-layout
2009-12-22 23:59 [mdadm PATCH 00/13] rebuild / resync checkpointing and other external metadata fixes Dan Williams
2009-12-22 23:59 ` [PATCH 01/13] imsm: catch attempt to auto-layout zero-length arrays Dan Williams
@ 2009-12-22 23:59 ` Dan Williams
2009-12-22 23:59 ` [PATCH 03/13] imsm: fix spare promotion Dan Williams
` (11 subsequent siblings)
13 siblings, 0 replies; 17+ messages in thread
From: Dan Williams @ 2009-12-22 23:59 UTC (permalink / raw)
To: neilb; +Cc: linux-raid, ed.ciechanowski, marcin.labun
Factor out the orom checking bits to validate_geometry_imsm_orom() and
share it between validate_geometry_imsm_volume() and the entry path to
reserve_space().
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
super-intel.c | 52 +++++++++++++++++++++++++++++++++-------------------
1 files changed, 33 insertions(+), 19 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index 6fe5e0d..4372ab4 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -3421,6 +3421,34 @@ static int is_raid_level_supported(const struct imsm_orom *orom, int level, int
}
#define pr_vrb(fmt, arg...) (void) (verbose && fprintf(stderr, Name fmt, ##arg))
+static int
+validate_geometry_imsm_orom(struct intel_super *super, int level, int layout,
+ int raiddisks, int chunk, int verbose)
+{
+ if (!is_raid_level_supported(super->orom, level, raiddisks)) {
+ pr_vrb(": platform does not support raid%d with %d disk%s\n",
+ level, raiddisks, raiddisks > 1 ? "s" : "");
+ return 0;
+ }
+ if (super->orom && level != 1 &&
+ !imsm_orom_has_chunk(super->orom, chunk)) {
+ pr_vrb(": platform does not support a chunk size of: %d\n", chunk);
+ return 0;
+ }
+ if (layout != imsm_level_to_layout(level)) {
+ if (level == 5)
+ pr_vrb(": imsm raid 5 only supports the left-asymmetric layout\n");
+ else if (level == 10)
+ pr_vrb(": imsm raid 10 only supports the n2 layout\n");
+ else
+ pr_vrb(": imsm unknown layout %#x for this raid level %d\n",
+ layout, level);
+ return 0;
+ }
+
+ return 1;
+}
+
/* validate_geometry_imsm_volume - lifted from validate_geometry_ddf_bvd
* FIX ME add ahci details
*/
@@ -3443,26 +3471,8 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level,
if (!super)
return 0;
- if (!is_raid_level_supported(super->orom, level, raiddisks)) {
- pr_vrb(": platform does not support raid%d with %d disk%s\n",
- level, raiddisks, raiddisks > 1 ? "s" : "");
+ if (!validate_geometry_imsm_orom(super, level, layout, raiddisks, chunk, verbose))
return 0;
- }
- if (super->orom && level != 1 &&
- !imsm_orom_has_chunk(super->orom, chunk)) {
- pr_vrb(": platform does not support a chunk size of: %d\n", chunk);
- return 0;
- }
- if (layout != imsm_level_to_layout(level)) {
- if (level == 5)
- pr_vrb(": imsm raid 5 only supports the left-asymmetric layout\n");
- else if (level == 10)
- pr_vrb(": imsm raid 10 only supports the n2 layout\n");
- else
- pr_vrb(": imsm unknown layout %#x for this raid level %d\n",
- layout, level);
- return 0;
- }
if (!dev) {
/* General test: make sure there is space for
@@ -3689,6 +3699,10 @@ static int validate_geometry_imsm(struct supertype *st, int level, int layout,
* created. add_to_super and getinfo_super
* detect when autolayout is in progress.
*/
+ if (!validate_geometry_imsm_orom(st->sb, level, layout,
+ raiddisks, chunk,
+ verbose))
+ return 0;
return reserve_space(st, raiddisks, size, chunk, freesize);
}
return 1;
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH 03/13] imsm: fix spare promotion
2009-12-22 23:59 [mdadm PATCH 00/13] rebuild / resync checkpointing and other external metadata fixes Dan Williams
2009-12-22 23:59 ` [PATCH 01/13] imsm: catch attempt to auto-layout zero-length arrays Dan Williams
2009-12-22 23:59 ` [PATCH 02/13] imsm: honor orom constraints for auto-layout Dan Williams
@ 2009-12-22 23:59 ` Dan Williams
2009-12-22 23:59 ` [PATCH 04/13] imsm: fix thunderdome segfault Dan Williams
` (10 subsequent siblings)
13 siblings, 0 replies; 17+ messages in thread
From: Dan Williams @ 2009-12-22 23:59 UTC (permalink / raw)
To: neilb; +Cc: linux-raid, ed.ciechanowski, marcin.labun
When associating a spare, take on the target's metadata version number to
satisfy future compare_super checks.
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
super-intel.c | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index 4372ab4..39a2985 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -1566,6 +1566,7 @@ static int compare_super_imsm(struct supertype *st, struct supertype *tst)
first->anchor->num_raid_devs = sec->anchor->num_raid_devs;
first->anchor->orig_family_num = sec->anchor->orig_family_num;
first->anchor->family_num = sec->anchor->family_num;
+ memcpy(first->anchor->sig, sec->anchor->sig, MAX_SIGNATURE_LENGTH);
for (i = 0; i < sec->anchor->num_raid_devs; i++)
imsm_copy_dev(get_imsm_dev(first, i), get_imsm_dev(sec, i));
}
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH 04/13] imsm: fix thunderdome segfault
2009-12-22 23:59 [mdadm PATCH 00/13] rebuild / resync checkpointing and other external metadata fixes Dan Williams
` (2 preceding siblings ...)
2009-12-22 23:59 ` [PATCH 03/13] imsm: fix spare promotion Dan Williams
@ 2009-12-22 23:59 ` Dan Williams
2009-12-22 23:59 ` [PATCH 05/13] util: fix devnum2devname for devnum == 0 Dan Williams
` (9 subsequent siblings)
13 siblings, 0 replies; 17+ messages in thread
From: Dan Williams @ 2009-12-22 23:59 UTC (permalink / raw)
To: neilb; +Cc: linux-raid, ed.ciechanowski, marcin.labun
disk_list_get() can return NULL if:
1/ A formerly missing disk is re-added
2/ The original array has not been rebuilt, so the family number of the
missing disk still matches
3/ The metadata records of the in-sync disks are read before that of the
missing disk
This will result in the missing disk not adding its own serial number to
the disk_list; only its truncated value will be present.
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
super-intel.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index 39a2985..2d5796f 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -2429,7 +2429,7 @@ imsm_thunderdome(struct intel_super **super_list, int len)
struct intel_disk *idisk;
idisk = disk_list_get(dl->serial, disk_list);
- if (is_spare(&idisk->disk) &&
+ if (idisk && is_spare(&idisk->disk) &&
!is_failed(&idisk->disk) && !is_configured(&idisk->disk))
dl->index = -1;
else {
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH 05/13] util: fix devnum2devname for devnum == 0
2009-12-22 23:59 [mdadm PATCH 00/13] rebuild / resync checkpointing and other external metadata fixes Dan Williams
` (3 preceding siblings ...)
2009-12-22 23:59 ` [PATCH 04/13] imsm: fix thunderdome segfault Dan Williams
@ 2009-12-22 23:59 ` Dan Williams
2009-12-22 23:59 ` [PATCH 06/13] imsm: cleanup print_imsm_dev() Dan Williams
` (8 subsequent siblings)
13 siblings, 0 replies; 17+ messages in thread
From: Dan Williams @ 2009-12-22 23:59 UTC (permalink / raw)
To: neilb; +Cc: linux-raid, ed.ciechanowski, marcin.labun
devnum 0 is md0, not md_d-1
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
util.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/util.c b/util.c
index a0e4bcf..d49a4ed 100644
--- a/util.c
+++ b/util.c
@@ -1210,7 +1210,7 @@ int set_array_info(int mdfd, struct supertype *st, struct mdinfo *info)
char *devnum2devname(int num)
{
char name[100];
- if (num > 0)
+ if (num >= 0)
sprintf(name, "md%d", num);
else
sprintf(name, "md_d%d", -1-num);
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH 06/13] imsm: cleanup print_imsm_dev()
2009-12-22 23:59 [mdadm PATCH 00/13] rebuild / resync checkpointing and other external metadata fixes Dan Williams
` (4 preceding siblings ...)
2009-12-22 23:59 ` [PATCH 05/13] util: fix devnum2devname for devnum == 0 Dan Williams
@ 2009-12-22 23:59 ` Dan Williams
2009-12-22 23:59 ` [PATCH 07/13] mdmon: cleanup manage_member() leak Dan Williams
` (7 subsequent siblings)
13 siblings, 0 replies; 17+ messages in thread
From: Dan Williams @ 2009-12-22 23:59 UTC (permalink / raw)
To: neilb; +Cc: linux-raid, ed.ciechanowski, marcin.labun
When printing the migration state there is no need to print "migrating".
The fact that the state is non-idle should be enough indication.
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
super-intel.c | 19 ++++++++++---------
1 files changed, 10 insertions(+), 9 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index 2d5796f..ab8172d 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -669,23 +669,24 @@ static void print_imsm_dev(struct imsm_dev *dev, char *uuid, int disk_idx)
printf(" Chunk Size : %u KiB\n",
__le16_to_cpu(map->blocks_per_strip) / 2);
printf(" Reserved : %d\n", __le32_to_cpu(dev->reserved_blocks));
- printf(" Migrate State : %s", dev->vol.migr_state ? "migrating" : "idle\n");
+ printf(" Migrate State : ");
if (dev->vol.migr_state) {
if (migr_type(dev) == MIGR_INIT)
- printf(": initializing\n");
+ printf("initialize\n");
else if (migr_type(dev) == MIGR_REBUILD)
- printf(": rebuilding\n");
+ printf("rebuild\n");
else if (migr_type(dev) == MIGR_VERIFY)
- printf(": check\n");
+ printf("check\n");
else if (migr_type(dev) == MIGR_GEN_MIGR)
- printf(": general migration\n");
+ printf("general migration\n");
else if (migr_type(dev) == MIGR_STATE_CHANGE)
- printf(": state change\n");
+ printf("state change\n");
else if (migr_type(dev) == MIGR_REPAIR)
- printf(": repair\n");
+ printf("repair\n");
else
- printf(": <unknown:%d>\n", migr_type(dev));
- }
+ printf("<unknown:%d>\n", migr_type(dev));
+ } else
+ printf("idle\n");
printf(" Map State : %s", map_state_str[map->map_state]);
if (dev->vol.migr_state) {
struct imsm_map *map = get_imsm_map(dev, 1);
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH 07/13] mdmon: cleanup manage_member() leak
2009-12-22 23:59 [mdadm PATCH 00/13] rebuild / resync checkpointing and other external metadata fixes Dan Williams
` (5 preceding siblings ...)
2009-12-22 23:59 ` [PATCH 06/13] imsm: cleanup print_imsm_dev() Dan Williams
@ 2009-12-22 23:59 ` Dan Williams
2009-12-23 0:00 ` [PATCH 08/13] mdmon: cleanup resync_start Dan Williams
` (6 subsequent siblings)
13 siblings, 0 replies; 17+ messages in thread
From: Dan Williams @ 2009-12-22 23:59 UTC (permalink / raw)
To: neilb; +Cc: linux-raid, ed.ciechanowski, marcin.labun
free() the results of activate_spare().
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
managemon.c | 85 ++++++++++++++++++++++++++++++++++++-----------------------
1 files changed, 52 insertions(+), 33 deletions(-)
diff --git a/managemon.c b/managemon.c
index 5958e18..19effe4 100644
--- a/managemon.c
+++ b/managemon.c
@@ -209,16 +209,22 @@ struct metadata_update *update_queue = NULL;
struct metadata_update *update_queue_handled = NULL;
struct metadata_update *update_queue_pending = NULL;
-void check_update_queue(struct supertype *container)
+static void free_updates(struct metadata_update **update)
{
- while (update_queue_handled) {
- struct metadata_update *this = update_queue_handled;
- update_queue_handled = this->next;
+ while (*update) {
+ struct metadata_update *this = *update;
+
+ *update = this->next;
free(this->buf);
- if (this->space)
- free(this->space);
+ free(this->space);
free(this);
}
+}
+
+void check_update_queue(struct supertype *container)
+{
+ free_updates(&update_queue_handled);
+
if (update_queue == NULL &&
update_queue_pending) {
update_queue = update_queue_pending;
@@ -376,8 +382,9 @@ static void manage_member(struct mdstat_ent *mdstat,
if (a->check_degraded) {
struct metadata_update *updates = NULL;
- struct mdinfo *newdev;
+ struct mdinfo *newdev = NULL;
struct active_array *newa;
+ struct mdinfo *d;
a->check_degraded = 0;
@@ -385,34 +392,46 @@ static void manage_member(struct mdstat_ent *mdstat,
* to check.
*/
newdev = a->container->ss->activate_spare(a, &updates);
- if (newdev) {
- struct mdinfo *d;
- /* Cool, we can add a device or several. */
- newa = duplicate_aa(a);
- /* suspend recovery - maybe not needed */
-
- /* Add device to array and set offset/size/slot.
- * and open files for each newdev */
- for (d = newdev; d ; d = d->next) {
- struct mdinfo *newd;
- if (sysfs_add_disk(&newa->info, d, 0) < 0)
- continue;
- newd = malloc(sizeof(*newd));
- *newd = *d;
- newd->next = newa->info.devs;
- newa->info.devs = newd;
-
- newd->state_fd = sysfs_open(a->devnum,
- newd->sys_name,
- "state");
- newd->prev_state
- = read_dev_state(newd->state_fd);
- newd->curr_state = newd->prev_state;
+ if (!newdev)
+ return;
+
+ newa = duplicate_aa(a);
+ if (!newa)
+ goto out;
+ /* Cool, we can add a device or several. */
+
+ /* Add device to array and set offset/size/slot.
+ * and open files for each newdev */
+ for (d = newdev; d ; d = d->next) {
+ struct mdinfo *newd;
+
+ newd = malloc(sizeof(*newd));
+ if (!newd)
+ continue;
+ if (sysfs_add_disk(&newa->info, d, 0) < 0) {
+ free(newd);
+ continue;
}
- queue_metadata_update(updates);
- replace_array(a->container, a, newa);
- sysfs_set_str(&a->info, NULL, "sync_action", "recover");
+ *newd = *d;
+ newd->next = newa->info.devs;
+ newa->info.devs = newd;
+
+ newd->state_fd = sysfs_open(a->devnum, newd->sys_name,
+ "state");
+ newd->prev_state = read_dev_state(newd->state_fd);
+ newd->curr_state = newd->prev_state;
+ }
+ queue_metadata_update(updates);
+ updates = NULL;
+ replace_array(a->container, a, newa);
+ sysfs_set_str(&a->info, NULL, "sync_action", "recover");
+ out:
+ while (newdev) {
+ d = newdev->next;
+ free(newdev);
+ newdev = d;
}
+ free_updates(&updates);
}
}
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH 08/13] mdmon: cleanup resync_start
2009-12-22 23:59 [mdadm PATCH 00/13] rebuild / resync checkpointing and other external metadata fixes Dan Williams
` (6 preceding siblings ...)
2009-12-22 23:59 ` [PATCH 07/13] mdmon: cleanup manage_member() leak Dan Williams
@ 2009-12-23 0:00 ` Dan Williams
2009-12-23 0:00 ` [PATCH 10/13] Introduce MaxSector Dan Williams
` (5 subsequent siblings)
13 siblings, 0 replies; 17+ messages in thread
From: Dan Williams @ 2009-12-23 0:00 UTC (permalink / raw)
To: neilb; +Cc: linux-raid, ed.ciechanowski, marcin.labun
We don't need to sprinkle reads of this attribute all over the place,
just once at the entry of read_and_act(). Also, the mdinfo structure
for the array already has a 'resync_start' member, so just reuse that.
Finally, rename get_resync_start() to read_resync_start() to make it
consistent with the other sysfs accessors in monitor.c.
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
managemon.c | 1 -
mdmon.h | 7 ++-----
monitor.c | 19 ++++++-------------
super-ddf.c | 8 ++++----
super-intel.c | 9 ++++-----
5 files changed, 16 insertions(+), 28 deletions(-)
diff --git a/managemon.c b/managemon.c
index 19effe4..e77f045 100644
--- a/managemon.c
+++ b/managemon.c
@@ -541,7 +541,6 @@ static void manage_new(struct mdstat_ent *mdstat,
new->info.state_fd = sysfs_open(new->devnum, NULL, "array_state");
new->resync_start_fd = sysfs_open(new->devnum, NULL, "resync_start");
new->metadata_fd = sysfs_open(new->devnum, NULL, "metadata_version");
- get_resync_start(new);
dprintf("%s: inst: %d action: %d state: %d\n", __func__, atoi(inst),
new->action_fd, new->info.state_fd);
diff --git a/mdmon.h b/mdmon.h
index 7cfee35..4494085 100644
--- a/mdmon.h
+++ b/mdmon.h
@@ -39,8 +39,6 @@ struct active_array {
int check_degraded; /* flag set by mon, read by manage */
int devnum;
-
- unsigned long long resync_start;
};
/*
@@ -73,7 +71,6 @@ extern int socket_hup_requested;
extern int sigterm;
int read_dev_state(int fd);
-int get_resync_start(struct active_array *a);
int is_container_member(struct mdstat_ent *mdstat, char *container);
struct mdstat_ent *mdstat_read(int hold, int start);
@@ -85,9 +82,9 @@ extern int monitor_loop_cnt;
/* helper routine to determine resync completion since MaxSector is a
* moving target
*/
-static inline int is_resync_complete(struct active_array *a)
+static inline int is_resync_complete(struct mdinfo *array)
{
- if (a->resync_start >= a->info.component_size)
+ if (array->resync_start >= array->component_size)
return 1;
return 0;
}
diff --git a/monitor.c b/monitor.c
index 0cafc3a..a8e0af3 100644
--- a/monitor.c
+++ b/monitor.c
@@ -66,23 +66,20 @@ static int read_attr(char *buf, int len, int fd)
return n;
}
-int get_resync_start(struct active_array *a)
+static unsigned long long read_resync_start(int fd)
{
char buf[30];
int n;
- n = read_attr(buf, 30, a->resync_start_fd);
+ n = read_attr(buf, 30, fd);
if (n <= 0)
- return n;
+ return 0;
if (strncmp(buf, "none", 4) == 0)
- a->resync_start = ~0ULL;
+ return ~0ULL;
else
- a->resync_start = strtoull(buf, NULL, 10);
-
- return 1;
+ return strtoull(buf, NULL, 10);
}
-
static enum array_state read_state(int fd)
{
char buf[20];
@@ -208,6 +205,7 @@ static int read_and_act(struct active_array *a)
a->curr_state = read_state(a->info.state_fd);
a->curr_action = read_action(a->action_fd);
+ a->info.resync_start = read_resync_start(a->resync_start_fd);
for (mdi = a->info.devs; mdi ; mdi = mdi->next) {
mdi->next_state = 0;
if (mdi->state_fd >= 0)
@@ -217,13 +215,11 @@ static int read_and_act(struct active_array *a)
if (a->curr_state <= inactive &&
a->prev_state > inactive) {
/* array has been stopped */
- get_resync_start(a);
a->container->ss->set_array_state(a, 1);
a->next_state = clear;
deactivate = 1;
}
if (a->curr_state == write_pending) {
- get_resync_start(a);
a->container->ss->set_array_state(a, 0);
a->next_state = active;
dirty = 1;
@@ -236,7 +232,6 @@ static int read_and_act(struct active_array *a)
dirty = 1;
}
if (a->curr_state == clean) {
- get_resync_start(a);
a->container->ss->set_array_state(a, 1);
}
if (a->curr_state == active ||
@@ -253,7 +248,6 @@ static int read_and_act(struct active_array *a)
/* explicit request for readonly array. Leave it alone */
;
} else {
- get_resync_start(a);
if (a->container->ss->set_array_state(a, 2))
a->next_state = read_auto; /* array is clean */
else {
@@ -271,7 +265,6 @@ static int read_and_act(struct active_array *a)
* until the array goes inactive or readonly though.
* Just check if we need to fiddle spares.
*/
- get_resync_start(a);
a->container->ss->set_array_state(a, a->curr_state <= clean);
check_degraded = 1;
}
diff --git a/super-ddf.c b/super-ddf.c
index fe83642..f5eb816 100644
--- a/super-ddf.c
+++ b/super-ddf.c
@@ -3066,7 +3066,7 @@ static int ddf_set_array_state(struct active_array *a, int consistent)
if (consistent == 2) {
/* Should check if a recovery should be started FIXME */
consistent = 1;
- if (!is_resync_complete(a))
+ if (!is_resync_complete(&a->info))
consistent = 0;
}
if (consistent)
@@ -3078,9 +3078,9 @@ static int ddf_set_array_state(struct active_array *a, int consistent)
old = ddf->virt->entries[inst].init_state;
ddf->virt->entries[inst].init_state &= ~DDF_initstate_mask;
- if (is_resync_complete(a))
+ if (is_resync_complete(&a->info))
ddf->virt->entries[inst].init_state |= DDF_init_full;
- else if (a->resync_start == 0)
+ else if (a->info.resync_start == 0)
ddf->virt->entries[inst].init_state |= DDF_init_not;
else
ddf->virt->entries[inst].init_state |= DDF_init_quick;
@@ -3088,7 +3088,7 @@ static int ddf_set_array_state(struct active_array *a, int consistent)
ddf->updates_pending = 1;
dprintf("ddf mark %d %s %llu\n", inst, consistent?"clean":"dirty",
- a->resync_start);
+ a->info.resync_start);
return consistent;
}
diff --git a/super-intel.c b/super-intel.c
index ab8172d..4072fc8 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -4108,12 +4108,12 @@ static int imsm_set_array_state(struct active_array *a, int consistent)
}
if (consistent == 2 &&
- (!is_resync_complete(a) ||
+ (!is_resync_complete(&a->info) ||
map_state != IMSM_T_STATE_NORMAL ||
dev->vol.migr_state))
consistent = 0;
- if (is_resync_complete(a)) {
+ if (is_resync_complete(&a->info)) {
/* complete intialization / resync,
* recovery and interrupted recovery is completed in
* ->set_disk
@@ -4125,7 +4125,7 @@ static int imsm_set_array_state(struct active_array *a, int consistent)
}
} else if (!is_resyncing(dev) && !failed) {
/* mark the start of the init process if nothing is failed */
- dprintf("imsm: mark resync start (%llu)\n", a->resync_start);
+ dprintf("imsm: mark resync start\n");
if (map->map_state == IMSM_T_STATE_UNINITIALIZED)
migrate(dev, IMSM_T_STATE_NORMAL, MIGR_INIT);
else
@@ -4137,8 +4137,7 @@ static int imsm_set_array_state(struct active_array *a, int consistent)
/* mark dirty / clean */
if (dev->vol.dirty != !consistent) {
- dprintf("imsm: mark '%s' (%llu)\n",
- consistent ? "clean" : "dirty", a->resync_start);
+ dprintf("imsm: mark '%s'\n", consistent ? "clean" : "dirty");
if (consistent)
dev->vol.dirty = 0;
else
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH 10/13] Introduce MaxSector
2009-12-22 23:59 [mdadm PATCH 00/13] rebuild / resync checkpointing and other external metadata fixes Dan Williams
` (7 preceding siblings ...)
2009-12-23 0:00 ` [PATCH 08/13] mdmon: cleanup resync_start Dan Williams
@ 2009-12-23 0:00 ` Dan Williams
2009-12-23 0:00 ` [PATCH 11/13] Teach sysfs_add_disk() callers to use ->recovery_start versus 'insync' parameter Dan Williams
` (4 subsequent siblings)
13 siblings, 0 replies; 17+ messages in thread
From: Dan Williams @ 2009-12-23 0:00 UTC (permalink / raw)
To: neilb; +Cc: linux-raid, ed.ciechanowski, marcin.labun
Replace occurrences of ~0ULL with MaxSector to make it clear we are talking
about the maximal resync/recovery position.
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
Create.c | 2 +-
mdadm.h | 1 +
monitor.c | 2 +-
super-ddf.c | 4 ++--
super-intel.c | 8 ++++----
super1.c | 6 +++---
6 files changed, 12 insertions(+), 11 deletions(-)
diff --git a/Create.c b/Create.c
index 5b01b63..1ae7f92 100644
--- a/Create.c
+++ b/Create.c
@@ -527,7 +527,7 @@ int Create(struct supertype *st, char *mddev,
assume_clean
) {
info.array.state = 1; /* clean, but one+ drive will be missing*/
- info.resync_start = ~0ULL;
+ info.resync_start = MaxSector;
} else {
info.array.state = 0; /* not clean, but no errors */
info.resync_start = 0;
diff --git a/mdadm.h b/mdadm.h
index 7b75540..9cf15c4 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -149,6 +149,7 @@ struct mdinfo {
union {
unsigned long long resync_start; /* per-array resync position */
unsigned long long recovery_start; /* per-device rebuild position */
+ #define MaxSector (~0ULL) /* resync/recovery complete position */
};
unsigned long safe_mode_delay; /* ms delay to mark clean */
int new_level, delta_disks, new_layout, new_chunk;
diff --git a/monitor.c b/monitor.c
index 58752a8..81fef49 100644
--- a/monitor.c
+++ b/monitor.c
@@ -75,7 +75,7 @@ static unsigned long long read_resync_start(int fd)
if (n <= 0)
return 0;
if (strncmp(buf, "none", 4) == 0)
- return ~0ULL;
+ return MaxSector;
else
return strtoull(buf, NULL, 10);
}
diff --git a/super-ddf.c b/super-ddf.c
index f5eb816..8c3f4be 100644
--- a/super-ddf.c
+++ b/super-ddf.c
@@ -1433,7 +1433,7 @@ static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info)
(ddf->virt->entries[info->container_member].init_state
& DDF_initstate_mask)
== DDF_init_full)
- info->resync_start = ~0ULL;
+ info->resync_start = MaxSector;
uuid_from_super_ddf(st, info->uuid);
@@ -2921,7 +2921,7 @@ static struct mdinfo *container_content_ddf(struct supertype *st)
this->resync_start = 0;
} else {
this->array.state = 1;
- this->resync_start = ~0ULL;
+ this->resync_start = MaxSector;
}
memcpy(this->name, ddf->virt->entries[i].name, 16);
this->name[16]=0;
diff --git a/super-intel.c b/super-intel.c
index 4072fc8..4bb1990 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -1271,7 +1271,7 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info)
/* FIXME add curr_migr_unit to resync_start conversion */
info->resync_start = 0;
else
- info->resync_start = ~0ULL;
+ info->resync_start = MaxSector;
strncpy(info->name, (char *) dev->volume, MAX_RAID_SERIAL_LEN);
info->name[MAX_RAID_SERIAL_LEN] = 0;
@@ -3482,7 +3482,7 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level,
* offset
*/
unsigned long long minsize = size;
- unsigned long long start_offset = ~0ULL;
+ unsigned long long start_offset = MaxSector;
int dcnt = 0;
if (minsize == 0)
minsize = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
@@ -3498,7 +3498,7 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level,
esize = e[i].start - pos;
if (esize >= minsize)
found = 1;
- if (found && start_offset == ~0ULL) {
+ if (found && start_offset == MaxSector) {
start_offset = pos;
break;
} else if (found && pos != start_offset) {
@@ -3856,7 +3856,7 @@ static struct mdinfo *container_content_imsm(struct supertype *st)
* FIXME handle dirty degraded
*/
if (skip && !dev->vol.dirty)
- this->resync_start = ~0ULL;
+ this->resync_start = MaxSector;
if (skip)
continue;
diff --git a/super1.c b/super1.c
index 2c992a4..85bb598 100644
--- a/super1.c
+++ b/super1.c
@@ -659,9 +659,9 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
switch(__le32_to_cpu(sb->level)) {
case 5: case 4: case 6:
/* need to force clean */
- if (sb->resync_offset != ~0ULL)
+ if (sb->resync_offset != MaxSector)
rv = 1;
- sb->resync_offset = ~0ULL;
+ sb->resync_offset = MaxSector;
}
}
if (strcmp(update, "assemble")==0) {
@@ -855,7 +855,7 @@ static int init_super1(struct supertype *st, mdu_array_info_t *info,
sb->utime = sb->ctime;
sb->events = __cpu_to_le64(1);
if (info->state & (1<<MD_SB_CLEAN))
- sb->resync_offset = ~0ULL;
+ sb->resync_offset = MaxSector;
else
sb->resync_offset = 0;
sb->max_dev = __cpu_to_le32((1024- sizeof(struct mdp_superblock_1))/
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH 11/13] Teach sysfs_add_disk() callers to use ->recovery_start versus 'insync' parameter
2009-12-22 23:59 [mdadm PATCH 00/13] rebuild / resync checkpointing and other external metadata fixes Dan Williams
` (8 preceding siblings ...)
2009-12-23 0:00 ` [PATCH 10/13] Introduce MaxSector Dan Williams
@ 2009-12-23 0:00 ` Dan Williams
2009-12-23 0:00 ` [PATCH 12/13] Support external metadata recovery-resume Dan Williams
` (3 subsequent siblings)
13 siblings, 0 replies; 17+ messages in thread
From: Dan Williams @ 2009-12-23 0:00 UTC (permalink / raw)
To: neilb; +Cc: linux-raid, ed.ciechanowski, marcin.labun
Also fix up the 'in_sync' versus 'insync' typo in the string written to the sysfs 'state' attribute.
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
Assemble.c | 2 +-
Manage.c | 3 ++-
managemon.c | 2 +-
mdadm.h | 3 +--
super-ddf.c | 2 ++
super-intel.c | 2 ++
sysfs.c | 6 +++---
util.c | 7 +++++--
8 files changed, 17 insertions(+), 10 deletions(-)
diff --git a/Assemble.c b/Assemble.c
index 014d644..560e2fe 100644
--- a/Assemble.c
+++ b/Assemble.c
@@ -1286,7 +1286,7 @@ int assemble_container_content(struct supertype *st, int mdfd,
sysfs_free(sra);
for (dev = content->devs; dev; dev = dev->next)
- if (sysfs_add_disk(content, dev, 1) == 0)
+ if (sysfs_add_disk(content, dev) == 0)
working++;
else if (errno == EEXIST)
preexist++;
diff --git a/Manage.c b/Manage.c
index fb9b972..6f0a6a2 100644
--- a/Manage.c
+++ b/Manage.c
@@ -696,7 +696,8 @@ int Manage_subdevs(char *devname, int fd,
tst->ss->getinfo_super(tst, &new_mdi);
new_mdi.disk.major = disc.major;
new_mdi.disk.minor = disc.minor;
- if (sysfs_add_disk(sra, &new_mdi, 0) != 0) {
+ new_mdi.recovery_start = 0;
+ if (sysfs_add_disk(sra, &new_mdi) != 0) {
fprintf(stderr, Name ": add new device to external metadata"
" failed for %s\n", dv->devname);
close(container_fd);
diff --git a/managemon.c b/managemon.c
index e335077..3a20e2b 100644
--- a/managemon.c
+++ b/managemon.c
@@ -410,7 +410,7 @@ static void manage_member(struct mdstat_ent *mdstat,
newd = malloc(sizeof(*newd));
if (!newd)
continue;
- if (sysfs_add_disk(&newa->info, d, 0) < 0) {
+ if (sysfs_add_disk(&newa->info, d) < 0) {
free(newd);
continue;
}
diff --git a/mdadm.h b/mdadm.h
index 9cf15c4..af6d91b 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -385,8 +385,7 @@ extern int sysfs_get_str(struct mdinfo *sra, struct mdinfo *dev,
char *name, char *val, int size);
extern int sysfs_set_safemode(struct mdinfo *sra, unsigned long ms);
extern int sysfs_set_array(struct mdinfo *info, int vers);
-extern int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd,
- int in_sync);
+extern int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd);
extern int sysfs_disk_to_scsi_id(int fd, __u32 *id);
extern int sysfs_unique_holder(int devnum, long rdev);
extern int load_sys(char *path, char *buf);
diff --git a/super-ddf.c b/super-ddf.c
index 8c3f4be..14f8330 100644
--- a/super-ddf.c
+++ b/super-ddf.c
@@ -2968,6 +2968,7 @@ static struct mdinfo *container_content_ddf(struct supertype *st)
dev->disk.minor = d->minor;
dev->disk.raid_disk = i;
dev->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
+ dev->recovery_start = MaxSector;
dev->events = __be32_to_cpu(ddf->primary.seq);
dev->data_offset = __be64_to_cpu(vc->lba_offset[i]);
@@ -3547,6 +3548,7 @@ static struct mdinfo *ddf_activate_spare(struct active_array *a,
di->disk.major = dl->major;
di->disk.minor = dl->minor;
di->disk.state = 0;
+ di->recovery_start = 0;
di->data_offset = pos;
di->component_size = a->info.component_size;
di->container_member = dl->pdnum;
diff --git a/super-intel.c b/super-intel.c
index 4bb1990..9f879c5 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -3876,6 +3876,7 @@ static struct mdinfo *container_content_imsm(struct supertype *st)
info_d->disk.major = d->major;
info_d->disk.minor = d->minor;
info_d->disk.raid_disk = slot;
+ info_d->recovery_start = MaxSector;
this->array.working_disks++;
@@ -4454,6 +4455,7 @@ static struct mdinfo *imsm_activate_spare(struct active_array *a,
di->disk.major = dl->major;
di->disk.minor = dl->minor;
di->disk.state = 0;
+ di->recovery_start = 0;
di->data_offset = __le32_to_cpu(map->pba_of_lba0);
di->component_size = a->info.component_size;
di->container_member = inst;
diff --git a/sysfs.c b/sysfs.c
index 35dfbd4..8fdb529 100644
--- a/sysfs.c
+++ b/sysfs.c
@@ -572,7 +572,7 @@ int sysfs_set_array(struct mdinfo *info, int vers)
return rv;
}
-int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd, int in_sync)
+int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd)
{
char dv[100];
char nm[100];
@@ -598,11 +598,11 @@ int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd, int in_sync)
rv = sysfs_set_num(sra, sd, "offset", sd->data_offset);
rv |= sysfs_set_num(sra, sd, "size", (sd->component_size+1) / 2);
if (sra->array.level != LEVEL_CONTAINER) {
- if (in_sync)
+ if (sd->recovery_start == MaxSector)
/* This can correctly fail if array isn't started,
* yet, so just ignore status for now.
*/
- sysfs_set_str(sra, sd, "state", "in_sync");
+ sysfs_set_str(sra, sd, "state", "insync");
rv |= sysfs_set_num(sra, sd, "slot", sd->disk.raid_disk);
}
return rv;
diff --git a/util.c b/util.c
index d49a4ed..05be64c 100644
--- a/util.c
+++ b/util.c
@@ -1162,8 +1162,11 @@ int add_disk(int mdfd, struct supertype *st,
int rv;
#ifndef MDASSEMBLE
if (st->ss->external) {
- rv = sysfs_add_disk(sra, info,
- info->disk.state & (1<<MD_DISK_SYNC));
+ if (info->disk.state & (1<<MD_DISK_SYNC))
+ info->recovery_start = MaxSector;
+ else
+ info->recovery_start = 0;
+ rv = sysfs_add_disk(sra, info);
if (! rv) {
struct mdinfo *sd2;
for (sd2 = sra->devs; sd2; sd2=sd2->next)
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH 12/13] Support external metadata recovery-resume
2009-12-22 23:59 [mdadm PATCH 00/13] rebuild / resync checkpointing and other external metadata fixes Dan Williams
` (9 preceding siblings ...)
2009-12-23 0:00 ` [PATCH 11/13] Teach sysfs_add_disk() callers to use ->recovery_start versus 'insync' parameter Dan Williams
@ 2009-12-23 0:00 ` Dan Williams
2009-12-23 0:00 ` [PATCH 13/13] imsm: add support for checkpointing via 'curr_migr_unit' Dan Williams
` (2 subsequent siblings)
13 siblings, 0 replies; 17+ messages in thread
From: Dan Williams @ 2009-12-23 0:00 UTC (permalink / raw)
To: neilb; +Cc: linux-raid, ed.ciechanowski, marcin.labun
Minimal changes needed to permit reassembling partially recovered
external metadata arrays. The biggest logical change is that
->container_content() can now surface partially rebuilt members rather
than omitting them from the disk list.
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
Assemble.c | 2 +-
Manage.c | 2 +-
managemon.c | 2 +-
mdadm.h | 2 +-
sysfs.c | 11 ++++++++++-
util.c | 2 +-
6 files changed, 15 insertions(+), 6 deletions(-)
diff --git a/Assemble.c b/Assemble.c
index 560e2fe..014d644 100644
--- a/Assemble.c
+++ b/Assemble.c
@@ -1286,7 +1286,7 @@ int assemble_container_content(struct supertype *st, int mdfd,
sysfs_free(sra);
for (dev = content->devs; dev; dev = dev->next)
- if (sysfs_add_disk(content, dev) == 0)
+ if (sysfs_add_disk(content, dev, 1) == 0)
working++;
else if (errno == EEXIST)
preexist++;
diff --git a/Manage.c b/Manage.c
index 6f0a6a2..df6079b 100644
--- a/Manage.c
+++ b/Manage.c
@@ -697,7 +697,7 @@ int Manage_subdevs(char *devname, int fd,
new_mdi.disk.major = disc.major;
new_mdi.disk.minor = disc.minor;
new_mdi.recovery_start = 0;
- if (sysfs_add_disk(sra, &new_mdi) != 0) {
+ if (sysfs_add_disk(sra, &new_mdi, 0) != 0) {
fprintf(stderr, Name ": add new device to external metadata"
" failed for %s\n", dv->devname);
close(container_fd);
diff --git a/managemon.c b/managemon.c
index 3a20e2b..e335077 100644
--- a/managemon.c
+++ b/managemon.c
@@ -410,7 +410,7 @@ static void manage_member(struct mdstat_ent *mdstat,
newd = malloc(sizeof(*newd));
if (!newd)
continue;
- if (sysfs_add_disk(&newa->info, d) < 0) {
+ if (sysfs_add_disk(&newa->info, d, 0) < 0) {
free(newd);
continue;
}
diff --git a/mdadm.h b/mdadm.h
index af6d91b..27ef693 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -385,7 +385,7 @@ extern int sysfs_get_str(struct mdinfo *sra, struct mdinfo *dev,
char *name, char *val, int size);
extern int sysfs_set_safemode(struct mdinfo *sra, unsigned long ms);
extern int sysfs_set_array(struct mdinfo *info, int vers);
-extern int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd);
+extern int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd, int resume);
extern int sysfs_disk_to_scsi_id(int fd, __u32 *id);
extern int sysfs_unique_holder(int devnum, long rdev);
extern int load_sys(char *path, char *buf);
diff --git a/sysfs.c b/sysfs.c
index 8fdb529..c3bbbe3 100644
--- a/sysfs.c
+++ b/sysfs.c
@@ -572,7 +572,7 @@ int sysfs_set_array(struct mdinfo *info, int vers)
return rv;
}
-int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd)
+int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd, int resume)
{
char dv[100];
char nm[100];
@@ -595,6 +595,13 @@ int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd)
strcpy(sd->sys_name, "dev-");
strcpy(sd->sys_name+4, dname);
+ /* test write to see if 'recovery_start' is available */
+ if (resume && sd->recovery_start < MaxSector &&
+ sysfs_set_num(sra, sd, "recovery_start", 0)) {
+ sysfs_set_str(sra, sd, "state", "remove");
+ return -1;
+ }
+
rv = sysfs_set_num(sra, sd, "offset", sd->data_offset);
rv |= sysfs_set_num(sra, sd, "size", (sd->component_size+1) / 2);
if (sra->array.level != LEVEL_CONTAINER) {
@@ -604,6 +611,8 @@ int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd)
*/
sysfs_set_str(sra, sd, "state", "insync");
rv |= sysfs_set_num(sra, sd, "slot", sd->disk.raid_disk);
+ if (resume)
+ sysfs_set_num(sra, sd, "recovery_start", sd->recovery_start);
}
return rv;
}
diff --git a/util.c b/util.c
index 05be64c..927a0ee 100644
--- a/util.c
+++ b/util.c
@@ -1166,7 +1166,7 @@ int add_disk(int mdfd, struct supertype *st,
info->recovery_start = MaxSector;
else
info->recovery_start = 0;
- rv = sysfs_add_disk(sra, info);
+ rv = sysfs_add_disk(sra, info, 0);
if (! rv) {
struct mdinfo *sd2;
for (sd2 = sra->devs; sd2; sd2=sd2->next)
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH 13/13] imsm: add support for checkpointing via 'curr_migr_unit'
2009-12-22 23:59 [mdadm PATCH 00/13] rebuild / resync checkpointing and other external metadata fixes Dan Williams
` (10 preceding siblings ...)
2009-12-23 0:00 ` [PATCH 12/13] Support external metadata recovery-resume Dan Williams
@ 2009-12-23 0:00 ` Dan Williams
2009-12-23 0:13 ` [mdadm PATCH 00/13] rebuild / resync checkpointing and other external metadata fixes Dan Williams
2009-12-30 2:56 ` Neil Brown
13 siblings, 0 replies; 17+ messages in thread
From: Dan Williams @ 2009-12-23 0:00 UTC (permalink / raw)
To: neilb; +Cc: linux-raid, ed.ciechanowski, marcin.labun
Unlike native md checkpointing, some data about the geometry and type of
the migration process is encoded into curr_migr_unit. Provide logic to
convert between md/{resync_start|recovery_start} and imsm/curr_migr_unit.
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
bitmap.c | 2
mdadm.h | 17 +++
super-intel.c | 320 +++++++++++++++++++++++++++++++++++++++++++++++++++------
util.c | 15 +++
4 files changed, 320 insertions(+), 34 deletions(-)
diff --git a/bitmap.c b/bitmap.c
index 850b0ce..088e37d 100644
--- a/bitmap.c
+++ b/bitmap.c
@@ -20,8 +20,6 @@
#include "mdadm.h"
-#define min(a,b) (((a) < (b)) ? (a) : (b))
-
inline void sb_le_to_cpu(bitmap_super_t *sb)
{
sb->magic = __le32_to_cpu(sb->magic);
diff --git a/mdadm.h b/mdadm.h
index 27ef693..c1c36af 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -129,6 +129,22 @@ extern __off64_t lseek64 __P ((int __fd, __off64_t __offset, int __whence));
#endif /* __KLIBC__ */
+/*
+ * min()/max() macros that also do
+ * strict type-checking. See the
+ * "unnecessary" pointer comparison.
+ */
+#define min(x, y) ({ \
+ typeof(x) _min1 = (x); \
+ typeof(y) _min2 = (y); \
+ (void) (&_min1 == &_min2); \
+ _min1 < _min2 ? _min1 : _min2; })
+
+#define max(x, y) ({ \
+ typeof(x) _max1 = (x); \
+ typeof(y) _max2 = (y); \
+ (void) (&_max1 == &_max2); \
+ _max1 > _max2 ? _max1 : _max2; })
/* general information that might be extracted from a superblock */
struct mdinfo {
@@ -842,6 +858,7 @@ extern int assemble_container_content(struct supertype *st, int mdfd,
extern int add_disk(int mdfd, struct supertype *st,
struct mdinfo *sra, struct mdinfo *info);
extern int set_array_info(int mdfd, struct supertype *st, struct mdinfo *info);
+unsigned long long min_recovery_start(struct mdinfo *array);
extern char *human_size(long long bytes);
extern char *human_size_brief(long long bytes);
diff --git a/super-intel.c b/super-intel.c
index 9f879c5..609aaf5 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -635,6 +635,8 @@ static int is_failed(struct imsm_disk *disk)
}
#ifndef MDASSEMBLE
+static __u64 blocks_per_migr_unit(struct imsm_dev *dev);
+
static void print_imsm_dev(struct imsm_dev *dev, char *uuid, int disk_idx)
{
__u64 sz;
@@ -690,7 +692,11 @@ static void print_imsm_dev(struct imsm_dev *dev, char *uuid, int disk_idx)
printf(" Map State : %s", map_state_str[map->map_state]);
if (dev->vol.migr_state) {
struct imsm_map *map = get_imsm_map(dev, 1);
+
printf(" <-- %s", map_state_str[map->map_state]);
+ printf("\n Checkpoint : %u (%llu)",
+ __le32_to_cpu(dev->vol.curr_migr_unit),
+ blocks_per_migr_unit(dev));
}
printf("\n");
printf(" Dirty State : %s\n", dev->vol.dirty ? "dirty" : "clean");
@@ -1216,6 +1222,179 @@ get_imsm_numerical_version(struct imsm_super *mpb, int *m, int *p)
}
#endif
+static __u32 migr_strip_blocks_resync(struct imsm_dev *dev)
+{
+ /* migr_strip_size when repairing or initializing parity */
+ struct imsm_map *map = get_imsm_map(dev, 0);
+ __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
+
+ switch (get_imsm_raid_level(map)) {
+ case 5:
+ case 10:
+ return chunk;
+ default:
+ return 128*1024 >> 9;
+ }
+}
+
+static __u32 migr_strip_blocks_rebuild(struct imsm_dev *dev)
+{
+ /* migr_strip_size when rebuilding a degraded disk, no idea why
+ * this is different than migr_strip_blocks_resync(), but it's good
+ * to be compatible
+ */
+ struct imsm_map *map = get_imsm_map(dev, 1);
+ __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
+
+ switch (get_imsm_raid_level(map)) {
+ case 1:
+ case 10:
+ if (map->num_members % map->num_domains == 0)
+ return 128*1024 >> 9;
+ else
+ return chunk;
+ case 5:
+ return max((__u32) 64*1024 >> 9, chunk);
+ default:
+ return 128*1024 >> 9;
+ }
+}
+
+static __u32 num_stripes_per_unit_resync(struct imsm_dev *dev)
+{
+ struct imsm_map *lo = get_imsm_map(dev, 0);
+ struct imsm_map *hi = get_imsm_map(dev, 1);
+ __u32 lo_chunk = __le32_to_cpu(lo->blocks_per_strip);
+ __u32 hi_chunk = __le32_to_cpu(hi->blocks_per_strip);
+
+ return max((__u32) 1, hi_chunk / lo_chunk);
+}
+
+static __u32 num_stripes_per_unit_rebuild(struct imsm_dev *dev)
+{
+ struct imsm_map *lo = get_imsm_map(dev, 0);
+ int level = get_imsm_raid_level(lo);
+
+ if (level == 1 || level == 10) {
+ struct imsm_map *hi = get_imsm_map(dev, 1);
+
+ return hi->num_domains;
+ } else
+ return num_stripes_per_unit_resync(dev);
+}
+
+static __u8 imsm_num_data_members(struct imsm_dev *dev)
+{
+ /* named 'imsm_' because raid0, raid1 and raid10
+ * counter-intuitively have the same number of data disks
+ */
+ struct imsm_map *map = get_imsm_map(dev, 0);
+
+ switch (get_imsm_raid_level(map)) {
+ case 0:
+ case 1:
+ case 10:
+ return map->num_members;
+ case 5:
+ return map->num_members - 1;
+ default:
+ dprintf("%s: unsupported raid level\n", __func__);
+ return 0;
+ }
+}
+
+static __u32 parity_segment_depth(struct imsm_dev *dev)
+{
+ struct imsm_map *map = get_imsm_map(dev, 0);
+ __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
+
+ switch(get_imsm_raid_level(map)) {
+ case 1:
+ case 10:
+ return chunk * map->num_domains;
+ case 5:
+ return chunk * map->num_members;
+ default:
+ return chunk;
+ }
+}
+
+static __u32 map_migr_block(struct imsm_dev *dev, __u32 block)
+{
+ struct imsm_map *map = get_imsm_map(dev, 1);
+ __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
+ __u32 strip = block / chunk;
+
+ switch (get_imsm_raid_level(map)) {
+ case 1:
+ case 10: {
+ __u32 vol_strip = (strip * map->num_domains) + 1;
+ __u32 vol_stripe = vol_strip / map->num_members;
+
+ return vol_stripe * chunk + block % chunk;
+ } case 5: {
+ __u32 stripe = strip / (map->num_members - 1);
+
+ return stripe * chunk + block % chunk;
+ }
+ default:
+ return 0;
+ }
+}
+
+static __u64 blocks_per_migr_unit(struct imsm_dev *dev)
+{
+ /* calculate the conversion factor between per member 'blocks'
+ * (md/{resync,recovery}_start) and imsm migration units, return
+ * 0 for the 'not migrating' and 'unsupported migration' cases
+ */
+ if (!dev->vol.migr_state)
+ return 0;
+
+ switch (migr_type(dev)) {
+ case MIGR_VERIFY:
+ case MIGR_REPAIR:
+ case MIGR_INIT: {
+ struct imsm_map *map = get_imsm_map(dev, 0);
+ __u32 stripes_per_unit;
+ __u32 blocks_per_unit;
+ __u32 parity_depth;
+ __u32 migr_chunk;
+ __u32 block_map;
+ __u32 block_rel;
+ __u32 segment;
+ __u32 stripe;
+ __u8 disks;
+
+ /* yes, this is really the translation of migr_units to
+ * per-member blocks in the 'resync' case
+ */
+ stripes_per_unit = num_stripes_per_unit_resync(dev);
+ migr_chunk = migr_strip_blocks_resync(dev);
+ disks = imsm_num_data_members(dev);
+ blocks_per_unit = stripes_per_unit * migr_chunk * disks;
+ stripe = __le32_to_cpu(map->blocks_per_strip) * disks;
+ segment = blocks_per_unit / stripe;
+ block_rel = blocks_per_unit - segment * stripe;
+ parity_depth = parity_segment_depth(dev);
+ block_map = map_migr_block(dev, block_rel);
+ return block_map + parity_depth * segment;
+ }
+ case MIGR_REBUILD: {
+ __u32 stripes_per_unit;
+ __u32 migr_chunk;
+
+ stripes_per_unit = num_stripes_per_unit_rebuild(dev);
+ migr_chunk = migr_strip_blocks_rebuild(dev);
+ return migr_chunk * stripes_per_unit;
+ }
+ case MIGR_GEN_MIGR:
+ case MIGR_STATE_CHANGE:
+ default:
+ return 0;
+ }
+}
+
static int imsm_level_to_layout(int level)
{
switch (level) {
@@ -1265,12 +1444,33 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info)
info->component_size = __le32_to_cpu(map->blocks_per_member);
memset(info->uuid, 0, sizeof(info->uuid));
- if (map->map_state == IMSM_T_STATE_UNINITIALIZED || dev->vol.dirty)
+ if (map->map_state == IMSM_T_STATE_UNINITIALIZED || dev->vol.dirty) {
info->resync_start = 0;
- else if (dev->vol.migr_state)
- /* FIXME add curr_migr_unit to resync_start conversion */
- info->resync_start = 0;
- else
+ } else if (dev->vol.migr_state) {
+ switch (migr_type(dev)) {
+ case MIGR_REPAIR:
+ case MIGR_INIT: {
+ __u64 blocks_per_unit = blocks_per_migr_unit(dev);
+ __u64 units = __le32_to_cpu(dev->vol.curr_migr_unit);
+
+ info->resync_start = blocks_per_unit * units;
+ break;
+ }
+ case MIGR_VERIFY:
+ /* we could emulate the checkpointing of
+ * 'sync_action=check' migrations, but for now
+ * we just immediately complete them
+ */
+ case MIGR_REBUILD:
+ /* this is handled by container_content_imsm() */
+ case MIGR_GEN_MIGR:
+ case MIGR_STATE_CHANGE:
+ /* FIXME handle other migrations */
+ default:
+ /* we are not dirty, so... */
+ info->resync_start = MaxSector;
+ }
+ } else
info->resync_start = MaxSector;
strncpy(info->name, (char *) dev->volume, MAX_RAID_SERIAL_LEN);
@@ -3782,6 +3982,46 @@ static int validate_geometry_imsm(struct supertype *st, int level, int layout,
}
#endif /* MDASSEMBLE */
+static int is_rebuilding(struct imsm_dev *dev)
+{
+ struct imsm_map *migr_map;
+
+ if (!dev->vol.migr_state)
+ return 0;
+
+ if (migr_type(dev) != MIGR_REBUILD)
+ return 0;
+
+ migr_map = get_imsm_map(dev, 1);
+
+ if (migr_map->map_state == IMSM_T_STATE_DEGRADED)
+ return 1;
+ else
+ return 0;
+}
+
+static void update_recovery_start(struct imsm_dev *dev, struct mdinfo *array)
+{
+ struct mdinfo *rebuild = NULL;
+ struct mdinfo *d;
+ __u32 units;
+
+ if (!is_rebuilding(dev))
+ return;
+
+ /* Find the rebuild target, but punt on the dual rebuild case */
+ for (d = array->devs; d; d = d->next)
+ if (d->recovery_start == 0) {
+ if (rebuild)
+ return;
+ rebuild = d;
+ }
+
+ units = __le32_to_cpu(dev->vol.curr_migr_unit);
+ rebuild->recovery_start = units * blocks_per_migr_unit(dev);
+}
+
+
static struct mdinfo *container_content_imsm(struct supertype *st)
{
/* Given a container loaded by load_super_imsm_all,
@@ -3829,6 +4069,7 @@ static struct mdinfo *container_content_imsm(struct supertype *st)
super->current_vol = i;
getinfo_super_imsm_volume(st, this);
for (slot = 0 ; slot < map->num_members; slot++) {
+ unsigned long long recovery_start;
struct mdinfo *info_d;
struct dl *d;
int idx;
@@ -3842,33 +4083,41 @@ static struct mdinfo *container_content_imsm(struct supertype *st)
if (d->index == idx)
break;
+ recovery_start = MaxSector;
if (d == NULL)
skip = 1;
if (d && is_failed(&d->disk))
skip = 1;
if (ord & IMSM_ORD_REBUILD)
- skip = 1;
+ recovery_start = 0;
/*
* if we skip some disks the array will be assmebled degraded;
- * reset resync start to avoid a dirty-degraded situation
+ * reset resync start to avoid a dirty-degraded
+ * situation when performing the initial sync
*
* FIXME handle dirty degraded
*/
- if (skip && !dev->vol.dirty)
+ if ((skip || recovery_start == 0) && !dev->vol.dirty)
this->resync_start = MaxSector;
if (skip)
continue;
- info_d = malloc(sizeof(*info_d));
+ info_d = calloc(1, sizeof(*info_d));
if (!info_d) {
fprintf(stderr, Name ": failed to allocate disk"
" for volume %.16s\n", dev->volume);
+ info_d = this->devs;
+ while (info_d) {
+ struct mdinfo *d = info_d->next;
+
+ free(info_d);
+ info_d = d;
+ }
free(this);
this = rest;
break;
}
- memset(info_d, 0, sizeof(*info_d));
info_d->next = this->devs;
this->devs = info_d;
@@ -3876,9 +4125,10 @@ static struct mdinfo *container_content_imsm(struct supertype *st)
info_d->disk.major = d->major;
info_d->disk.minor = d->minor;
info_d->disk.raid_disk = slot;
- info_d->recovery_start = MaxSector;
+ info_d->recovery_start = recovery_start;
- this->array.working_disks++;
+ if (info_d->recovery_start == MaxSector)
+ this->array.working_disks++;
info_d->events = __le32_to_cpu(mpb->generation_num);
info_d->data_offset = __le32_to_cpu(map->pba_of_lba0);
@@ -3886,6 +4136,8 @@ static struct mdinfo *container_content_imsm(struct supertype *st)
if (d->devname)
strcpy(info_d->name, d->devname);
}
+ /* now that the disk list is up-to-date fixup recovery_start */
+ update_recovery_start(dev, this);
rest = this;
}
@@ -4028,24 +4280,6 @@ static int is_resyncing(struct imsm_dev *dev)
return 0;
}
-static int is_rebuilding(struct imsm_dev *dev)
-{
- struct imsm_map *migr_map;
-
- if (!dev->vol.migr_state)
- return 0;
-
- if (migr_type(dev) != MIGR_REBUILD)
- return 0;
-
- migr_map = get_imsm_map(dev, 1);
-
- if (migr_map->map_state == IMSM_T_STATE_DEGRADED)
- return 1;
- else
- return 0;
-}
-
/* return true if we recorded new information */
static int mark_failure(struct imsm_dev *dev, struct imsm_disk *disk, int idx)
{
@@ -4096,6 +4330,7 @@ static int imsm_set_array_state(struct active_array *a, int consistent)
struct imsm_map *map = get_imsm_map(dev, 0);
int failed = imsm_count_failed(super, dev);
__u8 map_state = imsm_check_degraded(super, dev, failed);
+ __u32 blocks_per_unit;
/* before we activate this array handle any missing disks */
if (consistent == 2 && super->missing) {
@@ -4107,7 +4342,7 @@ static int imsm_set_array_state(struct active_array *a, int consistent)
mark_missing(dev, &dl->disk, dl->index);
super->updates_pending++;
}
-
+
if (consistent == 2 &&
(!is_resync_complete(&a->info) ||
map_state != IMSM_T_STATE_NORMAL ||
@@ -4134,7 +4369,28 @@ static int imsm_set_array_state(struct active_array *a, int consistent)
super->updates_pending++;
}
- /* FIXME check if we can update curr_migr_unit from resync_start */
+ /* check if we can update curr_migr_unit from resync_start, recovery_start */
+ blocks_per_unit = blocks_per_migr_unit(dev);
+ if (blocks_per_unit && failed <= 1) {
+ __u32 units32;
+ __u64 units;
+
+ if (migr_type(dev) == MIGR_REBUILD)
+ units = min_recovery_start(&a->info) / blocks_per_unit;
+ else
+ units = a->info.resync_start / blocks_per_unit;
+ units32 = units;
+
+ /* check that we did not overflow 32-bits, and that
+ * curr_migr_unit needs updating
+ */
+ if (units32 == units &&
+ __le32_to_cpu(dev->vol.curr_migr_unit) != units32) {
+ dprintf("imsm: mark checkpoint (%u)\n", units32);
+ dev->vol.curr_migr_unit = __cpu_to_le32(units32);
+ super->updates_pending++;
+ }
+ }
/* mark dirty / clean */
if (dev->vol.dirty != !consistent) {
diff --git a/util.c b/util.c
index 927a0ee..53c21e3 100644
--- a/util.c
+++ b/util.c
@@ -1210,6 +1210,21 @@ int set_array_info(int mdfd, struct supertype *st, struct mdinfo *info)
return rv;
}
+unsigned long long min_recovery_start(struct mdinfo *array)
+{
+ /* find the minimum recovery_start in an array for metadata
+ * formats that only record per-array recovery progress instead
+ * of per-device
+ */
+ unsigned long long recovery_start = MaxSector;
+ struct mdinfo *d;
+
+ for (d = array->devs; d; d = d->next)
+ recovery_start = min(recovery_start, d->recovery_start);
+
+ return recovery_start;
+}
+
char *devnum2devname(int num)
{
char name[100];
^ permalink raw reply related [flat|nested] 17+ messages in thread
* Re: [mdadm PATCH 00/13] rebuild / resync checkpointing and other external metadata fixes
2009-12-22 23:59 [mdadm PATCH 00/13] rebuild / resync checkpointing and other external metadata fixes Dan Williams
` (11 preceding siblings ...)
2009-12-23 0:00 ` [PATCH 13/13] imsm: add support for checkpointing via 'curr_migr_unit' Dan Williams
@ 2009-12-23 0:13 ` Dan Williams
2009-12-30 2:56 ` Neil Brown
13 siblings, 0 replies; 17+ messages in thread
From: Dan Williams @ 2009-12-23 0:13 UTC (permalink / raw)
To: neilb; +Cc: linux-raid, ed.ciechanowski, marcin.labun
On Tue, Dec 22, 2009 at 4:59 PM, Dan Williams <dan.j.williams@intel.com> wrote:
> Hi Neil,
>
> The latter half of this update consumes the new
> md/dev-XXX/recovery_start attribute for checkpointing and resuming
> resync/rebuild operations. It requires one more kernel patch [1] on top
> of 2.6.33-rc1 to ensure that mdmon sees the recovery state. One caveat
> is that we only get checkpoint events when the sync_action == idle, at
> array shutdown or as a part of 'mdadm --wait-clean --scan'.
>
> The other bits are a collection of fixes to address unrelated issues
> discovered along the way. The result is pushed out to:
>
> git://github.com:djbw/mdadm.git master
That ':' should be a '/'
git://github.com/djbw/mdadm.git master
--
To unsubscribe from this list: send the line "unsubscribe linux-raid" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [mdadm PATCH 00/13] rebuild / resync checkpointing and other external metadata fixes
2009-12-22 23:59 [mdadm PATCH 00/13] rebuild / resync checkpointing and other external metadata fixes Dan Williams
` (12 preceding siblings ...)
2009-12-23 0:13 ` [mdadm PATCH 00/13] rebuild / resync checkpointing and other external metadata fixes Dan Williams
@ 2009-12-30 2:56 ` Neil Brown
2009-12-30 7:19 ` Luca Berra
13 siblings, 1 reply; 17+ messages in thread
From: Neil Brown @ 2009-12-30 2:56 UTC (permalink / raw)
To: Dan Williams; +Cc: linux-raid, ed.ciechanowski, marcin.labun
On Tue, 22 Dec 2009 16:59:19 -0700
Dan Williams <dan.j.williams@intel.com> wrote:
> Hi Neil,
>
> The latter half of this update consumes the new
> md/dev-XXX/recovery_start attribute for checkpointing and resuming
> resync/rebuild operations. It requires one more kernel patch [1] on top
> of 2.6.33-rc1 to ensure that mdmon sees the recovery state. One caveat
> is that we only get checkpoint events when the sync_action == idle, at
> array shutdown or as a part of 'mdadm --wait-clean --scan'.
>
> The other bits are a collection of fixes to address unrelated issues
> discovered along the way. The result is pushed out to:
>
> git://github.com:djbw/mdadm.git master
>
> Please have a look.
Thanks. Looks good. Applied.
NeilBrown
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [mdadm PATCH 00/13] rebuild / resync checkpointing and other external metadata fixes
2009-12-30 2:56 ` Neil Brown
@ 2009-12-30 7:19 ` Luca Berra
2009-12-30 7:57 ` Neil Brown
0 siblings, 1 reply; 17+ messages in thread
From: Luca Berra @ 2009-12-30 7:19 UTC (permalink / raw)
To: linux-raid
On Wed, Dec 30, 2009 at 01:56:50PM +1100, Neil Brown wrote:
>On Tue, 22 Dec 2009 16:59:19 -0700
>Dan Williams <dan.j.williams@intel.com> wrote:
>
>> Hi Neil,
>>
>> The other bits are a collection of fixes to address unrelated issues
>> discovered along the way. The result is pushed out to:
>>
>> git://github.com:djbw/mdadm.git master
>>
>> Please have a look.
>
>Thanks. Looks good. Applied.
>
Hi, Dan, Neil
I had the 'pleasure' of testing this patchset (my box keeps kicking disks
out).
I am pleased to note it works perfectly, thanks a lot Dan.
btw, the patches apply both to 3.0.3 and 3.1.1, Neil do you plan on
issuing a 3.0.4 or is now 3.1 considered the stable branch?
Regards,
L.
--
Luca Berra -- bluca@comedia.it
Communication Media & Services S.r.l.
/"\
\ / ASCII RIBBON CAMPAIGN
X AGAINST HTML MAIL
/ \
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [mdadm PATCH 00/13] rebuild / resync checkpointing and other external metadata fixes
2009-12-30 7:19 ` Luca Berra
@ 2009-12-30 7:57 ` Neil Brown
0 siblings, 0 replies; 17+ messages in thread
From: Neil Brown @ 2009-12-30 7:57 UTC (permalink / raw)
To: Luca Berra; +Cc: linux-raid
On Wed, 30 Dec 2009 08:19:05 +0100
Luca Berra <bluca@comedia.it> wrote:
> On Wed, Dec 30, 2009 at 01:56:50PM +1100, Neil Brown wrote:
> >On Tue, 22 Dec 2009 16:59:19 -0700
> >Dan Williams <dan.j.williams@intel.com> wrote:
> >
> >> Hi Neil,
> >>
> >> The other bits are a collection of fixes to address unrelated issues
> >> discovered along the way. The result is pushed out to:
> >>
> >> git://github.com:djbw/mdadm.git master
> >>
> >> Please have a look.
> >
> >Thanks. Looks good. Applied.
> >
> Hi, Dan, Neil
> I had the 'pleasure' of testing this patchset (my box keeps kicking disks
> out).
> I am pleased to note it works perfectly, thanks a lot Dan.
>
> btw, the patches apply both to 3.0.3 and 3.1.1, Neil do you plan on
> issuing a 3.0.4 or is now 3.1 considered the stable branch?
3.1 is now the 'stable' branch - I don't plan for a 3.0.4.
NeilBrown
^ permalink raw reply [flat|nested] 17+ messages in thread