From: NeilBrown <neilb@suse.de>
To: linux-raid@vger.kernel.org
Subject: [PATCH 16/18] md: add ->takeover method to support changing the personality managing an array
Date: Thu, 12 Feb 2009 14:10:11 +1100
Message-ID: <20090212031011.23983.49110.stgit@notabene.brown>
In-Reply-To: <20090212031009.23983.14496.stgit@notabene.brown>
Implement this for RAID6 so that it can take over a RAID5 array. The
new RAID6 will use a layout which places Q on the last device, and
that device will be missing.
If there are any available spares, one will immediately have Q
recovered onto it.
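
Once applied, the transition can be requested from user-space by
writing the new level name to the md 'level' sysfs attribute (the
level_store() routine below).  A minimal sketch, assuming the usual
/sys/block/mdX/md attribute layout:

/* Sketch: ask md to convert md0 from raid5 to raid6 by writing
 * "raid6" to its 'level' attribute.  stdio flushes the write on
 * fclose(), which is where a kernel-side error would surface. */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/block/md0/md/level", "w");

	if (!f)
		return 1;
	fputs("raid6", f);
	return fclose(f) ? 1 : 0;
}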
Signed-off-by: NeilBrown <neilb@suse.de>
---
drivers/md/md.c | 92 ++++++++++++++++++++++++++++++++++----
drivers/md/raid5.c | 106 +++++++++++++++++++++++++++++++++++++-------
include/linux/raid/md_k.h | 10 ++++
include/linux/raid/raid5.h | 5 ++
4 files changed, 186 insertions(+), 27 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 0e0e1ff..bd003d7 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -2614,18 +2614,92 @@ level_show(mddev_t *mddev, char *page)
static ssize_t
level_store(mddev_t *mddev, const char *buf, size_t len)
{
+ char level[16];
ssize_t rv = len;
- if (mddev->pers)
+ struct mdk_personality *pers;
+ void *priv;
+
+ if (mddev->pers == NULL) {
+ if (len == 0)
+ return 0;
+ if (len >= sizeof(mddev->clevel))
+ return -ENOSPC;
+ strncpy(mddev->clevel, buf, len);
+ if (mddev->clevel[len-1] == '\n')
+ len--;
+ mddev->clevel[len] = 0;
+ mddev->level = LEVEL_NONE;
+ return rv;
+ }
+
+ /* request to change the personality. Need to ensure:
+ * - array is not engaged in resync/recovery/reshape
+ * - old personality can be suspended
+ * - new personality can handle the array's data in its current layout.
+ */
+
+ if (mddev->sync_thread || mddev->reshape_position != MaxSector)
return -EBUSY;
- if (len == 0)
- return 0;
- if (len >= sizeof(mddev->clevel))
- return -ENOSPC;
- strncpy(mddev->clevel, buf, len);
- if (mddev->clevel[len-1] == '\n')
+
+ if (!mddev->pers->quiesce) {
+ printk(KERN_WARNING "md: %s: %s does not support online personality change\n",
+ mdname(mddev), mddev->pers->name);
+ return -EINVAL;
+ }
+
+ /* Now find the new personality */
+ if (len == 0 || len >= sizeof(level))
+ return -EINVAL;
+ strncpy(level, buf, len);
+ if (level[len-1] == '\n')
len--;
- mddev->clevel[len] = 0;
- mddev->level = LEVEL_NONE;
+ level[len] = 0;
+
+ request_module("md-%s", level);
+ spin_lock(&pers_lock);
+ pers = find_pers(LEVEL_NONE, level);
+ if (!pers || !try_module_get(pers->owner)) {
+ spin_unlock(&pers_lock);
+ printk(KERN_WARNING "md: personality %s not loaded\n", level);
+ return -EINVAL;
+ }
+ spin_unlock(&pers_lock);
+
+ if (pers == mddev->pers) {
+ /* Nothing to do! */
+ module_put(pers->owner);
+ return rv;
+ }
+ if (!pers->takeover) {
+ module_put(pers->owner);
+ printk(KERN_WARNING "md: %s: %s does not support personality takeover\n",
+ mdname(mddev), level);
+ return -EINVAL;
+ }
+
+ priv = pers->takeover(mddev);
+ if (IS_ERR(priv)) {
+ module_put(pers->owner);
+ printk(KERN_WARNING "md: %s: %s would not accept array\n",
+ mdname(mddev), level);
+ return PTR_ERR(priv);
+ }
+
+ /* Looks like we have a winner */
+ mddev_suspend(mddev);
+ mddev->pers->stop(mddev);
+ module_put(mddev->pers->owner);
+ mddev->pers = pers;
+ mddev->private = priv;
+ strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
+ mddev->level = pers->level;
+ mddev->new_level = pers->level;
+ mddev->new_layout = mddev->layout;
+ mddev->new_chunk = mddev->chunk_size;
+ pers->run(mddev);
+ mddev_resume(mddev);
+ set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+ md_wakeup_thread(mddev->thread);
return rv;
}
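
The mddev_suspend()/mddev_resume() pair used above comes from the
suspend/resume patch earlier in this series (15/18); the personality
swap relies on it to keep the array idle while ->stop() and ->run()
exchange ownership.  Roughly (a sketch of the idea, not the exact
code from that patch):

/* Sketch: block new requests and drain in-flight IO so the
 * personality can be swapped on a quiescent array. */
void mddev_suspend(mddev_t *mddev)
{
	mddev->suspended = 1;		/* make_request() now waits  */
	synchronize_rcu();
	wait_event(mddev->sb_wait,
		   atomic_read(&mddev->active_io) == 0);
	mddev->pers->quiesce(mddev, 1);	/* flush personality state   */
}

mddev_resume() does the reverse: it clears ->suspended, wakes any
blocked requests and calls ->quiesce(mddev, 0).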
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 6c33add..89ce65d 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -912,8 +912,10 @@ static int grow_stripes(raid5_conf_t *conf, int num)
struct kmem_cache *sc;
int devs = conf->raid_disks;
- sprintf(conf->cache_name[0], "raid5-%s", mdname(conf->mddev));
- sprintf(conf->cache_name[1], "raid5-%s-alt", mdname(conf->mddev));
+ sprintf(conf->cache_name[0],
+ "raid%d-%s", conf->level, mdname(conf->mddev));
+ sprintf(conf->cache_name[1],
+ "raid%d-%s-alt", conf->level, mdname(conf->mddev));
conf->active_name = 0;
sc = kmem_cache_create(conf->cache_name[conf->active_name],
sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev),
@@ -4149,22 +4151,22 @@ static struct attribute_group raid5_attrs_group = {
.attrs = raid5_attrs,
};
-static raid5_conf_t *setup_conf(mddev_t *mddev)
+static raid5_conf_t *setup_conf(mddev_t *mddev, int raid_disks, int level, int layout)
{
raid5_conf_t *conf;
int raid_disk, memory;
mdk_rdev_t *rdev;
struct disk_info *disk;
- if (mddev->level != 5 && mddev->level != 4 && mddev->level != 6) {
+ if (level != 5 && level != 4 && level != 6) {
printk(KERN_ERR "raid5: %s: raid level not set to 4/5/6 (%d)\n",
- mdname(mddev), mddev->level);
+ mdname(mddev), level);
return ERR_PTR(-EIO);
}
- if ((mddev->level == 5 && !algorithm_valid_raid5(mddev->layout)) ||
- (mddev->level == 6 && !algorithm_valid_raid6(mddev->layout))) {
+ if ((level == 5 && !algorithm_valid_raid5(layout)) ||
+ (level == 6 && !algorithm_valid_raid6(layout))) {
printk(KERN_ERR "raid5: %s: layout %d not supported\n",
- mdname(mddev), mddev->layout);
+ mdname(mddev), layout);
return ERR_PTR(-EIO);
}
@@ -4180,10 +4182,10 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
goto abort;
if (mddev->reshape_position == MaxSector) {
- conf->previous_raid_disks = conf->raid_disks = mddev->raid_disks;
+ conf->previous_raid_disks = conf->raid_disks = raid_disks;
} else {
- conf->raid_disks = mddev->raid_disks;
- conf->previous_raid_disks = mddev->raid_disks - mddev->delta_disks;
+ conf->raid_disks = raid_disks;
+ conf->previous_raid_disks = raid_disks - mddev->delta_disks;
}
conf->disks = kzalloc(conf->raid_disks * sizeof(struct disk_info),
@@ -4196,7 +4198,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL)
goto abort;
- if (mddev->level == 6) {
+ if (level == 6) {
conf->spare_page = alloc_page(GFP_KERNEL);
if (!conf->spare_page)
goto abort;
@@ -4236,12 +4238,12 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
}
conf->chunk_size = mddev->chunk_size;
- conf->level = mddev->level;
+ conf->level = level;
if (conf->level == 6)
conf->max_degraded = 2;
else
conf->max_degraded = 1;
- conf->algorithm = mddev->layout;
+ conf->algorithm = layout;
conf->max_nr_stripes = NR_STRIPES;
conf->expand_progress = mddev->reshape_position;
@@ -4327,10 +4329,14 @@ static int run(mddev_t *mddev)
/* OK, we should be able to continue; */
}
- conf = setup_conf(mddev);
+ if (mddev->private == NULL)
+ conf = setup_conf(mddev, mddev->raid_disks, mddev->level, mddev->layout);
+ else {
+ conf = mddev->private;
+ mddev->raid_disks = conf->raid_disks;
+ mddev->layout = conf->algorithm;
+ }
- if (conf == NULL)
- return -EIO;
if (IS_ERR(conf))
return PTR_ERR(conf);
@@ -4383,7 +4389,11 @@ static int run(mddev_t *mddev)
}
}
- mddev->thread = md_register_thread(raid5d, mddev, "%s_raid5");
+ if (conf->thread) {
+ mddev->thread = conf->thread;
+ conf->thread = NULL;
+ } else
+ mddev->thread = md_register_thread(raid5d, mddev, "%s_raid5");
if (!mddev->thread) {
printk(KERN_ERR
"raid5: couldn't allocate thread for %s\n",
@@ -4859,6 +4869,65 @@ static void raid5_quiesce(mddev_t *mddev, int state)
}
}
+static struct mdk_personality raid5_personality;
+
+static void *raid6_takeover(mddev_t *mddev)
+{
+ /* Currently can only take over a raid5. We map the
+ * personality to an equivalent raid6 personality
+ * with the Q block at the end.
+ */
+ int new_layout;
+ raid5_conf_t *conf;
+
+ if (mddev->pers != &raid5_personality)
+ return ERR_PTR(-EINVAL);
+ if (mddev->degraded > 1)
+ return ERR_PTR(-EINVAL);
+ if (mddev->raid_disks > 253)
+ return ERR_PTR(-EINVAL);
+ if (mddev->raid_disks < 3)
+ return ERR_PTR(-EINVAL);
+
+ switch(mddev->layout) {
+ case ALGORITHM_LEFT_ASYMMETRIC:
+ new_layout = ALGORITHM_LEFT_ASYMMETRIC_6;
+ break;
+ case ALGORITHM_RIGHT_ASYMMETRIC:
+ new_layout = ALGORITHM_RIGHT_ASYMMETRIC_6;
+ break;
+ case ALGORITHM_LEFT_SYMMETRIC:
+ new_layout = ALGORITHM_LEFT_SYMMETRIC_6;
+ break;
+ case ALGORITHM_RIGHT_SYMMETRIC:
+ new_layout = ALGORITHM_RIGHT_SYMMETRIC_6;
+ break;
+ case ALGORITHM_PARITY_0:
+ new_layout = ALGORITHM_PARITY_0_6;
+ break;
+ case ALGORITHM_PARITY_N:
+ new_layout = ALGORITHM_PARITY_N;
+ break;
+ default:
+ return ERR_PTR(-EINVAL);
+ }
+ conf = setup_conf(mddev, mddev->raid_disks + 1, 6, new_layout);
+ if (IS_ERR(conf))
+ return conf;
+
+ conf->thread = md_register_thread(raid5d, mddev, "%s_raid5");
+ if (conf->thread)
+ return conf;
+
+ safe_put_page(conf->spare_page);
+ kfree(conf->disks);
+ kfree(conf->stripe_hashtbl);
+ kfree(conf);
+
+ return ERR_PTR(-ENOMEM);
+}
+
+
static struct mdk_personality raid6_personality =
{
.name = "raid6",
@@ -4879,6 +4948,7 @@ static struct mdk_personality raid6_personality =
.start_reshape = raid5_start_reshape,
#endif
.quiesce = raid5_quiesce,
+ .takeover = raid6_takeover,
};
static struct mdk_personality raid5_personality =
{
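
The *_6 layouts introduced earlier in this series keep the raid5
parity rotation over the first n-1 devices and pin Q to the last
device, which is why existing data and P blocks need not move.  A
sketch of the resulting placement for LEFT_SYMMETRIC_6 (illustrative
only; the real computation lives in raid5_compute_sector()):

/* Sketch: P/Q device indices for an n-device LEFT_SYMMETRIC_6
 * array.  The first n-1 devices follow the ordinary raid5
 * LEFT_SYMMETRIC rotation; Q always sits on device n-1, the
 * newly added (and initially missing) disk. */
static void placement_6(int n, unsigned long stripe, int *pd, int *qd)
{
	int d = n - 1;				/* raid5 sub-array */

	*pd = (d - 1) - (int)(stripe % d);	/* rotating parity */
	*qd = n - 1;				/* fixed Q device  */
}

So a 4-drive raid5 becomes a 5-drive raid6 whose fifth member holds
only Q, and the takeover leaves that member missing until a spare
can be recovered onto it.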
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index a815bab..3755045 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -334,6 +334,16 @@ struct mdk_personality
* others - reserved
*/
void (*quiesce) (mddev_t *mddev, int state);
+ /* takeover is used to transition an array from one
+ * personality to another. The new personality must be able
+ * to handle the data in the current layout.
+ * e.g. 2drive raid1 -> 2drive raid5
+ * ndrive raid5 -> degraded n+1drive raid6 with special layout
+ * If the takeover succeeds, a new 'private' structure is returned.
+ * This needs to be installed and then ->quiesce used to activate the
+ * array.
+ */
+ void *(*takeover) (mddev_t *mddev);
};
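
For personality authors the contract boils down to: validate the
source array, build the new personality's private structure, and
return it (or an ERR_PTR).  A minimal skeleton, where example_conf
and setup_example_conf() are hypothetical:

/* Sketch of a ->takeover implementation.  md's level_store() calls
 * this against the old array; on success it installs the returned
 * private data as mddev->private and then calls the new ->run(). */
static void *example_takeover(mddev_t *mddev)
{
	struct example_conf *conf;	/* hypothetical private data */

	/* refuse anything we cannot re-interpret in place */
	if (mddev->level != 1 || mddev->raid_disks != 2)
		return ERR_PTR(-EINVAL);

	conf = setup_example_conf(mddev);	/* hypothetical helper,
						 * returns ERR_PTR on
						 * failure */
	return conf;
}

Note how raid6_takeover() above also stashes a freshly registered md
thread in conf->thread, so that ->run() can adopt it instead of
allocating while the array is suspended.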
diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h
index 3adda05..4894cd5 100644
--- a/include/linux/raid/raid5.h
+++ b/include/linux/raid/raid5.h
@@ -387,6 +387,11 @@ struct raid5_private_data {
int pool_size; /* number of disks in stripeheads in pool */
spinlock_t device_lock;
struct disk_info *disks;
+
+ /* When taking over an array from a different personality, we store
+ * the new thread here until we fully activate the array.
+ */
+ struct mdk_thread_s *thread;
};
typedef struct raid5_private_data raid5_conf_t;