linux-raid.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: NeilBrown <neilb@suse.de>
To: linux-raid@vger.kernel.org
Subject: [PATCH 16/18] md: add ->takeover method to support changing the personality managing an array
Date: Thu, 12 Feb 2009 14:10:11 +1100	[thread overview]
Message-ID: <20090212031011.23983.49110.stgit@notabene.brown> (raw)
In-Reply-To: <20090212031009.23983.14496.stgit@notabene.brown>

Implement this for RAID6 to be able to 'takeover' a RAID5 array.  The
new RAID6 will use a layout which places Q on the last device, and
that device will be missing.
If there are any available spares, one will immediately have Q
recovered onto it.

Signed-off-by: NeilBrown <neilb@suse.de>
---

 drivers/md/md.c            |   92 ++++++++++++++++++++++++++++++++++----
 drivers/md/raid5.c         |  106 +++++++++++++++++++++++++++++++++++++-------
 include/linux/raid/md_k.h  |   10 ++++
 include/linux/raid/raid5.h |    5 ++
 4 files changed, 186 insertions(+), 27 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 0e0e1ff..bd003d7 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -2614,18 +2614,92 @@ level_show(mddev_t *mddev, char *page)
 static ssize_t
 level_store(mddev_t *mddev, const char *buf, size_t len)
 {
+	char level[16];
 	ssize_t rv = len;
-	if (mddev->pers)
+	struct mdk_personality *pers;
+	void *priv;
+
+	if (mddev->pers == NULL) {
+		if (len == 0)
+			return 0;
+		if (len >= sizeof(mddev->clevel))
+			return -ENOSPC;
+		strncpy(mddev->clevel, buf, len);
+		if (mddev->clevel[len-1] == '\n')
+			len--;
+		mddev->clevel[len] = 0;
+		mddev->level = LEVEL_NONE;
+		return rv;
+	}
+
+	/* request to change the personality.  Need to ensure:
+	 *  - array is not engaged in resync/recovery/reshape
+	 *  - old personality can be suspended
+	 *  - new personality will access other array.
+	 */
+
+	if (mddev->sync_thread || mddev->reshape_position != MaxSector)
 		return -EBUSY;
-	if (len == 0)
-		return 0;
-	if (len >= sizeof(mddev->clevel))
-		return -ENOSPC;
-	strncpy(mddev->clevel, buf, len);
-	if (mddev->clevel[len-1] == '\n')
+
+	if (!mddev->pers->quiesce) {
+		printk(KERN_WARNING "md: %s: %s does not support online personality change\n",
+		       mdname(mddev), mddev->pers->name);
+		return -EINVAL;
+	}
+
+	/* Now find the new personality */
+	if (len == 0 || len >= sizeof(level))
+		return -EINVAL;
+	strncpy(level, buf, len);
+	if (level[len-1] == '\n')
 		len--;
-	mddev->clevel[len] = 0;
-	mddev->level = LEVEL_NONE;
+	level[len] = 0;
+
+	request_module("md-%s", level);
+	spin_lock(&pers_lock);
+	pers = find_pers(LEVEL_NONE, level);
+	if (!pers || !try_module_get(pers->owner)) {
+		spin_unlock(&pers_lock);
+		printk(KERN_WARNING "md: personality %s not loaded\n", level);
+		return -EINVAL;
+	}
+	spin_unlock(&pers_lock);
+
+	if (pers == mddev->pers) {
+		/* Nothing to do! */
+		module_put(pers->owner);
+		return rv;
+	}
+	if (!pers->takeover) {
+		module_put(pers->owner);
+		printk(KERN_WARNING "md: %s: %s does not support personality takeover\n",
+		       mdname(mddev), level);
+		return -EINVAL;
+	}
+
+	priv = pers->takeover(mddev);
+	if (IS_ERR(priv)) {
+		module_put(pers->owner);
+		printk(KERN_WARNING "md: %s: %s would not accept array\n",
+		       mdname(mddev), level);
+		return PTR_ERR(priv);
+	}
+
+	/* Looks like we have a winner */
+	mddev_suspend(mddev);
+	mddev->pers->stop(mddev);
+	module_put(mddev->pers->owner);
+	mddev->pers = pers;
+	mddev->private = priv;
+	strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
+	mddev->level = pers->level;
+	mddev->new_level = pers->level;
+	mddev->new_layout = mddev->layout;
+	mddev->new_chunk = mddev->chunk_size;
+	pers->run(mddev);
+	mddev_resume(mddev);
+	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+	md_wakeup_thread(mddev->thread);
 	return rv;
 }
 
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 6c33add..89ce65d 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -912,8 +912,10 @@ static int grow_stripes(raid5_conf_t *conf, int num)
 	struct kmem_cache *sc;
 	int devs = conf->raid_disks;
 
-	sprintf(conf->cache_name[0], "raid5-%s", mdname(conf->mddev));
-	sprintf(conf->cache_name[1], "raid5-%s-alt", mdname(conf->mddev));
+	sprintf(conf->cache_name[0],
+		"raid%d-%s", conf->level, mdname(conf->mddev));
+	sprintf(conf->cache_name[1],
+		"raid%d-%s-alt", conf->level, mdname(conf->mddev));
 	conf->active_name = 0;
 	sc = kmem_cache_create(conf->cache_name[conf->active_name],
 			       sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev),
@@ -4149,22 +4151,22 @@ static struct attribute_group raid5_attrs_group = {
 	.attrs = raid5_attrs,
 };
 
-static raid5_conf_t *setup_conf(mddev_t *mddev)
+static raid5_conf_t *setup_conf(mddev_t *mddev, int raid_disks, int level, int layout)
 {
 	raid5_conf_t *conf;
 	int raid_disk, memory;
 	mdk_rdev_t *rdev;
 	struct disk_info *disk;
 
-	if (mddev->level != 5 && mddev->level != 4 && mddev->level != 6) {
+	if (level != 5 && level != 4 && level != 6) {
 		printk(KERN_ERR "raid5: %s: raid level not set to 4/5/6 (%d)\n",
-		       mdname(mddev), mddev->level);
+		       mdname(mddev), level);
 		return ERR_PTR(-EIO);
 	}
-	if ((mddev->level == 5 && !algorithm_valid_raid5(mddev->layout)) ||
-	    (mddev->level == 6 && !algorithm_valid_raid6(mddev->layout))) {
+	if ((level == 5 && !algorithm_valid_raid5(layout)) ||
+	    (level == 6 && !algorithm_valid_raid6(layout))) {
 		printk(KERN_ERR "raid5: %s: layout %d not supported\n",
-		       mdname(mddev), mddev->layout);
+		       mdname(mddev), layout);
 		return ERR_PTR(-EIO);
 	}
 
@@ -4180,10 +4182,10 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
 		goto abort;
 
 	if (mddev->reshape_position == MaxSector) {
-		conf->previous_raid_disks = conf->raid_disks = mddev->raid_disks;
+		conf->previous_raid_disks = conf->raid_disks = raid_disks;
 	} else {
-		conf->raid_disks = mddev->raid_disks;
-		conf->previous_raid_disks = mddev->raid_disks - mddev->delta_disks;
+		conf->raid_disks = raid_disks;
+		conf->previous_raid_disks = raid_disks - mddev->delta_disks;
 	}
 
 	conf->disks = kzalloc(conf->raid_disks * sizeof(struct disk_info),
@@ -4196,7 +4198,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
 	if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL)
 		goto abort;
 
-	if (mddev->level == 6) {
+	if (level == 6) {
 		conf->spare_page = alloc_page(GFP_KERNEL);
 		if (!conf->spare_page)
 			goto abort;
@@ -4236,12 +4238,12 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
 	}
 
 	conf->chunk_size = mddev->chunk_size;
-	conf->level = mddev->level;
+	conf->level = level;
 	if (conf->level == 6)
 		conf->max_degraded = 2;
 	else
 		conf->max_degraded = 1;
-	conf->algorithm = mddev->layout;
+	conf->algorithm = layout;
 	conf->max_nr_stripes = NR_STRIPES;
 	conf->expand_progress = mddev->reshape_position;
 
@@ -4327,10 +4329,14 @@ static int run(mddev_t *mddev)
 		/* OK, we should be able to continue; */
 	}
 
-	conf = setup_conf(mddev);
+	if (mddev->private == NULL)
+		conf = setup_conf(mddev, mddev->raid_disks, mddev->level, mddev->layout);
+	else {
+		conf = mddev->private;
+		mddev->raid_disks = conf->raid_disks;
+		mddev->layout = conf->algorithm;
+	}
 
-	if (conf == NULL)
-		return -EIO;
 	if (IS_ERR(conf))
 		return PTR_ERR(conf);
 
@@ -4383,7 +4389,11 @@ static int run(mddev_t *mddev)
 		}
 	}
 
-	mddev->thread = md_register_thread(raid5d, mddev, "%s_raid5");
+	if (conf->thread) {
+		mddev->thread = conf->thread;
+		conf->thread = NULL;
+	} else
+		mddev->thread = md_register_thread(raid5d, mddev, "%s_raid5");
 	if (!mddev->thread) {
 		printk(KERN_ERR 
 		       "raid5: couldn't allocate thread for %s\n",
@@ -4859,6 +4869,65 @@ static void raid5_quiesce(mddev_t *mddev, int state)
 	}
 }
 
+static struct mdk_personality raid5_personality;
+
+static void *raid6_takeover(mddev_t *mddev)
+{
+	/* Currently can only take over a raid5.  We map the
+	 * personality to an equivalent raid6 personality
+	 * with the Q block at the end.
+	 */
+	int new_layout;
+	raid5_conf_t *conf;
+
+	if (mddev->pers != &raid5_personality)
+		return ERR_PTR(-EINVAL);
+	if (mddev->degraded > 1)
+		return ERR_PTR(-EINVAL);
+	if (mddev->raid_disks > 253)
+		return ERR_PTR(-EINVAL);
+	if (mddev->raid_disks < 3)
+		return ERR_PTR(-EINVAL);
+
+	switch(mddev->layout) {
+	case ALGORITHM_LEFT_ASYMMETRIC:
+		new_layout = ALGORITHM_LEFT_ASYMMETRIC_6;
+		break;
+	case ALGORITHM_RIGHT_ASYMMETRIC:
+		new_layout = ALGORITHM_RIGHT_ASYMMETRIC_6;
+		break;
+	case ALGORITHM_LEFT_SYMMETRIC:
+		new_layout = ALGORITHM_LEFT_SYMMETRIC_6;
+		break;
+	case ALGORITHM_RIGHT_SYMMETRIC:
+		new_layout = ALGORITHM_RIGHT_SYMMETRIC_6;
+		break;
+	case ALGORITHM_PARITY_0:
+		new_layout = ALGORITHM_PARITY_0_6;
+		break;
+	case ALGORITHM_PARITY_N:
+		new_layout = ALGORITHM_PARITY_N;
+		break;
+	default:
+		return ERR_PTR(-EINVAL);
+	}
+	conf = setup_conf(mddev, mddev->raid_disks + 1, 6, new_layout);
+	if (IS_ERR(conf))
+		return conf;
+
+	conf->thread = md_register_thread(raid5d, mddev, "%s_raid5");
+	if (conf->thread)
+		return conf;
+
+	safe_put_page(conf->spare_page);
+	kfree(conf->disks);
+	kfree(conf->stripe_hashtbl);
+	kfree(conf);
+
+	return ERR_PTR(-ENOMEM);
+}
+
+
 static struct mdk_personality raid6_personality =
 {
 	.name		= "raid6",
@@ -4879,6 +4948,7 @@ static struct mdk_personality raid6_personality =
 	.start_reshape  = raid5_start_reshape,
 #endif
 	.quiesce	= raid5_quiesce,
+	.takeover	= raid6_takeover,
 };
 static struct mdk_personality raid5_personality =
 {
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index a815bab..3755045 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -334,6 +334,16 @@ struct mdk_personality
 	 * others - reserved
 	 */
 	void (*quiesce) (mddev_t *mddev, int state);
+	/* takeover is used to transition an array from one
+	 * personality to another.  The new personality must be able
+	 * to handle the data in the current layout.
+	 * e.g. 2drive raid1 -> 2drive raid5
+	 *      ndrive raid5 -> degraded n+1drive raid6 with special layout
+	 * If the takeover succeeds, a new 'private' structure is returned.
+	 * This needs to be installed and then ->quiesce used to activate the
+	 * array.
+	 */
+	void *(*takeover) (mddev_t *mddev);
 };
 
 
diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h
index 3adda05..4894cd5 100644
--- a/include/linux/raid/raid5.h
+++ b/include/linux/raid/raid5.h
@@ -387,6 +387,11 @@ struct raid5_private_data {
 	int			pool_size; /* number of disks in stripeheads in pool */
 	spinlock_t		device_lock;
 	struct disk_info	*disks;
+
+	/* When taking over an array from a different personality, we store
+	 * the new thread here until we fully activate the array.
+	 */
+	struct mdk_thread_s		*thread;
 };
 
 typedef struct raid5_private_data raid5_conf_t;



  parent reply	other threads:[~2009-02-12  3:10 UTC|newest]

Thread overview: 48+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-02-12  3:10 [PATCH 00/18] Assorted md patches headed for 2.6.30 NeilBrown
2009-02-12  3:10 ` [PATCH 01/18] md: never clear bit from the write-intent bitmap when the array is degraded NeilBrown
2009-02-12  3:10 ` [PATCH 04/18] md: be more consistent about setting WriteMostly flag when adding a drive to an array NeilBrown
2009-02-12  3:10 ` [PATCH 05/18] md: Make mddev->size sector-based NeilBrown
2009-02-12  3:10 ` [PATCH 06/18] md: Represent raid device size in sectors NeilBrown
2009-02-12  3:10 ` [PATCH 02/18] md: write bitmap information to devices that are undergoing recovery NeilBrown
2009-02-12  3:10 ` [PATCH 08/18] md/raid5: change raid5_compute_sector and stripe_to_pdidx to take a 'previous' argument NeilBrown
2009-02-12  3:10 ` [PATCH 07/18] md/raid5: simplify interface for init_stripe and get_active_stripe NeilBrown
2009-02-12  3:10 ` [PATCH 03/18] md: occasionally checkpoint drive recovery to reduce duplicate effort after a crash NeilBrown
2009-02-12 17:26   ` John Stoffel
2009-02-13 16:20   ` Bill Davidsen
2009-02-13 16:34     ` Jon Nelson
2009-02-12  3:10 ` [PATCH 09/18] md/raid6: remove expectation that Q device is immediately after P device NeilBrown
2009-02-12 16:56   ` Andre Noll
2009-02-13 22:19     ` Dan Williams
2009-02-16  0:08     ` Neil Brown
2009-02-13 16:37   ` Bill Davidsen
2009-02-16  5:15     ` Neil Brown
2009-02-12  3:10 ` [PATCH 14/18] md: md_unregister_thread should cope with being passed NULL NeilBrown
2009-02-12  3:10 ` [PATCH 15/18] md: hopefully enable suspend/resume of md devices NeilBrown
2009-02-12  3:10 ` [PATCH 10/18] md/raid5: simplify raid5_compute_sector interface NeilBrown
2009-02-12  3:10 ` [PATCH 12/18] md/raid5: finish support for DDF/raid6 NeilBrown
2009-02-12  3:10 ` [PATCH 18/18] md/raid5: allow layout/chunksize to be changed on an active2-drive raid5 NeilBrown
2009-02-12  3:10 ` NeilBrown [this message]
2009-02-12  3:10 ` [PATCH 17/18] md: add ->takeover method for raid5 to be able to take over raid1 NeilBrown
2009-02-12  3:10 ` [PATCH 11/18] md/raid5: Add support for new layouts for raid5 and raid6 NeilBrown
2009-02-12  3:10 ` [PATCH 13/18] md/raid5: refactor raid5 "run" NeilBrown
2009-02-12  8:11 ` [PATCH 00/18] Assorted md patches headed for 2.6.30 Keld Jørn Simonsen
2009-02-12  9:13   ` Steve Fairbairn
2009-02-12  9:46     ` Keld Jørn Simonsen
2009-02-12 10:52       ` NeilBrown
2009-02-12 11:16         ` Keld Jørn Simonsen
2009-02-12 10:53       ` Julian Cowley
2009-02-13 16:54         ` Bill Davidsen
2009-02-16  5:35           ` Neil Brown
2009-02-16 17:31             ` Nagilum
2009-02-12 22:57     ` Dan Williams
2009-02-13 16:56     ` Bill Davidsen
2009-02-12  9:21   ` NeilBrown
2009-02-12  9:53     ` Keld Jørn Simonsen
2009-02-12 10:45       ` NeilBrown
2009-02-12 11:11         ` Keld Jørn Simonsen
2009-02-12 15:28         ` Wil Reichert
2009-02-12 17:44           ` Keld Jørn Simonsen
2009-02-12  9:42 ` Farkas Levente
2009-02-12 10:40   ` NeilBrown
2009-02-12 11:17     ` Farkas Levente
2009-02-13 17:02       ` Bill Davidsen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20090212031011.23983.49110.stgit@notabene.brown \
    --to=neilb@suse.de \
    --cc=linux-raid@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).