All of lore.kernel.org
 help / color / mirror / Atom feed
From: NeilBrown <neilb@suse.de>
To: linux-raid@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [PATCH 002 of 7] md: Allow stripes to be expanded in preparation for expanding an array.
Date: Tue, 24 Jan 2006 11:40:56 +1100	[thread overview]
Message-ID: <1060124004056.5027@suse.de> (raw)
In-Reply-To: 20060124112626.4447.patches@notabene


Before a RAID-5 can be expanded, we need to be able to expand the
stripe-cache data structure.  
This requires allocating new stripes in a new kmem_cache.
If this succeeds, we copy cache pages over and release the old
stripes and kmem_cache.
We then allocate new pages.  If that fails, we leave the stripe
cache at it's new size.  It isn't worth the effort to shink 
it back again.

Signed-off-by: Neil Brown <neilb@suse.de>

### Diffstat output
 ./drivers/md/raid5.c         |  118 +++++++++++++++++++++++++++++++++++++++++--
 ./drivers/md/raid6main.c     |    4 -
 ./include/linux/raid/raid5.h |    9 ++-
 3 files changed, 123 insertions(+), 8 deletions(-)

diff ./drivers/md/raid5.c~current~ ./drivers/md/raid5.c
--- ./drivers/md/raid5.c~current~	2006-01-24 10:18:31.000000000 +1100
+++ ./drivers/md/raid5.c	2006-01-24 10:20:33.000000000 +1100
@@ -313,20 +313,130 @@ static int grow_stripes(raid5_conf_t *co
 	kmem_cache_t *sc;
 	int devs = conf->raid_disks;
 
-	sprintf(conf->cache_name, "raid5/%s", mdname(conf->mddev));
-
-	sc = kmem_cache_create(conf->cache_name, 
+	sprintf(conf->cache_name[0], "raid5/%s", mdname(conf->mddev));
+	sprintf(conf->cache_name[1], "raid5/%s-alt", mdname(conf->mddev));
+	conf->active_name = 0;
+	sc = kmem_cache_create(conf->cache_name[conf->active_name],
 			       sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev),
 			       0, 0, NULL, NULL);
 	if (!sc)
 		return 1;
 	conf->slab_cache = sc;
+	conf->pool_size = devs;
 	while (num--) {
 		if (!grow_one_stripe(conf))
 			return 1;
 	}
 	return 0;
 }
+static int resize_stripes(raid5_conf_t *conf, int newsize)
+{
+	/* make all the stripes able to hold 'newsize' devices.
+	 * New slots in each stripe get 'page' set to a new page.
+	 * We allocate all the new stripes first, then if that succeeds,
+	 * copy everything across.
+	 * Finally we add new pages.  This could fail, but we leave
+	 * the stripe cache at it's new size, just with some pages empty.
+	 *
+	 * We use GFP_NOIO allocations as IO to the raid5 is blocked
+	 * at some points in this operation.
+	 */
+	struct stripe_head *osh, *nsh;
+	struct list_head newstripes, oldstripes;
+	struct disk_info *ndisks;
+	int err = 0;
+	kmem_cache_t *sc;
+	int i;
+
+	if (newsize <= conf->pool_size)
+		return 0; /* never bother to shrink */
+
+	sc = kmem_cache_create(conf->cache_name[1-conf->active_name],
+			       sizeof(struct stripe_head)+(newsize-1)*sizeof(struct r5dev),
+			       0, 0, NULL, NULL);
+	if (!sc)
+		return -ENOMEM;
+	INIT_LIST_HEAD(&newstripes);
+	for (i = conf->max_nr_stripes; i; i--) {
+		nsh = kmem_cache_alloc(sc, GFP_NOIO);
+		if (!nsh)
+			break;
+
+		memset(nsh, 0, sizeof(*nsh) + (newsize-1)*sizeof(struct r5dev));
+
+		nsh->raid_conf = conf;
+		spin_lock_init(&nsh->lock);
+
+		list_add(&nsh->lru, &newstripes);
+	}
+	if (i) {
+		/* didn't get enough, give up */
+		while (!list_empty(&newstripes)) {
+			nsh = list_entry(newstripes.next, struct stripe_head, lru);
+			list_del(&nsh->lru);
+			kmem_cache_free(sc, nsh);
+		}
+		kmem_cache_destroy(sc);
+		return -ENOMEM;
+	}
+	/* OK, we have enough stripes, start collecting inactive
+	 * stripes and copying them over
+	 */
+	INIT_LIST_HEAD(&oldstripes);
+	list_for_each_entry(nsh, &newstripes, lru) {
+		spin_lock_irq(&conf->device_lock);
+		wait_event_lock_irq(conf->wait_for_stripe,
+				    !list_empty(&conf->inactive_list),
+				    conf->device_lock,
+				    unplug_slaves(conf->mddev);
+			);
+		osh = get_free_stripe(conf);
+		spin_unlock_irq(&conf->device_lock);
+		atomic_set(&nsh->count, 1);
+		for(i=0; i<conf->pool_size; i++)
+			nsh->dev[i].page = osh->dev[i].page;
+		for( ; i<newsize; i++)
+			nsh->dev[i].page = NULL;
+		list_add(&osh->lru, &oldstripes);
+	}
+	/* Got them all.
+	 * Return the new ones and free the old ones.
+	 * At this point, we are holding all the stripes so the array
+	 * is completely stalled, so now is a good time to resize
+	 * conf->disks.
+	 */
+	ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO);
+	if (ndisks) {
+		for (i=0; i<conf->raid_disks; i++)
+			ndisks[i] = conf->disks[i];
+		kfree(conf->disks);
+		conf->disks = ndisks;
+	} else
+		err = -ENOMEM;
+	while(!list_empty(&newstripes)) {
+		nsh = list_entry(newstripes.next, struct stripe_head, lru);
+		list_del_init(&nsh->lru);
+		for (i=conf->raid_disks; i < newsize; i++)
+			if (nsh->dev[i].page == NULL) {
+				struct page *p = alloc_page(GFP_NOIO);
+				nsh->dev[i].page = p;
+				if (!p)
+					err = -ENOMEM;
+			}
+		release_stripe(nsh);
+	}
+	while(!list_empty(&oldstripes)) {
+		osh = list_entry(oldstripes.next, struct stripe_head, lru);
+		list_del(&osh->lru);
+		kmem_cache_free(conf->slab_cache, osh);
+	}
+	kmem_cache_destroy(conf->slab_cache);
+	conf->slab_cache = sc;
+	conf->active_name = 1-conf->active_name;
+	conf->pool_size = newsize;
+	return err;
+}
+
 
 static int drop_one_stripe(raid5_conf_t *conf)
 {
@@ -339,7 +449,7 @@ static int drop_one_stripe(raid5_conf_t 
 		return 0;
 	if (atomic_read(&sh->count))
 		BUG();
-	shrink_buffers(sh, conf->raid_disks);
+	shrink_buffers(sh, conf->pool_size);
 	kmem_cache_free(conf->slab_cache, sh);
 	atomic_dec(&conf->active_stripes);
 	return 1;

diff ./drivers/md/raid6main.c~current~ ./drivers/md/raid6main.c
--- ./drivers/md/raid6main.c~current~	2006-01-24 10:18:31.000000000 +1100
+++ ./drivers/md/raid6main.c	2006-01-24 10:18:50.000000000 +1100
@@ -308,9 +308,9 @@ static int grow_stripes(raid6_conf_t *co
 	kmem_cache_t *sc;
 	int devs = conf->raid_disks;
 
-	sprintf(conf->cache_name, "raid6/%s", mdname(conf->mddev));
+	sprintf(conf->cache_name[0], "raid6/%s", mdname(conf->mddev));
 
-	sc = kmem_cache_create(conf->cache_name,
+	sc = kmem_cache_create(conf->cache_name[0],
 			       sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev),
 			       0, 0, NULL, NULL);
 	if (!sc)

diff ./include/linux/raid/raid5.h~current~ ./include/linux/raid/raid5.h
--- ./include/linux/raid/raid5.h~current~	2006-01-24 10:18:31.000000000 +1100
+++ ./include/linux/raid/raid5.h	2006-01-24 10:18:50.000000000 +1100
@@ -216,7 +216,11 @@ struct raid5_private_data {
 	struct list_head	bitmap_list; /* stripes delaying awaiting bitmap update */
 	atomic_t		preread_active_stripes; /* stripes with scheduled io */
 
-	char			cache_name[20];
+	/* unfortunately we need two cache names as we temporarily have
+	 * two caches.
+	 */
+	int			active_name;
+	char			cache_name[2][20];
 	kmem_cache_t		*slab_cache; /* for allocating stripes */
 
 	int			seq_flush, seq_write;
@@ -238,7 +242,8 @@ struct raid5_private_data {
 	wait_queue_head_t	wait_for_overlap;
 	int			inactive_blocked;	/* release of inactive stripes blocked,
 							 * waiting for 25% to be free
-							 */        
+							 */
+	int			pool_size; /* number of disks in stripeheads in pool */
 	spinlock_t		device_lock;
 	struct disk_info	*disks;
 };

  parent reply	other threads:[~2006-01-24  0:40 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-01-24  0:40 [PATCH 000 of 7] md: Introduction - raid5 reshape mark-2 NeilBrown
2006-01-24  0:40 ` [PATCH 001 of 7] md: Split disks array out of raid5 conf structure so it is easier to grow NeilBrown
2006-01-24  0:40 ` NeilBrown [this message]
2006-01-24  0:41 ` [PATCH 003 of 7] md: Infrastructure to allow normal IO to continue while array is expanding NeilBrown
2006-01-24  0:41 ` [PATCH 004 of 7] md: Core of raid5 resize process NeilBrown
2006-01-24  0:41 ` [PATCH 005 of 7] md: Final stages of raid5 expand code NeilBrown
2006-01-24  0:41 ` [PATCH 006 of 7] md: Checkpoint and allow restart of raid5 reshape NeilBrown
2006-01-27 12:37   ` Molle Bestefich
2006-01-24  0:41 ` [PATCH 007 of 7] md: Only checkpoint expansion progress occasionally NeilBrown
2006-01-24  9:23 ` [PATCH 000 of 7] md: Introduction - raid5 reshape mark-2 Lars Marowsky-Bree
2006-01-24  9:23   ` Lars Marowsky-Bree
2006-01-24  9:32   ` Neil Brown
2006-02-07 17:13 ` Henrik Holst
2006-02-09  3:32   ` Neil Brown
2006-02-09  6:35     ` Kernels and MD versions (was: md: Introduction - raid5 reshape mark-2) Patrik Jonsson
2006-02-09 18:07       ` Mr. James W. Laferriere

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1060124004056.5027@suse.de \
    --to=neilb@suse.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-raid@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.