From: NeilBrown <neilb@suse.de>
To: Andrew Morton <akpm@osdl.org>
Cc: linux-raid@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [PATCH 005 of 13] md: Allow stripes to be expanded in preparation for expanding an array.
Date: Fri, 17 Mar 2006 15:47:45 +1100
Message-ID: <1060317044745.16072@suse.de>
In-Reply-To: <20060317154017.15880.patches@notabene>

Before a RAID-5 array can be expanded, we need to be able to expand
the stripe-cache data structure.

This requires allocating new stripes in a new kmem_cache.  If this
succeeds, we copy the cache pages across and release the old stripes
and kmem_cache.

We then allocate new pages.  If that fails, we leave the stripe cache
at its new size.  It isn't worth the effort to shrink it back again.

Unfortunately this means we need two kmem_cache names because, for a
short period of time, we have two kmem_caches.  So they are
raid5/%s and raid5/%s-alt.
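
For readers less familiar with the slab allocator, the shape of the
operation may be easier to see in plain C.  What follows is only a
rough userspace sketch of the allocate-new/copy/commit pattern, using
malloc/calloc in place of kmem_cache_create() and alloc_page(); the
struct and function names (pool, pool_resize) are made up for
illustration and do not appear in the patch.

#include <errno.h>
#include <stdlib.h>

struct item {
	int ndevs;		/* devices this item was sized for */
	void **pages;		/* one page pointer per device */
};

struct pool {
	int nitems;
	int pool_size;		/* devices per item, like conf->pool_size */
	struct item *items;
};

static int pool_resize(struct pool *p, int newsize)
{
	struct item *nitems;
	int i, j, err = 0;

	if (newsize <= p->pool_size)
		return 0;	/* never bother to shrink */

	/* Step 1: allocate the complete new set before touching the old. */
	nitems = calloc(p->nitems, sizeof(*nitems));
	if (!nitems)
		return -ENOMEM;
	for (i = 0; i < p->nitems; i++) {
		nitems[i].pages = calloc(newsize, sizeof(void *));
		if (!nitems[i].pages)
			goto fail;
		nitems[i].ndevs = newsize;
	}

	/* Step 2: move the existing pages across, one item at a time. */
	for (i = 0; i < p->nitems; i++)
		for (j = 0; j < p->pool_size; j++)
			nitems[i].pages[j] = p->items[i].pages[j];

	/* Step 3: populate the new slots.  A failure here is recorded,
	 * but the pool still commits to its new size, as the patch does.
	 */
	for (i = 0; i < p->nitems; i++)
		for (j = p->pool_size; j < newsize; j++) {
			nitems[i].pages[j] = malloc(4096);
			if (!nitems[i].pages[j])
				err = -ENOMEM;
		}

	/* Step 4: free the old containers (not the pages, which moved). */
	for (i = 0; i < p->nitems; i++)
		free(p->items[i].pages);
	free(p->items);
	p->items = nitems;
	p->pool_size = newsize;
	return err;

fail:
	while (i--)
		free(nitems[i].pages);
	free(nitems);
	return -ENOMEM;
}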
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./drivers/md/raid5.c | 118 +++++++++++++++++++++++++++++++++++++++++--
./drivers/md/raid6main.c | 4 -
./include/linux/raid/raid5.h | 9 ++-
3 files changed, 123 insertions(+), 8 deletions(-)
diff ./drivers/md/raid5.c~current~ ./drivers/md/raid5.c
--- ./drivers/md/raid5.c~current~ 2006-03-17 11:48:55.000000000 +1100
+++ ./drivers/md/raid5.c 2006-03-17 11:48:56.000000000 +1100
@@ -313,20 +313,130 @@ static int grow_stripes(raid5_conf_t *co
kmem_cache_t *sc;
int devs = conf->raid_disks;
- sprintf(conf->cache_name, "raid5/%s", mdname(conf->mddev));
-
- sc = kmem_cache_create(conf->cache_name,
+ sprintf(conf->cache_name[0], "raid5/%s", mdname(conf->mddev));
+ sprintf(conf->cache_name[1], "raid5/%s-alt", mdname(conf->mddev));
+ conf->active_name = 0;
+ sc = kmem_cache_create(conf->cache_name[conf->active_name],
sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev),
0, 0, NULL, NULL);
if (!sc)
return 1;
conf->slab_cache = sc;
+ conf->pool_size = devs;
while (num--) {
if (!grow_one_stripe(conf))
return 1;
}
return 0;
}
+static int resize_stripes(raid5_conf_t *conf, int newsize)
+{
+ /* make all the stripes able to hold 'newsize' devices.
+ * New slots in each stripe get 'page' set to a new page.
+ * We allocate all the new stripes first, then if that succeeds,
+ * copy everything across.
+ * Finally we add new pages. This could fail, but we leave
+ * the stripe cache at its new size, just with some pages empty.
+ *
+ * We use GFP_NOIO allocations as IO to the raid5 is blocked
+ * at some points in this operation.
+ */
+ struct stripe_head *osh, *nsh;
+ struct list_head newstripes, oldstripes;
+ struct disk_info *ndisks;
+ int err = 0;
+ kmem_cache_t *sc;
+ int i;
+
+ if (newsize <= conf->pool_size)
+ return 0; /* never bother to shrink */
+
+ sc = kmem_cache_create(conf->cache_name[1-conf->active_name],
+ sizeof(struct stripe_head)+(newsize-1)*sizeof(struct r5dev),
+ 0, 0, NULL, NULL);
+ if (!sc)
+ return -ENOMEM;
+ INIT_LIST_HEAD(&newstripes);
+ for (i = conf->max_nr_stripes; i; i--) {
+ nsh = kmem_cache_alloc(sc, GFP_NOIO);
+ if (!nsh)
+ break;
+
+ memset(nsh, 0, sizeof(*nsh) + (newsize-1)*sizeof(struct r5dev));
+
+ nsh->raid_conf = conf;
+ spin_lock_init(&nsh->lock);
+
+ list_add(&nsh->lru, &newstripes);
+ }
+ if (i) {
+ /* didn't get enough, give up */
+ while (!list_empty(&newstripes)) {
+ nsh = list_entry(newstripes.next, struct stripe_head, lru);
+ list_del(&nsh->lru);
+ kmem_cache_free(sc, nsh);
+ }
+ kmem_cache_destroy(sc);
+ return -ENOMEM;
+ }
+ /* OK, we have enough stripes, start collecting inactive
+ * stripes and copying them over
+ */
+ INIT_LIST_HEAD(&oldstripes);
+ list_for_each_entry(nsh, &newstripes, lru) {
+ spin_lock_irq(&conf->device_lock);
+ wait_event_lock_irq(conf->wait_for_stripe,
+ !list_empty(&conf->inactive_list),
+ conf->device_lock,
+ unplug_slaves(conf->mddev);
+ );
+ osh = get_free_stripe(conf);
+ spin_unlock_irq(&conf->device_lock);
+ atomic_set(&nsh->count, 1);
+ for(i=0; i<conf->pool_size; i++)
+ nsh->dev[i].page = osh->dev[i].page;
+ for( ; i<newsize; i++)
+ nsh->dev[i].page = NULL;
+ list_add(&osh->lru, &oldstripes);
+ }
+ /* Got them all.
+ * Return the new ones and free the old ones.
+ * At this point, we are holding all the stripes so the array
+ * is completely stalled, so now is a good time to resize
+ * conf->disks.
+ */
+ ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO);
+ if (ndisks) {
+ for (i=0; i<conf->raid_disks; i++)
+ ndisks[i] = conf->disks[i];
+ kfree(conf->disks);
+ conf->disks = ndisks;
+ } else
+ err = -ENOMEM;
+ while(!list_empty(&newstripes)) {
+ nsh = list_entry(newstripes.next, struct stripe_head, lru);
+ list_del_init(&nsh->lru);
+ for (i=conf->raid_disks; i < newsize; i++)
+ if (nsh->dev[i].page == NULL) {
+ struct page *p = alloc_page(GFP_NOIO);
+ nsh->dev[i].page = p;
+ if (!p)
+ err = -ENOMEM;
+ }
+ release_stripe(nsh);
+ }
+ while(!list_empty(&oldstripes)) {
+ osh = list_entry(oldstripes.next, struct stripe_head, lru);
+ list_del(&osh->lru);
+ kmem_cache_free(conf->slab_cache, osh);
+ }
+ kmem_cache_destroy(conf->slab_cache);
+ conf->slab_cache = sc;
+ conf->active_name = 1-conf->active_name;
+ conf->pool_size = newsize;
+ return err;
+}
+
static int drop_one_stripe(raid5_conf_t *conf)
{
@@ -339,7 +449,7 @@ static int drop_one_stripe(raid5_conf_t
return 0;
if (atomic_read(&sh->count))
BUG();
- shrink_buffers(sh, conf->raid_disks);
+ shrink_buffers(sh, conf->pool_size);
kmem_cache_free(conf->slab_cache, sh);
atomic_dec(&conf->active_stripes);
return 1;
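
The collection loop in resize_stripes() above sleeps in
wait_event_lock_irq() until a stripe appears on the inactive list.
As a rough userspace analogy only -- POSIX threads, made-up names,
not kernel code -- the same wait-under-a-lock pattern looks like
this:

#include <pthread.h>
#include <stddef.h>

struct node {
	struct node *next;
};

struct freelist {
	pthread_mutex_t lock;		/* plays the role of conf->device_lock */
	pthread_cond_t nonempty;	/* plays the role of conf->wait_for_stripe */
	struct node *head;
};

/* Like get_free_stripe() wrapped in wait_event_lock_irq(): sleep,
 * with the lock dropped while sleeping, until an item is available,
 * then take it.
 */
static struct node *take_one(struct freelist *fl)
{
	struct node *n;

	pthread_mutex_lock(&fl->lock);
	while (fl->head == NULL)
		pthread_cond_wait(&fl->nonempty, &fl->lock);
	n = fl->head;
	fl->head = n->next;
	pthread_mutex_unlock(&fl->lock);
	return n;
}

/* Loosely like release_stripe() returning an inactive stripe and
 * waking any waiter (the real function also manages refcounts).
 */
static void put_one(struct freelist *fl, struct node *n)
{
	pthread_mutex_lock(&fl->lock);
	n->next = fl->head;
	fl->head = n;
	pthread_cond_signal(&fl->nonempty);
	pthread_mutex_unlock(&fl->lock);
}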
diff ./drivers/md/raid6main.c~current~ ./drivers/md/raid6main.c
--- ./drivers/md/raid6main.c~current~ 2006-03-17 11:48:55.000000000 +1100
+++ ./drivers/md/raid6main.c 2006-03-17 11:48:56.000000000 +1100
@@ -331,9 +331,9 @@ static int grow_stripes(raid6_conf_t *co
kmem_cache_t *sc;
int devs = conf->raid_disks;
- sprintf(conf->cache_name, "raid6/%s", mdname(conf->mddev));
+ sprintf(conf->cache_name[0], "raid6/%s", mdname(conf->mddev));
- sc = kmem_cache_create(conf->cache_name,
+ sc = kmem_cache_create(conf->cache_name[0],
sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev),
0, 0, NULL, NULL);
if (!sc)
diff ./include/linux/raid/raid5.h~current~ ./include/linux/raid/raid5.h
--- ./include/linux/raid/raid5.h~current~ 2006-03-17 11:48:55.000000000 +1100
+++ ./include/linux/raid/raid5.h 2006-03-17 11:48:56.000000000 +1100
@@ -216,7 +216,11 @@ struct raid5_private_data {
struct list_head bitmap_list; /* stripes delaying awaiting bitmap update */
atomic_t preread_active_stripes; /* stripes with scheduled io */
- char cache_name[20];
+ /* unfortunately we need two cache names as we temporarily have
+ * two caches.
+ */
+ int active_name;
+ char cache_name[2][20];
kmem_cache_t *slab_cache; /* for allocating stripes */
int seq_flush, seq_write;
@@ -238,7 +242,8 @@ struct raid5_private_data {
wait_queue_head_t wait_for_overlap;
int inactive_blocked; /* release of inactive stripes blocked,
* waiting for 25% to be free
- */
+ */
+ int pool_size; /* number of disks in stripeheads in pool */
spinlock_t device_lock;
struct disk_info *disks;
};