From: NeilBrown <neilb@suse.de>
To: linux-raid@vger.kernel.org, linux-kernel@vger.kernel.org
Cc: "Steinar H. Gunderson" <sgunderson@bigfoot.com>
Subject: [PATCH 002 of 5] md: Allow stripes to be expanded in preparation for expanding an array.
Date: Tue, 17 Jan 2006 17:56:19 +1100 [thread overview]
Message-ID: <1060117065619.27843@suse.de> (raw)
In-Reply-To: 20060117174531.27739.patches@notabene
Before a RAID-5 can be expanded, we need to be able to expand the
stripe-cache data structure.
This requires allocating new stripes in a new kmem_cache.
If this succeeds, we copy cache pages over and release the old
stripes and kmem_cache.
We then allocate new pages. If that fails, we leave the stripe
cache at its new size. It isn't worth the effort to shrink
it back again.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./drivers/md/raid5.c | 116 +++++++++++++++++++++++++++++++++++++++++--
./drivers/md/raid6main.c | 4 -
./include/linux/raid/raid5.h | 9 ++-
3 files changed, 121 insertions(+), 8 deletions(-)
diff ./drivers/md/raid5.c~current~ ./drivers/md/raid5.c
--- ./drivers/md/raid5.c~current~ 2006-01-17 17:33:09.000000000 +1100
+++ ./drivers/md/raid5.c 2006-01-17 17:33:23.000000000 +1100
@@ -313,14 +313,16 @@ static int grow_stripes(raid5_conf_t *co
kmem_cache_t *sc;
int devs = conf->raid_disks;
- sprintf(conf->cache_name, "raid5/%s", mdname(conf->mddev));
-
- sc = kmem_cache_create(conf->cache_name,
+ sprintf(conf->cache_name[0], "raid5/%s", mdname(conf->mddev));
+ sprintf(conf->cache_name[1], "raid5/%s-alt", mdname(conf->mddev));
+ conf->active_name = 0;
+ sc = kmem_cache_create(conf->cache_name[conf->active_name],
sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev),
0, 0, NULL, NULL);
if (!sc)
return 1;
conf->slab_cache = sc;
+ conf->pool_size = devs;
while (num--) {
if (!grow_one_stripe(conf))
return 1;
@@ -328,6 +330,112 @@ static int grow_stripes(raid5_conf_t *co
return 0;
}
+static int resize_stripes(raid5_conf_t *conf, int newsize)
+{
+ /* Make all the stripes able to hold 'newsize' devices.
+ * New slots in each stripe get 'page' set to a newly allocated page.
+ * We allocate all the new stripes first, then, if that succeeds,
+ * copy the page pointers across from the old stripes.
+ * Finally we add new pages. This could fail (returning -ENOMEM), but
+ * we leave the stripe cache at its new size, just with some pages empty.
+ */
+ struct stripe_head *osh, *nsh;
+ struct list_head newstripes, oldstripes;
+ struct disk_info *ndisks;
+ int err = 0;
+ kmem_cache_t *sc;
+ int i;
+
+ if (newsize <= conf->pool_size)
+ return 0; /* never bother to shrink */
+
+ sc = kmem_cache_create(conf->cache_name[1-conf->active_name],
+ sizeof(struct stripe_head)+(newsize-1)*sizeof(struct r5dev),
+ 0, 0, NULL, NULL);
+ if (!sc)
+ return -ENOMEM;
+ INIT_LIST_HEAD(&newstripes);
+ for (i = conf->max_nr_stripes; i; i--) {
+ nsh = kmem_cache_alloc(sc, GFP_KERNEL);
+ if (!nsh)
+ break;
+
+ memset(nsh, 0, sizeof(*nsh) + (newsize-1)*sizeof(struct r5dev));
+
+ nsh->raid_conf = conf;
+ spin_lock_init(&nsh->lock);
+
+ list_add(&nsh->lru, &newstripes);
+ }
+ if (i) {
+ /* didn't get enough new stripes: free them and give up */
+ while (!list_empty(&newstripes)) {
+ nsh = list_entry(newstripes.next, struct stripe_head, lru);
+ list_del(&nsh->lru);
+ kmem_cache_free(sc, nsh);
+ }
+ kmem_cache_destroy(sc);
+ return -ENOMEM;
+ }
+ /* OK, we have enough stripes. For each new stripe, wait for an
+ * inactive old stripe and take over its page pointers.
+ */
+ INIT_LIST_HEAD(&oldstripes);
+ list_for_each_entry(nsh, &newstripes, lru) {
+ spin_lock_irq(&conf->device_lock);
+ wait_event_lock_irq(conf->wait_for_stripe,
+ !list_empty(&conf->inactive_list),
+ conf->device_lock,
+ unplug_slaves(conf->mddev);
+ );
+ osh = get_free_stripe(conf);
+ spin_unlock_irq(&conf->device_lock);
+ atomic_set(&nsh->count, 1);
+ for(i=0; i<conf->pool_size; i++)
+ nsh->dev[i].page = osh->dev[i].page;
+ for( ; i<newsize; i++)
+ nsh->dev[i].page = NULL;
+ list_add(&osh->lru, &oldstripes);
+ }
+ /* Got them all.
+ * Return the new ones and free the old ones.
+ * At this point, we are holding all the stripes so the array
+ * is completely stalled, so now is a good time to resize
+ * conf->disks (on failure the old array is kept, err = -ENOMEM).
+ */
+ ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_KERNEL);
+ if (ndisks) {
+ for (i=0; i<conf->raid_disks; i++)
+ ndisks[i] = conf->disks[i];
+ kfree(conf->disks);
+ conf->disks = ndisks;
+ } else
+ err = -ENOMEM;
+ while(!list_empty(&newstripes)) {
+ nsh = list_entry(newstripes.next, struct stripe_head, lru);
+ list_del_init(&nsh->lru);
+ for (i=conf->raid_disks; i < newsize; i++)
+ if (nsh->dev[i].page == NULL) {
+ struct page *p = alloc_page(GFP_KERNEL);
+ nsh->dev[i].page = p;
+ if (!p)
+ err = -ENOMEM;
+ }
+ release_stripe(nsh);
+ }
+ while(!list_empty(&oldstripes)) {
+ osh = list_entry(oldstripes.next, struct stripe_head, lru);
+ list_del(&osh->lru);
+ kmem_cache_free(conf->slab_cache, osh);
+ }
+ kmem_cache_destroy(conf->slab_cache);
+ conf->slab_cache = sc;
+ conf->active_name = 1-conf->active_name;
+ conf->pool_size = newsize;
+ return err;
+}
+
+
static int drop_one_stripe(raid5_conf_t *conf)
{
struct stripe_head *sh;
@@ -339,7 +447,7 @@ static int drop_one_stripe(raid5_conf_t
return 0;
if (atomic_read(&sh->count))
BUG();
- shrink_buffers(sh, conf->raid_disks);
+ shrink_buffers(sh, conf->pool_size);
kmem_cache_free(conf->slab_cache, sh);
atomic_dec(&conf->active_stripes);
return 1;
diff ./drivers/md/raid6main.c~current~ ./drivers/md/raid6main.c
--- ./drivers/md/raid6main.c~current~ 2006-01-17 17:33:09.000000000 +1100
+++ ./drivers/md/raid6main.c 2006-01-17 17:33:23.000000000 +1100
@@ -308,9 +308,9 @@ static int grow_stripes(raid6_conf_t *co
kmem_cache_t *sc;
int devs = conf->raid_disks;
- sprintf(conf->cache_name, "raid6/%s", mdname(conf->mddev));
+ sprintf(conf->cache_name[0], "raid6/%s", mdname(conf->mddev));
- sc = kmem_cache_create(conf->cache_name,
+ sc = kmem_cache_create(conf->cache_name[0],
sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev),
0, 0, NULL, NULL);
if (!sc)
diff ./include/linux/raid/raid5.h~current~ ./include/linux/raid/raid5.h
--- ./include/linux/raid/raid5.h~current~ 2006-01-17 17:33:09.000000000 +1100
+++ ./include/linux/raid/raid5.h 2006-01-17 17:33:23.000000000 +1100
@@ -216,7 +216,11 @@ struct raid5_private_data {
struct list_head bitmap_list; /* stripes delaying awaiting bitmap update */
atomic_t preread_active_stripes; /* stripes with scheduled io */
- char cache_name[20];
+ /* unfortunately we need two cache names as we temporarily have
+ * two caches.
+ */
+ int active_name;
+ char cache_name[2][20];
kmem_cache_t *slab_cache; /* for allocating stripes */
int seq_flush, seq_write;
@@ -238,7 +242,8 @@ struct raid5_private_data {
wait_queue_head_t wait_for_overlap;
int inactive_blocked; /* release of inactive stripes blocked,
* waiting for 25% to be free
- */
+ */
+ int pool_size; /* number of disks in stripeheads in pool */
spinlock_t device_lock;
struct disk_info *disks;
};
next prev parent reply other threads:[~2006-01-17 6:56 UTC|newest]
Thread overview: 52+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-01-17 6:56 [PATCH 000 of 5] md: Introduction NeilBrown
2006-01-17 6:56 ` [PATCH 001 of 5] md: Split disks array out of raid5 conf structure so it is easier to grow NeilBrown
2006-01-17 14:37 ` John Stoffel
2006-01-19 0:26 ` Neil Brown
2006-01-21 3:37 ` John Stoffel
2006-01-22 22:57 ` Neil Brown
2006-01-17 6:56 ` NeilBrown [this message]
2006-01-17 6:56 ` [PATCH 003 of 5] md: Infrastructure to allow normal IO to continue while array is expanding NeilBrown
2006-01-17 6:56 ` [PATCH 004 of 5] md: Core of raid5 resize process NeilBrown
2006-01-17 6:56 ` [PATCH 005 of 5] md: Final stages of raid5 expand code NeilBrown
2006-01-17 9:55 ` Sander
2006-01-19 0:32 ` Neil Brown
2006-01-17 8:17 ` [PATCH 000 of 5] md: Introduction Michael Tokarev
[not found] ` <fd8d0180601170121s1e6a55b7o@mail.gmail.com>
2006-01-17 9:38 ` Francois Barre
2006-01-19 0:35 ` Neil Brown
2006-01-17 9:50 ` Sander
2006-01-17 11:26 ` Michael Tokarev
2006-01-17 11:37 ` Francois Barre
2006-01-17 14:03 ` Kyle Moffett
2006-01-19 0:28 ` Neil Brown
2006-01-17 16:08 ` Ross Vandegrift
2006-01-17 16:08 ` Ross Vandegrift
2006-01-17 18:12 ` Michael Tokarev
2006-01-17 18:12 ` Michael Tokarev
2006-01-18 8:14 ` Sander
2006-01-18 8:14 ` Sander
2006-01-18 8:37 ` Brad Campbell
2006-01-18 9:03 ` Alan Cox
2006-01-18 12:46 ` John Hendrikx
2006-01-18 12:51 ` Gordon Henderson
2006-01-18 23:51 ` Neil Brown
2006-01-19 7:20 ` PFC
2006-01-19 8:01 ` dean gaudet
2006-01-18 23:54 ` Neil Brown
2006-01-19 0:22 ` Neil Brown
2006-01-19 0:22 ` Neil Brown
2006-01-19 9:01 ` Jakob Oestergaard
2006-01-19 9:01 ` Jakob Oestergaard
2006-01-17 22:38 ` Phillip Susi
2006-01-17 22:57 ` Neil Brown
2006-01-17 14:10 ` Steinar H. Gunderson
2006-01-17 15:07 ` Mr. James W. Laferriere
2006-01-19 0:23 ` Neil Brown
2006-01-22 4:42 ` Adam Kropelin
2006-01-22 22:52 ` Neil Brown
2006-01-23 23:02 ` Adam Kropelin
2006-01-23 23:02 ` Adam Kropelin
2006-01-23 1:08 ` John Hendrikx
2006-01-23 1:25 ` Neil Brown
2006-01-23 1:54 ` Kyle Moffett
2006-01-23 2:09 ` Mr. James W. Laferriere
2006-01-23 2:33 ` Neil Brown
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1060117065619.27843@suse.de \
--to=neilb@suse.de \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-raid@vger.kernel.org \
--cc=sgunderson@bigfoot.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.