Subject: [PATCH 005:013]: raid0: refactor create_strip and raid0_run
From: raz ben yehuda
Date: 2009-06-16 21:53 UTC
To: linux raid, Neil Brown
Split raid0_run and create_strip_zones.
Have create_strip_zones work with the conf structure instead of mddev.
Have create_strip_zones accept a list of disks instead of mddev->disks.
Remove invalid disks from the list before calling create_strip_zones.
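
In outline, the refactored raid0_run follows this sequence (a simplified
sketch of the patch below, with error handling elided):

	static int raid0_run(mddev_t *mddev)
	{
		/* park disks with raid_disk < 0 on a temporary list */
		/* allocate conf and attach it to mddev->private */
		create_strip_zones(conf, &mddev->disks,
			mddev->chunk_sectors, mddev->raid_disks);
		calc_zones(conf, &mddev->disks, mddev->raid_disks);
		set_queues(&mddev->disks, mddev->queue);
		validate_chunk_alignment(mddev);
		md_set_array_sectors(mddev, raid0_size(mddev, 0, 0));
		set_readahead(mddev);
		/* splice the parked disks back into mddev->disks */
		return 0;
	}
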
raid0.c | 292 +++++++++++++++++++++++++++++++++++++---------------------------
1 file changed, 173 insertions(+), 119 deletions(-)
Signed-off-by: razb <raziebe@gmail.com>
---
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 6f87db2..0bb151b 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -84,68 +84,35 @@ static void print_conf(raid0_conf_t *conf, int raid_disks, char *name)
printk(KERN_INFO "**********************************\n\n");
}
-static int create_strip_zones(mddev_t *mddev)
+static void set_queues(struct list_head *disks, struct request_queue *queue)
{
- int i, c, j, err;
+ mdk_rdev_t *rdev1;
+ list_for_each_entry(rdev1, disks, same_set) {
+ blk_queue_stack_limits(queue,
+ rdev1->bdev->bd_disk->queue);
+ /* as we don't honour merge_bvec_fn, we must never risk
+ * violating it, so limit ->max_sector to one PAGE, as
+ * a one page request is never in violation.
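+ * (one page is PAGE_SIZE>>9 sectors, e.g. 8 sectors with
+ * 4 KiB pages)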
+ */
+ if (rdev1->bdev->bd_disk->queue->merge_bvec_fn &&
+ queue_max_sectors(queue) > (PAGE_SIZE>>9))
+ blk_queue_max_sectors(queue, PAGE_SIZE>>9);
+ }
+}
+
+/*
+ * calculate the zones of the array:
+ * we calculate the size of each zone and its offset.
+ */
+static int calc_zones(raid0_conf_t *conf, struct list_head *disks,
+ int raid_disks)
+{
+ int i, c, j;
sector_t current_start, curr_zone_start, sectors;
- mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev, **dev;
+ mdk_rdev_t *smallest, *rdev1, *rdev, **dev;
struct strip_zone *zone;
int cnt;
char b[BDEVNAME_SIZE];
- raid0_conf_t *conf = kzalloc(sizeof(*conf), GFP_KERNEL);
-
- if (!conf)
- return -ENOMEM;
- list_for_each_entry(rdev1, &mddev->disks, same_set) {
- printk(KERN_INFO "raid0: looking at %s\n",
- bdevname(rdev1->bdev,b));
- c = 0;
-
- /* round size to chunk_size */
- sectors = rdev1->sectors;
- sector_div(sectors, mddev->chunk_sectors);
- rdev1->sectors = sectors * mddev->chunk_sectors;
-
- list_for_each_entry(rdev2, &mddev->disks, same_set) {
- printk(KERN_INFO "raid0: comparing %s(%llu)",
- bdevname(rdev1->bdev,b),
- (unsigned long long)rdev1->sectors);
- printk(KERN_INFO " with %s(%llu)\n",
- bdevname(rdev2->bdev,b),
- (unsigned long long)rdev2->sectors);
- if (rdev2 == rdev1) {
- printk(KERN_INFO "raid0: END\n");
- break;
- }
- if (rdev2->sectors == rdev1->sectors) {
- /*
- * Not unique, don't count it as a new
- * group
- */
- printk(KERN_INFO "raid0: EQUAL\n");
- c = 1;
- break;
- }
- printk(KERN_INFO "raid0: NOT EQUAL\n");
- }
- if (!c) {
- printk(KERN_INFO "raid0: ==> UNIQUE\n");
- conf->nr_strip_zones++;
- printk(KERN_INFO "raid0: %d zones\n",
- conf->nr_strip_zones);
- }
- }
- printk(KERN_INFO "raid0: FINAL %d zones\n", conf->nr_strip_zones);
- err = -ENOMEM;
- conf->strip_zone = kzalloc(sizeof(struct strip_zone)*
- conf->nr_strip_zones, GFP_KERNEL);
- if (!conf->strip_zone)
- goto abort;
- conf->devlist = kzalloc(sizeof(mdk_rdev_t*)*
- conf->nr_strip_zones*mddev->raid_disks,
- GFP_KERNEL);
- if (!conf->devlist)
- goto abort;
/* The first zone must contain all devices, so here we check that
* there is a proper alignment of slots to devices and find them all
@@ -154,41 +121,30 @@ static int create_strip_zones(mddev_t *mddev)
cnt = 0;
smallest = NULL;
dev = conf->devlist;
- err = -EINVAL;
- list_for_each_entry(rdev1, &mddev->disks, same_set) {
+
+ list_for_each_entry(rdev1, disks, same_set) {
int j = rdev1->raid_disk;
- if (j < 0 || j >= mddev->raid_disks) {
- printk(KERN_ERR "raid0: bad disk number %d - "
- "aborting!\n", j);
- goto abort;
+ if (j < 0 || j >= raid_disks) {
+ printk(KERN_ERR "raid0: %s bad disk number id=%d"
+ " - aborting!\n",
+ bdevname(rdev1->bdev, b), j);
+ return -1;
}
if (dev[j]) {
- printk(KERN_ERR "raid0: multiple devices for %d - "
- "aborting!\n", j);
- goto abort;
+ printk(KERN_ERR "raid0: multiple devices for %d/%d - "
+ "aborting!\n", j, raid_disks);
+ return -1;
}
dev[j] = rdev1;
- blk_queue_stack_limits(mddev->queue,
- rdev1->bdev->bd_disk->queue);
- /* as we don't honour merge_bvec_fn, we must never risk
- * violating it, so limit ->max_sector to one PAGE, as
- * a one page request is never in violation.
- */
-
- if (rdev1->bdev->bd_disk->queue->merge_bvec_fn &&
- queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
- blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
-
if (!smallest || (rdev1->sectors < smallest->sectors))
smallest = rdev1;
cnt++;
}
- if (cnt != mddev->raid_disks) {
+ if (cnt < raid_disks) {
printk(KERN_ERR "raid0: too few disks (%d of %d) - "
- "aborting!\n", cnt, mddev->raid_disks);
- goto abort;
+ "aborting!\n", cnt, raid_disks);
+ return -1;
}
zone->nb_dev = cnt;
zone->zone_end = smallest->sectors * cnt;
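+ /* e.g. two 100-sector devices and one 50-sector device
+ * give a first zone of 3 * 50 = 150 sectors
+ */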
@@ -200,7 +156,7 @@ static int create_strip_zones(mddev_t *mddev)
for (i = 1; i < conf->nr_strip_zones; i++)
{
zone = conf->strip_zone + i;
- dev = conf->devlist + i * mddev->raid_disks;
+ dev = conf->devlist + i * raid_disks;
printk(KERN_INFO "raid0: zone %d\n", i);
zone->dev_start = current_start;
@@ -225,7 +181,6 @@ static int create_strip_zones(mddev_t *mddev)
(unsigned long long)rdev->sectors);
}
}
-
zone->nb_dev = c;
sectors = (smallest->sectors - current_start) * c;
printk(KERN_INFO "raid0: zone->nb_dev: %d, sectors: %llu\n",
@@ -238,29 +193,74 @@ static int create_strip_zones(mddev_t *mddev)
printk(KERN_INFO "raid0: current zone start: %llu\n",
(unsigned long long)current_start);
}
- mddev->queue->unplug_fn = raid0_unplug;
- mddev->queue->backing_dev_info.congested_fn = raid0_congested;
- mddev->queue->backing_dev_info.congested_data = mddev;
+ return 0;
+}
- /*
- * now since we have the hard sector sizes, we can make sure
- * chunk size is a multiple of that sector size
- */
- if ((mddev->chunk_sectors << 9) % queue_logical_block_size(mddev->queue)) {
- printk(KERN_ERR "%s chunk_size of %d not valid\n",
- mdname(mddev),
- mddev->chunk_sectors << 9);
- goto abort;
+/*
+ * count the strip zones and allocate the zone and device tables.
+ */
+static int create_strip_zones(raid0_conf_t *conf, struct list_head *disks,
+ int chunk_sectors, int raid_disks)
+{
+ int c;
+ mdk_rdev_t *rdev1, *rdev2;
+ char b[BDEVNAME_SIZE];
+ sector_t sectors;
+
+ list_for_each_entry(rdev1, disks, same_set) {
+ printk(KERN_INFO "raid0: looking at %s\n",
+ bdevname(rdev1->bdev, b));
+ c = 0;
+ /* round size to chunk_size */
+ sectors = rdev1->sectors;
+ sector_div(sectors, chunk_sectors);
+ rdev1->sectors = sectors * chunk_sectors;
+
+ list_for_each_entry(rdev2, disks, same_set) {
+ printk(KERN_INFO "raid0: comparing %s(%llu)",
+ bdevname(rdev1->bdev, b),
+ (unsigned long long)rdev1->sectors);
+ printk(KERN_INFO " with %s(%llu)\n",
+ bdevname(rdev2->bdev, b),
+ (unsigned long long)rdev2->sectors);
+ if (rdev2 == rdev1) {
+ printk(KERN_INFO "raid0: END\n");
+ break;
+ }
+ if (rdev2->sectors == rdev1->sectors) {
+ /*
+ * Not unique, don't count it as a new
+ * group
+ */
+ printk(KERN_INFO "raid0: EQUAL\n");
+ c = 1;
+ break;
+ }
+ printk(KERN_INFO "raid0: NOT EQUAL\n");
+ }
+ if (!c) {
+ printk(KERN_INFO "raid0: ==> UNIQUE\n");
+ conf->nr_strip_zones++;
+ printk(KERN_INFO "raid0: %d zones\n",
+ conf->nr_strip_zones);
+ }
}
- printk(KERN_INFO "raid0: done.\n");
- mddev->private = conf;
+ printk(KERN_INFO "raid0: FINAL %d zones\n", conf->nr_strip_zones);
+ conf->strip_zone = kzalloc(sizeof(struct strip_zone)*
+ conf->nr_strip_zones, GFP_KERNEL);
+ if (!conf->strip_zone)
+ goto abort;
+ conf->devlist = kzalloc(sizeof(mdk_rdev_t *)*
+ conf->nr_strip_zones*raid_disks,
+ GFP_KERNEL);
+ if (!conf->devlist)
+ goto abort;
return 0;
abort:
- kfree(conf->strip_zone);
- kfree(conf->devlist);
- kfree(conf);
- mddev->private = NULL;
- return err;
+ /* conf and its tables are freed by raid0_run's abort path,
+ * so do not free them here; freeing in both places would be
+ * a double free.
+ */
+ return -1;
}
/**
@@ -311,9 +311,46 @@ static sector_t raid0_size(mddev_t *mddev, sector_t sectors, int raid_disks)
return array_sectors;
}
+/* calculate the max read-ahead size.
+ * For read-ahead of large files to be effective, we need to
+ * readahead at least twice a whole stripe. i.e. number of devices
+ * multiplied by chunk size times 2.
+ * If an individual device has an ra_pages greater than the
+ * chunk size, then we will not drive that device as hard as it
+ * wants. We consider this a configuration error: a larger
+ * chunksize should be used in that case.
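+ * For example, 4 disks with 64 KiB chunks and 4 KiB pages give
+ * a stripe of 64 pages, so ra_pages is raised to at least 128.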
+ */
+static void set_readahead(mddev_t *mddev)
+{
+ int stripe = mddev->raid_disks *
+ (mddev->chunk_sectors << 9) / PAGE_SIZE;
+ if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
+ mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
+}
+
+/*
+ * now since we have the hard sector sizes, we can make sure
+ * chunk size is a multiple of that sector size
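+ * (e.g. with a 4096-byte logical block size, chunk_sectors
+ * must be a multiple of 8)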
+ */
+static int validate_chunk_alignment(mddev_t *mddev)
+{
+ if ((mddev->chunk_sectors << 9) %
+ queue_logical_block_size(mddev->queue)) {
+ printk(KERN_ERR
+ "%s chunk_size of %d not valid\n",
+ mdname(mddev),
+ mddev->chunk_sectors << 9);
+ return -1;
+ }
+ return 0;
+}
+
static int raid0_run(mddev_t *mddev)
{
- int ret;
+ int ret = -ENOMEM;
+ raid0_conf_t *conf;
+ mdk_rdev_t *rdev1, *rdev2;
+ LIST_HEAD(new_disks);
if (mddev->chunk_sectors == 0) {
printk(KERN_ERR "md/raid0: chunk size must be set.\n");
@@ -321,35 +358,52 @@ static int raid0_run(mddev_t *mddev)
}
blk_queue_max_sectors(mddev->queue, mddev->chunk_sectors);
mddev->queue->queue_lock = &mddev->queue->__queue_lock;
+ /*
+ * when assembling an interrupted reshape, temporarily remove
+ * any new disk (raid_disk < 0) from the list.
+ */
+ list_for_each_entry_safe(rdev1, rdev2, &mddev->disks, same_set) {
+ if (rdev1->raid_disk < 0) {
+ list_move_tail(&rdev1->same_set, &new_disks);
+ }
+ }
+ conf = kzalloc(sizeof(*conf), GFP_KERNEL);
+ if (!conf)
+ goto abort;
+ mddev->private = conf;
- ret = create_strip_zones(mddev);
- if (ret < 0)
- return ret;
-
+ if (create_strip_zones(conf, &mddev->disks,
+ mddev->chunk_sectors, mddev->raid_disks))
+ goto abort;
+ ret = -EINVAL;
+ if (calc_zones(conf, &mddev->disks, mddev->raid_disks))
+ goto abort;
+ set_queues(&mddev->disks, mddev->queue);
+ mddev->queue->unplug_fn = raid0_unplug;
+ mddev->queue->backing_dev_info.congested_fn = raid0_congested;
+ mddev->queue->backing_dev_info.congested_data = mddev;
+ if (validate_chunk_alignment(mddev))
+ goto abort;
/* calculate array device size */
md_set_array_sectors(mddev, raid0_size(mddev, 0, 0));
printk(KERN_INFO "raid0 : md_size is %llu sectors.\n",
(unsigned long long)mddev->array_sectors);
- /* calculate the max read-ahead size.
- * For read-ahead of large files to be effective, we need to
- * readahead at least twice a whole stripe. i.e. number of devices
- * multiplied by chunk size times 2.
- * If an individual device has an ra_pages greater than the
- * chunk size, then we will not drive that device as hard as it
- * wants. We consider this a configuration error: a larger
- * chunksize should be used in that case.
- */
- {
- int stripe = mddev->raid_disks *
- (mddev->chunk_sectors << 9) / PAGE_SIZE;
- if (mddev->queue->backing_dev_info.ra_pages < 2* stripe)
- mddev->queue->backing_dev_info.ra_pages = 2* stripe;
- }
-
+ set_readahead(mddev);
blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec);
print_conf(mddev->private, mddev->raid_disks, mdname(mddev));
+ list_splice(&new_disks, &mddev->disks);
return 0;
+abort:
+ {
+ raid0_conf_t *conf = mddev->private;
+ if (conf) {
+ kfree(conf->strip_zone);
+ kfree(conf->devlist);
+ kfree(conf);
+ mddev->private = NULL;
+ }
+ }
+ list_splice(&new_disks, &mddev->disks);
+ return ret;
}
static int raid0_stop(mddev_t *mddev)