From: NeilBrown <neilb@suse.de>
To: Andrew Morton <akpm@osdl.org>
Cc: linux-raid@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [PATCH 009 of 11] md: Support stripe/offset mode in raid10
Date: Mon, 1 May 2006 15:30:54 +1000 [thread overview]
Message-ID: <1060501053054.23009@suse.de> (raw)
In-Reply-To: 20060501152229.18367.patches@notabene
The "industry standard" DDF format allows for a stripe/offset layout
where data is duplicated on different stripes. e.g.
A B C D
D A B C
E F G H
H E F G
(columns are drives, rows are stripes, LETTERS are chunks of data).
This is similar to raid10's 'far' mode, but not quite the same. So
enhance 'far' mode with a 'far/offset' option which follows the layout
of DDF's stripe/offset mode.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./drivers/md/raid10.c | 64 ++++++++++++++++++++++++++++--------------
./include/linux/raid/raid10.h | 7 +++-
2 files changed, 49 insertions(+), 22 deletions(-)
diff ./drivers/md/raid10.c~current~ ./drivers/md/raid10.c
--- ./drivers/md/raid10.c~current~ 2006-05-01 15:12:34.000000000 +1000
+++ ./drivers/md/raid10.c 2006-05-01 15:13:22.000000000 +1000
@@ -29,6 +29,7 @@
* raid_disks
* near_copies (stored in low byte of layout)
* far_copies (stored in second byte of layout)
+ * far_offset (stored in bit 16 of layout)
*
* The data to be stored is divided into chunks using chunksize.
* Each device is divided into far_copies sections.
@@ -36,10 +37,14 @@
* near_copies copies of each chunk is stored (each on a different drive).
* The starting device for each section is offset near_copies from the starting
* device of the previous section.
- * Thus there are (near_copies*far_copies) of each chunk, and each is on a different
+ * Thus there are (near_copies*far_copies) copies of each chunk, and each is on a different
* drive.
* near_copies and far_copies must be at least one, and their product is at most
* raid_disks.
+ *
+ * If far_offset is true, then the far_copies are handled a bit differently.
+ * The copies are still in different stripes, but instead of being very far
+ * apart on disk, they are in adjacent stripes.
*/
/*
@@ -357,8 +362,7 @@ static int raid10_end_write_request(stru
* With this layout, and block is never stored twice on the one device.
*
* raid10_find_phys finds the sector offset of a given virtual sector
- * on each device that it is on. If a block isn't on a device,
- * that entry in the array is set to MaxSector.
+ * on each device that it is on.
*
* raid10_find_virt does the reverse mapping, from a device and a
* sector offset to a virtual address
@@ -381,6 +385,8 @@ static void raid10_find_phys(conf_t *con
chunk *= conf->near_copies;
stripe = chunk;
dev = sector_div(stripe, conf->raid_disks);
+ if (conf->far_offset)
+ stripe *= conf->far_copies;
sector += stripe << conf->chunk_shift;
@@ -414,16 +420,24 @@ static sector_t raid10_find_virt(conf_t
{
sector_t offset, chunk, vchunk;
- while (sector > conf->stride) {
- sector -= conf->stride;
- if (dev < conf->near_copies)
- dev += conf->raid_disks - conf->near_copies;
- else
- dev -= conf->near_copies;
- }
-
offset = sector & conf->chunk_mask;
- chunk = sector >> conf->chunk_shift;
+ if (conf->far_offset) {
+ int fc;
+ chunk = sector >> conf->chunk_shift;
+ fc = sector_div(chunk, conf->far_copies);
+ dev -= fc * conf->near_copies;
+ if (dev < 0)
+ dev += conf->raid_disks;
+ } else {
+ while (sector > conf->stride) {
+ sector -= conf->stride;
+ if (dev < conf->near_copies)
+ dev += conf->raid_disks - conf->near_copies;
+ else
+ dev -= conf->near_copies;
+ }
+ chunk = sector >> conf->chunk_shift;
+ }
vchunk = chunk * conf->raid_disks + dev;
sector_div(vchunk, conf->near_copies);
return (vchunk << conf->chunk_shift) + offset;
@@ -900,9 +914,12 @@ static void status(struct seq_file *seq,
seq_printf(seq, " %dK chunks", mddev->chunk_size/1024);
if (conf->near_copies > 1)
seq_printf(seq, " %d near-copies", conf->near_copies);
- if (conf->far_copies > 1)
- seq_printf(seq, " %d far-copies", conf->far_copies);
-
+ if (conf->far_copies > 1) {
+ if (conf->far_offset)
+ seq_printf(seq, " %d offset-copies", conf->far_copies);
+ else
+ seq_printf(seq, " %d far-copies", conf->far_copies);
+ }
seq_printf(seq, " [%d/%d] [", conf->raid_disks,
conf->working_disks);
for (i = 0; i < conf->raid_disks; i++)
@@ -1915,7 +1932,7 @@ static int run(mddev_t *mddev)
mirror_info_t *disk;
mdk_rdev_t *rdev;
struct list_head *tmp;
- int nc, fc;
+ int nc, fc, fo;
sector_t stride, size;
if (mddev->chunk_size == 0) {
@@ -1925,8 +1942,9 @@ static int run(mddev_t *mddev)
nc = mddev->layout & 255;
fc = (mddev->layout >> 8) & 255;
+ fo = mddev->layout & (1<<16);
if ((nc*fc) <2 || (nc*fc) > mddev->raid_disks ||
- (mddev->layout >> 16)) {
+ (mddev->layout >> 17)) {
printk(KERN_ERR "raid10: %s: unsupported raid10 layout: 0x%8x\n",
mdname(mddev), mddev->layout);
goto out;
@@ -1958,12 +1976,16 @@ static int run(mddev_t *mddev)
conf->near_copies = nc;
conf->far_copies = fc;
conf->copies = nc*fc;
+ conf->far_offset = fo;
conf->chunk_mask = (sector_t)(mddev->chunk_size>>9)-1;
conf->chunk_shift = ffz(~mddev->chunk_size) - 9;
- stride = mddev->size >> (conf->chunk_shift-1);
- sector_div(stride, fc);
- conf->stride = stride << conf->chunk_shift;
-
+ if (fo)
+ conf->stride = 1 << conf->chunk_shift;
+ else {
+ stride = mddev->size >> (conf->chunk_shift-1);
+ sector_div(stride, fc);
+ conf->stride = stride << conf->chunk_shift;
+ }
conf->r10bio_pool = mempool_create(NR_RAID10_BIOS, r10bio_pool_alloc,
r10bio_pool_free, conf);
if (!conf->r10bio_pool) {
diff ./include/linux/raid/raid10.h~current~ ./include/linux/raid/raid10.h
--- ./include/linux/raid/raid10.h~current~ 2006-05-01 15:09:20.000000000 +1000
+++ ./include/linux/raid/raid10.h 2006-05-01 15:13:22.000000000 +1000
@@ -24,11 +24,16 @@ struct r10_private_data_s {
int far_copies; /* number of copies layed out
* at large strides across drives
*/
+ int far_offset; /* far_copies are offset by 1 stripe
+ * instead of many
+ */
int copies; /* near_copies * far_copies.
* must be <= raid_disks
*/
sector_t stride; /* distance between far copies.
- * This is size / far_copies
+ * This is size / far_copies unless
+ * far_offset, in which case it is
+ * 1 stripe.
*/
int chunk_shift; /* shift from chunks to sectors */
next prev parent reply other threads:[~2006-05-01 5:30 UTC|newest]
Thread overview: 31+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-05-01 5:29 [PATCH 000 of 11] md: Introduction - assort md enhancements for 2.6.18 NeilBrown
2006-05-01 5:29 ` NeilBrown
2006-05-01 5:30 ` [PATCH 001 of 11] md: Reformat code in raid1_end_write_request to avoid goto NeilBrown
2006-05-01 5:30 ` [PATCH 002 of 11] md: Remove arbitrary limit on chunk size NeilBrown
2006-05-01 5:30 ` [PATCH 003 of 11] md: Remove useless ioctl warning NeilBrown
2006-05-01 5:30 ` [PATCH 004 of 11] md: Increase the delay before marking metadata clean, and make it configurable NeilBrown
2006-05-01 5:44 ` Andrew Morton
2006-05-01 6:02 ` Neil Brown
2006-05-01 6:13 ` Andrew Morton
2006-05-01 15:17 ` Linus Torvalds
2006-05-01 6:15 ` Nick Piggin
2006-05-02 5:56 ` bert hubert
2006-05-09 1:40 ` Neil Brown
2006-05-01 5:30 ` [PATCH 006 of 11] md: Remove nuisance message at shutdown NeilBrown
2006-05-01 5:30 ` [PATCH 007 of 11] md: Allow checkpoint of recovery with version-1 superblock NeilBrown
2006-05-01 5:30 ` [PATCH 008 of 11] md: Allow a linear array to have drives added while active NeilBrown
2006-05-01 5:30 ` NeilBrown [this message]
2006-05-02 16:38 ` [PATCH 009 of 11] md: Support stripe/offset mode in raid10 Al Boldi
2006-05-03 0:05 ` Neil Brown
2006-05-03 4:00 ` Al Boldi
2006-05-08 7:17 ` Neil Brown
2006-05-08 16:59 ` Al Boldi
2006-05-17 21:32 ` Raid5 resize "testing opportunity" Patrik Jonsson
2006-05-17 23:49 ` Neil Brown
2006-05-19 0:40 ` Patrik Jonsson
2006-05-19 0:44 ` Neil Brown
2006-05-19 20:11 ` Per Lindstrand
2006-05-01 5:31 ` [PATCH 010 of 11] md: make md_print_devices() static NeilBrown
2006-05-01 5:31 ` NeilBrown
2006-05-01 5:31 ` [PATCH 011 of 11] md: Split reshape portion of raid5 sync_request into a separate function NeilBrown
[not found] ` <1060501053025.22961@suse.de>
2006-05-01 5:40 ` [PATCH 005 of 11] md: Merge raid5 and raid6 code H. Peter Anvin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1060501053054.23009@suse.de \
--to=neilb@suse.de \
--cc=akpm@osdl.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-raid@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.