From: NeilBrown <neilb@suse.de>
To: Andrew Morton <akpm@osdl.org>
Cc: linux-raid@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [PATCH 009 of 11] md: Support stripe/offset mode in raid10
Date: Mon, 1 May 2006 15:30:54 +1000 [thread overview]
Message-ID: <1060501053054.23009@suse.de> (raw)
In-Reply-To: 20060501152229.18367.patches@notabene
The "industry standard" DDF format allows for a stripe/offset layout
where data is duplicated on different stripes. e.g.
A B C D
D A B C
E F G H
H E F G
(columns are drives, rows are stripes, LETTERS are chunks of data).
This is similar to raid10's 'far' mode, but not quite the same. So
enhance 'far' mode with a 'far/offset' option which follows the layout
of DDFs stripe/offset.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./drivers/md/raid10.c | 64 ++++++++++++++++++++++++++++--------------
./include/linux/raid/raid10.h | 7 +++-
2 files changed, 49 insertions(+), 22 deletions(-)
diff ./drivers/md/raid10.c~current~ ./drivers/md/raid10.c
--- ./drivers/md/raid10.c~current~ 2006-05-01 15:12:34.000000000 +1000
+++ ./drivers/md/raid10.c 2006-05-01 15:13:22.000000000 +1000
@@ -29,6 +29,7 @@
* raid_disks
* near_copies (stored in low byte of layout)
* far_copies (stored in second byte of layout)
+ * far_offset (stored in bit 16 of layout )
*
* The data to be stored is divided into chunks using chunksize.
* Each device is divided into far_copies sections.
@@ -36,10 +37,14 @@
* near_copies copies of each chunk is stored (each on a different drive).
* The starting device for each section is offset near_copies from the starting
* device of the previous section.
- * Thus there are (near_copies*far_copies) of each chunk, and each is on a different
+ * Thus they are (near_copies*far_copies) of each chunk, and each is on a different
* drive.
* near_copies and far_copies must be at least one, and their product is at most
* raid_disks.
+ *
+ * If far_offset is true, then the far_copies are handled a bit differently.
+ * The copies are still in different stripes, but instead of be very far apart
+ * on disk, there are adjacent stripes.
*/
/*
@@ -357,8 +362,7 @@ static int raid10_end_write_request(stru
* With this layout, and block is never stored twice on the one device.
*
* raid10_find_phys finds the sector offset of a given virtual sector
- * on each device that it is on. If a block isn't on a device,
- * that entry in the array is set to MaxSector.
+ * on each device that it is on.
*
* raid10_find_virt does the reverse mapping, from a device and a
* sector offset to a virtual address
@@ -381,6 +385,8 @@ static void raid10_find_phys(conf_t *con
chunk *= conf->near_copies;
stripe = chunk;
dev = sector_div(stripe, conf->raid_disks);
+ if (conf->far_offset)
+ stripe *= conf->far_copies;
sector += stripe << conf->chunk_shift;
@@ -414,16 +420,24 @@ static sector_t raid10_find_virt(conf_t
{
sector_t offset, chunk, vchunk;
- while (sector > conf->stride) {
- sector -= conf->stride;
- if (dev < conf->near_copies)
- dev += conf->raid_disks - conf->near_copies;
- else
- dev -= conf->near_copies;
- }
-
offset = sector & conf->chunk_mask;
- chunk = sector >> conf->chunk_shift;
+ if (conf->far_offset) {
+ int fc;
+ chunk = sector >> conf->chunk_shift;
+ fc = sector_div(chunk, conf->far_copies);
+ dev -= fc * conf->near_copies;
+ if (dev < 0)
+ dev += conf->raid_disks;
+ } else {
+ while (sector > conf->stride) {
+ sector -= conf->stride;
+ if (dev < conf->near_copies)
+ dev += conf->raid_disks - conf->near_copies;
+ else
+ dev -= conf->near_copies;
+ }
+ chunk = sector >> conf->chunk_shift;
+ }
vchunk = chunk * conf->raid_disks + dev;
sector_div(vchunk, conf->near_copies);
return (vchunk << conf->chunk_shift) + offset;
@@ -900,9 +914,12 @@ static void status(struct seq_file *seq,
seq_printf(seq, " %dK chunks", mddev->chunk_size/1024);
if (conf->near_copies > 1)
seq_printf(seq, " %d near-copies", conf->near_copies);
- if (conf->far_copies > 1)
- seq_printf(seq, " %d far-copies", conf->far_copies);
-
+ if (conf->far_copies > 1) {
+ if (conf->far_offset)
+ seq_printf(seq, " %d offset-copies", conf->far_copies);
+ else
+ seq_printf(seq, " %d far-copies", conf->far_copies);
+ }
seq_printf(seq, " [%d/%d] [", conf->raid_disks,
conf->working_disks);
for (i = 0; i < conf->raid_disks; i++)
@@ -1915,7 +1932,7 @@ static int run(mddev_t *mddev)
mirror_info_t *disk;
mdk_rdev_t *rdev;
struct list_head *tmp;
- int nc, fc;
+ int nc, fc, fo;
sector_t stride, size;
if (mddev->chunk_size == 0) {
@@ -1925,8 +1942,9 @@ static int run(mddev_t *mddev)
nc = mddev->layout & 255;
fc = (mddev->layout >> 8) & 255;
+ fo = mddev->layout & (1<<16);
if ((nc*fc) <2 || (nc*fc) > mddev->raid_disks ||
- (mddev->layout >> 16)) {
+ (mddev->layout >> 17)) {
printk(KERN_ERR "raid10: %s: unsupported raid10 layout: 0x%8x\n",
mdname(mddev), mddev->layout);
goto out;
@@ -1958,12 +1976,16 @@ static int run(mddev_t *mddev)
conf->near_copies = nc;
conf->far_copies = fc;
conf->copies = nc*fc;
+ conf->far_offset = fo;
conf->chunk_mask = (sector_t)(mddev->chunk_size>>9)-1;
conf->chunk_shift = ffz(~mddev->chunk_size) - 9;
- stride = mddev->size >> (conf->chunk_shift-1);
- sector_div(stride, fc);
- conf->stride = stride << conf->chunk_shift;
-
+ if (fo)
+ conf->stride = 1 << conf->chunk_shift;
+ else {
+ stride = mddev->size >> (conf->chunk_shift-1);
+ sector_div(stride, fc);
+ conf->stride = stride << conf->chunk_shift;
+ }
conf->r10bio_pool = mempool_create(NR_RAID10_BIOS, r10bio_pool_alloc,
r10bio_pool_free, conf);
if (!conf->r10bio_pool) {
diff ./include/linux/raid/raid10.h~current~ ./include/linux/raid/raid10.h
--- ./include/linux/raid/raid10.h~current~ 2006-05-01 15:09:20.000000000 +1000
+++ ./include/linux/raid/raid10.h 2006-05-01 15:13:22.000000000 +1000
@@ -24,11 +24,16 @@ struct r10_private_data_s {
int far_copies; /* number of copies layed out
* at large strides across drives
*/
+ int far_offset; /* far_copies are offset by 1 stripe
+ * instead of many
+ */
int copies; /* near_copies * far_copies.
* must be <= raid_disks
*/
sector_t stride; /* distance between far copies.
- * This is size / far_copies
+ * This is size / far_copies unless
+ * far_offset, in which case it is
+ * 1 stripe.
*/
int chunk_shift; /* shift from chunks to sectors */
next prev parent reply other threads:[~2006-05-01 5:30 UTC|newest]
Thread overview: 29+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-05-01 5:29 [PATCH 000 of 11] md: Introduction - assort md enhancements for 2.6.18 NeilBrown
2006-05-01 5:30 ` [PATCH 001 of 11] md: Reformat code in raid1_end_write_request to avoid goto NeilBrown
2006-05-01 5:30 ` [PATCH 002 of 11] md: Remove arbitrary limit on chunk size NeilBrown
2006-05-01 5:30 ` [PATCH 003 of 11] md: Remove useless ioctl warning NeilBrown
2006-05-01 5:30 ` [PATCH 004 of 11] md: Increase the delay before marking metadata clean, and make it configurable NeilBrown
2006-05-01 5:44 ` Andrew Morton
2006-05-01 6:02 ` Neil Brown
2006-05-01 6:13 ` Andrew Morton
2006-05-01 15:17 ` Linus Torvalds
2006-05-01 6:15 ` Nick Piggin
2006-05-02 5:56 ` bert hubert
2006-05-09 1:40 ` Neil Brown
2006-05-01 5:30 ` [PATCH 006 of 11] md: Remove nuisance message at shutdown NeilBrown
2006-05-01 5:30 ` [PATCH 007 of 11] md: Allow checkpoint of recovery with version-1 superblock NeilBrown
2006-05-01 5:30 ` [PATCH 008 of 11] md: Allow a linear array to have drives added while active NeilBrown
2006-05-01 5:30 ` NeilBrown [this message]
2006-05-02 16:38 ` [PATCH 009 of 11] md: Support stripe/offset mode in raid10 Al Boldi
2006-05-03 0:05 ` Neil Brown
2006-05-03 4:00 ` Al Boldi
2006-05-08 7:17 ` Neil Brown
2006-05-08 16:59 ` Al Boldi
2006-05-17 21:32 ` Raid5 resize "testing opportunity" Patrik Jonsson
2006-05-17 23:49 ` Neil Brown
2006-05-19 0:40 ` Patrik Jonsson
2006-05-19 0:44 ` Neil Brown
2006-05-19 20:11 ` Per Lindstrand
2006-05-01 5:31 ` [PATCH 010 of 11] md: make md_print_devices() static NeilBrown
2006-05-01 5:31 ` [PATCH 011 of 11] md: Split reshape portion of raid5 sync_request into a separate function NeilBrown
[not found] ` <1060501053025.22961@suse.de>
2006-05-01 5:40 ` [PATCH 005 of 11] md: Merge raid5 and raid6 code H. Peter Anvin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1060501053054.23009@suse.de \
--to=neilb@suse.de \
--cc=akpm@osdl.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-raid@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).