From: Dan Williams <dan.j.williams@intel.com>
To: neilb@suse.de
Cc: linux-raid@vger.kernel.org
Subject: [md PATCH 4/4] async_tx: fix asynchronous raid6 recovery for ddf layouts
Date: Tue, 20 Oct 2009 00:11:38 -0700 [thread overview]
Message-ID: <20091020071138.6489.73870.stgit@dwillia2-linux.ch.intel.com> (raw)
In-Reply-To: <20091020070627.6489.4006.stgit@dwillia2-linux.ch.intel.com>
The raid6 recovery code currently requires special handling of the
4-disk and 5-disk recovery scenarios for the native layout. Quoting
from commit 0a82a623:
In these situations the default N-disk algorithm will present
0-source or 1-source operations to dma devices. To cover for
dma devices where the minimum source count is 2 we implement
4-disk and 5-disk handling in the recovery code.
Recovery in the ddf layout case needs explicit handling of the 6-disk
and 7-disk recovery cases which pose the same problems as the ones
mentioned above. Note that N-disks refers to N syndrome-disks not the
width of the array.
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
crypto/async_tx/async_raid6_recov.c | 98 ++++++++++++++++++++++++++++-------
crypto/async_tx/raid6test.c | 6 +-
drivers/md/raid5.c | 6 +-
include/linux/async_tx.h | 6 +-
4 files changed, 89 insertions(+), 27 deletions(-)
diff --git a/crypto/async_tx/async_raid6_recov.c b/crypto/async_tx/async_raid6_recov.c
index 8e30b6e..54b39d2 100644
--- a/crypto/async_tx/async_raid6_recov.c
+++ b/crypto/async_tx/async_raid6_recov.c
@@ -132,7 +132,7 @@ async_mult(struct page *dest, struct page *src, u8 coef, size_t len,
static struct dma_async_tx_descriptor *
__2data_recov_4(size_t bytes, int faila, int failb, struct page **blocks,
- struct async_submit_ctl *submit)
+ bool is_ddf, struct async_submit_ctl *submit)
{
struct dma_async_tx_descriptor *tx = NULL;
struct page *p, *q, *a, *b;
@@ -143,8 +143,13 @@ __2data_recov_4(size_t bytes, int faila, int failb, struct page **blocks,
void *cb_param = submit->cb_param;
void *scribble = submit->scribble;
- p = blocks[4-2];
- q = blocks[4-1];
+ if (is_ddf) {
+ p = blocks[6-2];
+ q = blocks[6-1];
+ } else {
+ p = blocks[4-2];
+ q = blocks[4-1];
+ }
a = blocks[faila];
b = blocks[failb];
@@ -171,7 +176,7 @@ __2data_recov_4(size_t bytes, int faila, int failb, struct page **blocks,
static struct dma_async_tx_descriptor *
__2data_recov_5(size_t bytes, int faila, int failb, struct page **blocks,
- struct async_submit_ctl *submit)
+ bool is_ddf, struct async_submit_ctl *submit)
{
struct dma_async_tx_descriptor *tx = NULL;
struct page *p, *q, *g, *dp, *dq;
@@ -181,21 +186,34 @@ __2data_recov_5(size_t bytes, int faila, int failb, struct page **blocks,
dma_async_tx_callback cb_fn = submit->cb_fn;
void *cb_param = submit->cb_param;
void *scribble = submit->scribble;
- int uninitialized_var(good);
+ int good = -1;
int i;
- for (i = 0; i < 3; i++) {
- if (i == faila || i == failb)
- continue;
- else {
+ if (is_ddf) {
+ /* a 7-disk ddf operation devolves to the 5-disk native
+ * layout case modulo these fixups
+ */
+ for (i = 0; i < 7-2; i++) {
+ if (blocks[i] == NULL)
+ continue;
+ if (i == faila || i == failb)
+ continue;
+ BUG_ON(good != -1);
good = i;
- break;
}
+ p = blocks[7-2];
+ q = blocks[7-1];
+ } else {
+ for (i = 0; i < 5-2; i++) {
+ if (i == faila || i == failb)
+ continue;
+ BUG_ON(good != -1);
+ good = i;
+ }
+ p = blocks[5-2];
+ q = blocks[5-1];
}
- BUG_ON(i >= 3);
-
- p = blocks[5-2];
- q = blocks[5-1];
+ BUG_ON(good == -1);
g = blocks[good];
/* Compute syndrome with zero for the missing data pages
@@ -317,11 +335,13 @@ __2data_recov_n(int disks, size_t bytes, int faila, int failb,
* @faila: first failed drive index
* @failb: second failed drive index
* @blocks: array of source pointers where the last two entries are p and q
+ * @is_ddf: flag to indicate whether 'blocks' is in the ddf layout
* @submit: submission/completion modifiers
*/
struct dma_async_tx_descriptor *
async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
- struct page **blocks, struct async_submit_ctl *submit)
+ struct page **blocks, bool is_ddf,
+ struct async_submit_ctl *submit)
{
BUG_ON(faila == failb);
if (failb < faila)
@@ -356,13 +376,23 @@ async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
* operation (in contrast to the synchronous case), so
* explicitly handle the 4 disk special case
*/
- return __2data_recov_4(bytes, faila, failb, blocks, submit);
+ return __2data_recov_4(bytes, faila, failb, blocks, false, submit);
case 5:
/* dma devices do not uniformly understand a single
* source pq operation (in contrast to the synchronous
* case), so explicitly handle the 5 disk special case
*/
- return __2data_recov_5(bytes, faila, failb, blocks, submit);
+ return __2data_recov_5(bytes, faila, failb, blocks, false, submit);
+ case 6:
+ if (is_ddf)
+ return __2data_recov_4(bytes, faila, failb, blocks,
+ true, submit);
+ /* fall through */
+ case 7:
+ if (is_ddf)
+ return __2data_recov_5(bytes, faila, failb, blocks,
+ true, submit);
+ /* fall through */
default:
return __2data_recov_n(disks, bytes, faila, failb, blocks, submit);
}
@@ -375,11 +405,13 @@ EXPORT_SYMBOL_GPL(async_raid6_2data_recov);
* @bytes: block size
* @faila: failed drive index
* @blocks: array of source pointers where the last two entries are p and q
+ * @is_ddf: flag to indicate whether 'blocks' is in the ddf layout
* @submit: submission/completion modifiers
*/
struct dma_async_tx_descriptor *
async_raid6_datap_recov(int disks, size_t bytes, int faila,
- struct page **blocks, struct async_submit_ctl *submit)
+ struct page **blocks, bool is_ddf,
+ struct async_submit_ctl *submit)
{
struct dma_async_tx_descriptor *tx = NULL;
struct page *p, *q, *dq;
@@ -423,10 +455,11 @@ async_raid6_datap_recov(int disks, size_t bytes, int faila,
blocks[faila] = NULL;
blocks[disks-1] = dq;
- /* in the 4 disk case we only need to perform a single source
- * multiplication
+ /* in the 4-disk (or 6-disk ddf layout) case we only need to
+ * perform a single source multiplication with the one good data
+ * block.
*/
- if (disks == 4) {
+ if (disks == 4 && !is_ddf) {
int good = faila == 0 ? 1 : 0;
struct page *g = blocks[good];
@@ -437,6 +470,29 @@ async_raid6_datap_recov(int disks, size_t bytes, int faila,
init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
scribble);
tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit);
+ } else if (disks == 6 && is_ddf) {
+ struct page *g;
+ int good = -1;
+ int i;
+
+ for (i = 0; i < 6-2; i++) {
+ if (blocks[i] == NULL)
+ continue;
+ if (i == faila)
+ continue;
+ BUG_ON(good != -1);
+ good = i;
+ }
+ BUG_ON(good == -1);
+ g = blocks[good];
+
+ init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
+ scribble);
+ tx = async_memcpy(p, g, 0, 0, bytes, submit);
+
+ init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
+ scribble);
+ tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit);
} else {
init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
scribble);
diff --git a/crypto/async_tx/raid6test.c b/crypto/async_tx/raid6test.c
index 3ec27c7..9d7b3e1 100644
--- a/crypto/async_tx/raid6test.c
+++ b/crypto/async_tx/raid6test.c
@@ -108,11 +108,13 @@ static void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, stru
if (failb == disks-2) {
/* data+P failure. */
init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv);
- tx = async_raid6_datap_recov(disks, bytes, faila, ptrs, &submit);
+ tx = async_raid6_datap_recov(disks, bytes, faila, ptrs,
+ false, &submit);
} else {
/* data+data failure. */
init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv);
- tx = async_raid6_2data_recov(disks, bytes, faila, failb, ptrs, &submit);
+ tx = async_raid6_2data_recov(disks, bytes, faila, failb,
+ ptrs, false, &submit);
}
}
init_completion(&cmp);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 81abefc..18d6ed4 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -888,12 +888,14 @@ ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu)
/* We're missing D+P. */
return async_raid6_datap_recov(syndrome_disks+2,
STRIPE_SIZE, faila,
- blocks, &submit);
+ blocks, sh->ddf_layout,
+ &submit);
} else {
/* We're missing D+D. */
return async_raid6_2data_recov(syndrome_disks+2,
STRIPE_SIZE, faila, failb,
- blocks, &submit);
+ blocks, sh->ddf_layout,
+ &submit);
}
}
}
diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h
index a1c486a..7520204 100644
--- a/include/linux/async_tx.h
+++ b/include/linux/async_tx.h
@@ -199,11 +199,13 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int src_cnt,
struct dma_async_tx_descriptor *
async_raid6_2data_recov(int src_num, size_t bytes, int faila, int failb,
- struct page **ptrs, struct async_submit_ctl *submit);
+ struct page **ptrs, bool is_ddf,
+ struct async_submit_ctl *submit);
struct dma_async_tx_descriptor *
async_raid6_datap_recov(int src_num, size_t bytes, int faila,
- struct page **ptrs, struct async_submit_ctl *submit);
+ struct page **ptrs, bool is_ddf,
+ struct async_submit_ctl *submit);
void async_tx_quiesce(struct dma_async_tx_descriptor **tx);
#endif /* _ASYNC_TX_H_ */
next prev parent reply other threads:[~2009-10-20 7:11 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-10-20 7:11 [md PATCH 0/4] fix ddf asynchronous raid6 recovery Dan Williams
2009-10-20 7:11 ` [md PATCH 1/4] md/raid6: kill a gcc-4.0.1 'uninitialized variable' warning Dan Williams
2009-10-20 7:11 ` [md PATCH 2/4] async_pq: kill a stray dma_map() call and other cleanups Dan Williams
2009-10-20 7:11 ` [md PATCH 3/4] async_pq: rename scribble page Dan Williams
2009-10-20 7:11 ` Dan Williams [this message]
2009-10-21 23:26 ` [md PATCH 4/4] async_tx: fix asynchronous raid6 recovery for ddf layouts Neil Brown
2009-10-22 0:42 ` Dan Williams
2009-10-22 2:46 ` Neil Brown
2009-10-22 21:17 ` Dan Williams
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20091020071138.6489.73870.stgit@dwillia2-linux.ch.intel.com \
--to=dan.j.williams@intel.com \
--cc=linux-raid@vger.kernel.org \
--cc=neilb@suse.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).