From: Dan Williams <dan.j.williams@intel.com>
To: neilb@suse.de
Cc: linux-raid@vger.kernel.org
Subject: [md PATCH 4/4] async_tx: fix asynchronous raid6 recovery for ddf layouts
Date: Tue, 20 Oct 2009 00:11:38 -0700 [thread overview]
Message-ID: <20091020071138.6489.73870.stgit@dwillia2-linux.ch.intel.com> (raw)
In-Reply-To: <20091020070627.6489.4006.stgit@dwillia2-linux.ch.intel.com>
The raid6 recovery code currently requires special handling of the
4-disk and 5-disk recovery scenarios for the native layout. Quoting
from commit 0a82a623:
In these situations the default N-disk algorithm will present
0-source or 1-source operations to dma devices. To cover for
dma devices where the minimum source count is 2 we implement
4-disk and 5-disk handling in the recovery code.
Recovery in the ddf layout case needs explicit handling of the 6-disk
and 7-disk recovery cases, which pose the same problems as the ones
mentioned above. Note that N-disks refers to N syndrome-disks, not the
width of the array.
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
crypto/async_tx/async_raid6_recov.c | 98 ++++++++++++++++++++++++++++-------
crypto/async_tx/raid6test.c | 6 +-
drivers/md/raid5.c | 6 +-
include/linux/async_tx.h | 6 +-
4 files changed, 89 insertions(+), 27 deletions(-)
diff --git a/crypto/async_tx/async_raid6_recov.c b/crypto/async_tx/async_raid6_recov.c
index 8e30b6e..54b39d2 100644
--- a/crypto/async_tx/async_raid6_recov.c
+++ b/crypto/async_tx/async_raid6_recov.c
@@ -132,7 +132,7 @@ async_mult(struct page *dest, struct page *src, u8 coef, size_t len,
static struct dma_async_tx_descriptor *
__2data_recov_4(size_t bytes, int faila, int failb, struct page **blocks,
- struct async_submit_ctl *submit)
+ bool is_ddf, struct async_submit_ctl *submit)
{
struct dma_async_tx_descriptor *tx = NULL;
struct page *p, *q, *a, *b;
@@ -143,8 +143,13 @@ __2data_recov_4(size_t bytes, int faila, int failb, struct page **blocks,
void *cb_param = submit->cb_param;
void *scribble = submit->scribble;
- p = blocks[4-2];
- q = blocks[4-1];
+ if (is_ddf) {
+ p = blocks[6-2];
+ q = blocks[6-1];
+ } else {
+ p = blocks[4-2];
+ q = blocks[4-1];
+ }
a = blocks[faila];
b = blocks[failb];
@@ -171,7 +176,7 @@ __2data_recov_4(size_t bytes, int faila, int failb, struct page **blocks,
static struct dma_async_tx_descriptor *
__2data_recov_5(size_t bytes, int faila, int failb, struct page **blocks,
- struct async_submit_ctl *submit)
+ bool is_ddf, struct async_submit_ctl *submit)
{
struct dma_async_tx_descriptor *tx = NULL;
struct page *p, *q, *g, *dp, *dq;
@@ -181,21 +186,34 @@ __2data_recov_5(size_t bytes, int faila, int failb, struct page **blocks,
dma_async_tx_callback cb_fn = submit->cb_fn;
void *cb_param = submit->cb_param;
void *scribble = submit->scribble;
- int uninitialized_var(good);
+ int good = -1;
int i;
- for (i = 0; i < 3; i++) {
- if (i == faila || i == failb)
- continue;
- else {
+ if (is_ddf) {
+ /* a 7-disk ddf operation devolves to the 5-disk native
+ * layout case modulo these fixups
+ */
+ for (i = 0; i < 7-2; i++) {
+ if (blocks[i] == NULL)
+ continue;
+ if (i == faila || i == failb)
+ continue;
+ BUG_ON(good != -1);
good = i;
- break;
}
+ p = blocks[7-2];
+ q = blocks[7-1];
+ } else {
+ for (i = 0; i < 5-2; i++) {
+ if (i == faila || i == failb)
+ continue;
+ BUG_ON(good != -1);
+ good = i;
+ }
+ p = blocks[5-2];
+ q = blocks[5-1];
}
- BUG_ON(i >= 3);
-
- p = blocks[5-2];
- q = blocks[5-1];
+ BUG_ON(good == -1);
g = blocks[good];
/* Compute syndrome with zero for the missing data pages
@@ -317,11 +335,13 @@ __2data_recov_n(int disks, size_t bytes, int faila, int failb,
* @faila: first failed drive index
* @failb: second failed drive index
* @blocks: array of source pointers where the last two entries are p and q
+ * @is_ddf: flag to indicate whether 'blocks' is in the ddf layout
* @submit: submission/completion modifiers
*/
struct dma_async_tx_descriptor *
async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
- struct page **blocks, struct async_submit_ctl *submit)
+ struct page **blocks, bool is_ddf,
+ struct async_submit_ctl *submit)
{
BUG_ON(faila == failb);
if (failb < faila)
@@ -356,13 +376,23 @@ async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
* operation (in contrast to the synchronous case), so
* explicitly handle the 4 disk special case
*/
- return __2data_recov_4(bytes, faila, failb, blocks, submit);
+ return __2data_recov_4(bytes, faila, failb, blocks, false, submit);
case 5:
/* dma devices do not uniformly understand a single
* source pq operation (in contrast to the synchronous
* case), so explicitly handle the 5 disk special case
*/
- return __2data_recov_5(bytes, faila, failb, blocks, submit);
+ return __2data_recov_5(bytes, faila, failb, blocks, false, submit);
+ case 6:
+ if (is_ddf)
+ return __2data_recov_4(bytes, faila, failb, blocks,
+ true, submit);
+ /* fall through */
+ case 7:
+ if (is_ddf)
+ return __2data_recov_5(bytes, faila, failb, blocks,
+ true, submit);
+ /* fall through */
default:
return __2data_recov_n(disks, bytes, faila, failb, blocks, submit);
}
@@ -375,11 +405,13 @@ EXPORT_SYMBOL_GPL(async_raid6_2data_recov);
* @bytes: block size
* @faila: failed drive index
* @blocks: array of source pointers where the last two entries are p and q
+ * @is_ddf: flag to indicate whether 'blocks' is in the ddf layout
* @submit: submission/completion modifiers
*/
struct dma_async_tx_descriptor *
async_raid6_datap_recov(int disks, size_t bytes, int faila,
- struct page **blocks, struct async_submit_ctl *submit)
+ struct page **blocks, bool is_ddf,
+ struct async_submit_ctl *submit)
{
struct dma_async_tx_descriptor *tx = NULL;
struct page *p, *q, *dq;
@@ -423,10 +455,11 @@ async_raid6_datap_recov(int disks, size_t bytes, int faila,
blocks[faila] = NULL;
blocks[disks-1] = dq;
- /* in the 4 disk case we only need to perform a single source
- * multiplication
+ /* in the 4-disk (or 6-disk ddf layout) case we only need to
+ * perform a single source multiplication with the one good data
+ * block.
*/
- if (disks == 4) {
+ if (disks == 4 && !is_ddf) {
int good = faila == 0 ? 1 : 0;
struct page *g = blocks[good];
@@ -437,6 +470,29 @@ async_raid6_datap_recov(int disks, size_t bytes, int faila,
init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
scribble);
tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit);
+ } else if (disks == 6 && is_ddf) {
+ struct page *g;
+ int good = -1;
+ int i;
+
+ for (i = 0; i < 6-2; i++) {
+ if (blocks[i] == NULL)
+ continue;
+ if (i == faila)
+ continue;
+ BUG_ON(good != -1);
+ good = i;
+ }
+ BUG_ON(good == -1);
+ g = blocks[good];
+
+ init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
+ scribble);
+ tx = async_memcpy(p, g, 0, 0, bytes, submit);
+
+ init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
+ scribble);
+ tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit);
} else {
init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
scribble);
diff --git a/crypto/async_tx/raid6test.c b/crypto/async_tx/raid6test.c
index 3ec27c7..9d7b3e1 100644
--- a/crypto/async_tx/raid6test.c
+++ b/crypto/async_tx/raid6test.c
@@ -108,11 +108,13 @@ static void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, stru
if (failb == disks-2) {
/* data+P failure. */
init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv);
- tx = async_raid6_datap_recov(disks, bytes, faila, ptrs, &submit);
+ tx = async_raid6_datap_recov(disks, bytes, faila, ptrs,
+ false, &submit);
} else {
/* data+data failure. */
init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv);
- tx = async_raid6_2data_recov(disks, bytes, faila, failb, ptrs, &submit);
+ tx = async_raid6_2data_recov(disks, bytes, faila, failb,
+ ptrs, false, &submit);
}
}
init_completion(&cmp);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 81abefc..18d6ed4 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -888,12 +888,14 @@ ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu)
/* We're missing D+P. */
return async_raid6_datap_recov(syndrome_disks+2,
STRIPE_SIZE, faila,
- blocks, &submit);
+ blocks, sh->ddf_layout,
+ &submit);
} else {
/* We're missing D+D. */
return async_raid6_2data_recov(syndrome_disks+2,
STRIPE_SIZE, faila, failb,
- blocks, &submit);
+ blocks, sh->ddf_layout,
+ &submit);
}
}
}
diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h
index a1c486a..7520204 100644
--- a/include/linux/async_tx.h
+++ b/include/linux/async_tx.h
@@ -199,11 +199,13 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int src_cnt,
struct dma_async_tx_descriptor *
async_raid6_2data_recov(int src_num, size_t bytes, int faila, int failb,
- struct page **ptrs, struct async_submit_ctl *submit);
+ struct page **ptrs, bool is_ddf,
+ struct async_submit_ctl *submit);
struct dma_async_tx_descriptor *
async_raid6_datap_recov(int src_num, size_t bytes, int faila,
- struct page **ptrs, struct async_submit_ctl *submit);
+ struct page **ptrs, bool is_ddf,
+ struct async_submit_ctl *submit);
void async_tx_quiesce(struct dma_async_tx_descriptor **tx);
#endif /* _ASYNC_TX_H_ */
next prev parent reply other threads:[~2009-10-20 7:11 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-10-20 7:11 [md PATCH 0/4] fix ddf asynchronous raid6 recovery Dan Williams
2009-10-20 7:11 ` [md PATCH 1/4] md/raid6: kill a gcc-4.0.1 'uninitialized variable' warning Dan Williams
2009-10-20 7:11 ` [md PATCH 2/4] async_pq: kill a stray dma_map() call and other cleanups Dan Williams
2009-10-20 7:11 ` [md PATCH 3/4] async_pq: rename scribble page Dan Williams
2009-10-20 7:11 ` Dan Williams [this message]
2009-10-21 23:26 ` [md PATCH 4/4] async_tx: fix asynchronous raid6 recovery for ddf layouts Neil Brown
2009-10-22 0:42 ` Dan Williams
2009-10-22 2:46 ` Neil Brown
2009-10-22 21:17 ` Dan Williams
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20091020071138.6489.73870.stgit@dwillia2-linux.ch.intel.com \
--to=dan.j.williams@intel.com \
--cc=linux-raid@vger.kernel.org \
--cc=neilb@suse.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.