From: Dan Williams <dan.j.williams@intel.com>
To: neilb@suse.de
Cc: linux-raid@vger.kernel.org, Ilya Yanok <yanok@emcraft.com>,
Yuri Tikhonov <yur@emcraft.com>
Subject: [PATCH v2 6/9] md/raid6: asynchronous handle_parity_check6
Date: Mon, 31 Aug 2009 09:41:34 -0700 [thread overview]
Message-ID: <20090831164134.1696.27588.stgit@dwillia2-linux.ch.intel.com> (raw)
In-Reply-To: <20090831163914.1696.55782.stgit@dwillia2-linux.ch.intel.com>
[ Based on an original patch by Yuri Tikhonov ]
Implement the state machine for handling the RAID-6 parity check and
repair functionality. Note that, unlike raid5, the raid6 case does not
need to check for new failures, as it will always write back the correct
disks. The raid5 case can be updated to check zero_sum_result to avoid
getting confused by new failures, rather than retrying the entire check
operation.
Signed-off-by: Yuri Tikhonov <yur@emcraft.com>
Signed-off-by: Ilya Yanok <yanok@emcraft.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
drivers/md/raid5.c | 206 +++++++++++++++++++++++++++++++++++-----------------
1 files changed, 139 insertions(+), 67 deletions(-)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 08f8063..3c31f7f 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2901,91 +2901,163 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
struct stripe_head_state *s,
struct r6_state *r6s, int disks)
{
- int update_p = 0, update_q = 0;
- struct r5dev *dev;
int pd_idx = sh->pd_idx;
int qd_idx = sh->qd_idx;
- unsigned long cpu;
- struct page *tmp_page;
+ struct r5dev *dev;
set_bit(STRIPE_HANDLE, &sh->state);
BUG_ON(s->failed > 2);
- BUG_ON(s->uptodate < disks);
+
/* Want to check and possibly repair P and Q.
* However there could be one 'failed' device, in which
* case we can only check one of them, possibly using the
* other to generate missing data
*/
- cpu = get_cpu();
- tmp_page = per_cpu_ptr(conf->percpu, cpu)->spare_page;
- if (s->failed == r6s->q_failed) {
- /* The only possible failed device holds 'Q', so it
- * makes sense to check P (If anything else were failed,
- * we would have used P to recreate it).
- */
- compute_block_1(sh, pd_idx, 1);
- if (!page_is_zero(sh->dev[pd_idx].page)) {
- compute_block_1(sh, pd_idx, 0);
- update_p = 1;
+
+ switch (sh->check_state) {
+ case check_state_idle:
+ /* start a new check operation if there are < 2 failures */
+ if (s->failed == r6s->q_failed) {
+ /* The only possible failed device holds Q, so it
+ * makes sense to check P (If anything else were failed,
+ * we would have used P to recreate it).
+ */
+ sh->check_state = check_state_run;
}
- }
- if (!r6s->q_failed && s->failed < 2) {
- /* q is not failed, and we didn't use it to generate
- * anything, so it makes sense to check it
- */
- memcpy(page_address(tmp_page),
- page_address(sh->dev[qd_idx].page),
- STRIPE_SIZE);
- compute_parity6(sh, UPDATE_PARITY);
- if (memcmp(page_address(tmp_page),
- page_address(sh->dev[qd_idx].page),
- STRIPE_SIZE) != 0) {
- clear_bit(STRIPE_INSYNC, &sh->state);
- update_q = 1;
+ if (!r6s->q_failed && s->failed < 2) {
+ /* Q is not failed, and we didn't use it to generate
+ * anything, so it makes sense to check it
+ */
+ if (sh->check_state == check_state_run)
+ sh->check_state = check_state_run_pq;
+ else
+ sh->check_state = check_state_run_q;
}
- }
- put_cpu();
- if (update_p || update_q) {
- conf->mddev->resync_mismatches += STRIPE_SECTORS;
- if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
- /* don't try to repair!! */
- update_p = update_q = 0;
- }
+ /* discard potentially stale zero_sum_result */
+ sh->ops.zero_sum_result = 0;
- /* now write out any block on a failed drive,
- * or P or Q if they need it
- */
+ if (sh->check_state == check_state_run) {
+ /* async_xor_zero_sum destroys the contents of P */
+ clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
+ s->uptodate--;
+ }
+ if (sh->check_state >= check_state_run &&
+ sh->check_state <= check_state_run_pq) {
+ /* async_syndrome_zero_sum preserves P and Q, so
+ * no need to mark them !uptodate here
+ */
+ set_bit(STRIPE_OP_CHECK, &s->ops_request);
+ break;
+ }
- if (s->failed == 2) {
- dev = &sh->dev[r6s->failed_num[1]];
- s->locked++;
- set_bit(R5_LOCKED, &dev->flags);
- set_bit(R5_Wantwrite, &dev->flags);
- }
- if (s->failed >= 1) {
- dev = &sh->dev[r6s->failed_num[0]];
- s->locked++;
- set_bit(R5_LOCKED, &dev->flags);
- set_bit(R5_Wantwrite, &dev->flags);
- }
+ /* we have 2-disk failure */
+ BUG_ON(s->failed != 2);
+ /* fall through */
+ case check_state_compute_result:
+ sh->check_state = check_state_idle;
- if (update_p) {
- dev = &sh->dev[pd_idx];
- s->locked++;
- set_bit(R5_LOCKED, &dev->flags);
- set_bit(R5_Wantwrite, &dev->flags);
- }
- if (update_q) {
- dev = &sh->dev[qd_idx];
- s->locked++;
- set_bit(R5_LOCKED, &dev->flags);
- set_bit(R5_Wantwrite, &dev->flags);
- }
- clear_bit(STRIPE_DEGRADED, &sh->state);
+ /* check that a write has not made the stripe insync */
+ if (test_bit(STRIPE_INSYNC, &sh->state))
+ break;
- set_bit(STRIPE_INSYNC, &sh->state);
+ /* now write out any block on a failed drive,
+ * or P or Q if they were recomputed
+ */
+ BUG_ON(s->uptodate < disks - 1); /* We don't need Q to recover */
+ if (s->failed == 2) {
+ dev = &sh->dev[r6s->failed_num[1]];
+ s->locked++;
+ set_bit(R5_LOCKED, &dev->flags);
+ set_bit(R5_Wantwrite, &dev->flags);
+ }
+ if (s->failed >= 1) {
+ dev = &sh->dev[r6s->failed_num[0]];
+ s->locked++;
+ set_bit(R5_LOCKED, &dev->flags);
+ set_bit(R5_Wantwrite, &dev->flags);
+ }
+ if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) {
+ dev = &sh->dev[pd_idx];
+ s->locked++;
+ set_bit(R5_LOCKED, &dev->flags);
+ set_bit(R5_Wantwrite, &dev->flags);
+ }
+ if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) {
+ dev = &sh->dev[qd_idx];
+ s->locked++;
+ set_bit(R5_LOCKED, &dev->flags);
+ set_bit(R5_Wantwrite, &dev->flags);
+ }
+ clear_bit(STRIPE_DEGRADED, &sh->state);
+
+ set_bit(STRIPE_INSYNC, &sh->state);
+ break;
+ case check_state_run:
+ case check_state_run_q:
+ case check_state_run_pq:
+ break; /* we will be called again upon completion */
+ case check_state_check_result:
+ sh->check_state = check_state_idle;
+
+ /* handle a successful check operation, if parity is correct
+ * we are done. Otherwise update the mismatch count and repair
+ * parity if !MD_RECOVERY_CHECK
+ */
+ if (sh->ops.zero_sum_result == 0) {
+ /* both parities are correct */
+ if (!s->failed)
+ set_bit(STRIPE_INSYNC, &sh->state);
+ else {
+ /* in contrast to the raid5 case we can validate
+ * parity, but still have a failure to write
+ * back
+ */
+ sh->check_state = check_state_compute_result;
+ /* Returning at this point means that we may go
+ * off and bring p and/or q uptodate again so
+ * we make sure to check zero_sum_result again
+ * to verify if p or q need writeback
+ */
+ }
+ } else {
+ conf->mddev->resync_mismatches += STRIPE_SECTORS;
+ if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
+ /* don't try to repair!! */
+ set_bit(STRIPE_INSYNC, &sh->state);
+ else {
+ int *target = &sh->ops.target;
+
+ sh->ops.target = -1;
+ sh->ops.target2 = -1;
+ sh->check_state = check_state_compute_run;
+ set_bit(STRIPE_COMPUTE_RUN, &sh->state);
+ set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
+ if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) {
+ set_bit(R5_Wantcompute,
+ &sh->dev[pd_idx].flags);
+ *target = pd_idx;
+ target = &sh->ops.target2;
+ s->uptodate++;
+ }
+ if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) {
+ set_bit(R5_Wantcompute,
+ &sh->dev[qd_idx].flags);
+ *target = qd_idx;
+ s->uptodate++;
+ }
+ }
+ }
+ break;
+ case check_state_compute_run:
+ break;
+ default:
+ printk(KERN_ERR "%s: unknown check_state: %d sector: %llu\n",
+ __func__, sh->check_state,
+ (unsigned long long) sh->sector);
+ BUG();
+ }
}
static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
next prev parent reply other threads:[~2009-08-31 16:41 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-08-31 16:41 [PATCH v2 0/9] Asynchronous raid6 acceleration (part 3 of 3) Dan Williams
2009-08-31 16:41 ` [PATCH v2 1/9] md/raid5: factor out mark_uptodate from ops_complete_compute5 Dan Williams
2009-08-31 16:41 ` [PATCH v2 2/9] md/raid6: asynchronous raid6 operations Dan Williams
2009-09-15 5:32 ` Neil Brown
2009-08-31 16:41 ` [PATCH v2 3/9] md/raid5, 6: common schedule_reconstruction for raid5/6 Dan Williams
2009-08-31 16:41 ` [PATCH v2 4/9] md/raid6: asynchronous handle_stripe_fill6 Dan Williams
2009-08-31 16:41 ` [PATCH v2 5/9] md/raid6: asynchronous handle_stripe_dirtying6 Dan Williams
2009-08-31 16:41 ` Dan Williams [this message]
2009-08-31 16:41 ` [PATCH v2 7/9] md/raid6: asynchronous handle_stripe6 Dan Williams
2009-09-15 5:26 ` Neil Brown
2009-09-15 8:42 ` Dan Williams
2009-08-31 16:41 ` [PATCH v2 8/9] md/raid6: remove synchronous infrastructure Dan Williams
2009-08-31 16:41 ` [PATCH v2 9/9] md/raid456: distribute raid processing over multiple cores Dan Williams
2009-08-31 17:23 ` [PATCH v2 0/9] Asynchronous raid6 acceleration (part 3 of 3) kwick
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20090831164134.1696.27588.stgit@dwillia2-linux.ch.intel.com \
--to=dan.j.williams@intel.com \
--cc=linux-raid@vger.kernel.org \
--cc=neilb@suse.de \
--cc=yanok@emcraft.com \
--cc=yur@emcraft.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).