From: Shaohua Li <shli@kernel.org>
To: linux-raid@vger.kernel.org
Cc: neilb@suse.de
Subject: [RFC]raid5: adjust operation order of handle_stripe
Date: Mon, 12 May 2014 09:16:59 +0800 [thread overview]
Message-ID: <20140512011659.GA13135@kernel.org> (raw)
For a full stripe write, the ideal operation order is handle_stripe_dirtying(),
raid_run_ops(), set R5_Wantwrite bit, and ops_run_io(). In this way, one
handle_stripe() will dispatch IO for the stripe, otherwise there are more extra
rounds of handle_stripe(). In a high speed raid5 array, handle_stripe()
consumes considered cpu time. Reducing its overhead has around 10% performance
boost.
This patch makes handle_stripe() operations follow the ideal order. It also
moves handle_stripe_clean_event() up, as it handles completed stripe. And if I
don't change handle_stripe_clean_event() order, I saw some states confused with
other changes.
Signed-off-by: Shaohua Li <shli@fusionio.com>
---
drivers/md/raid5.c | 39 +++++++++++++++++++++------------------
1 file changed, 21 insertions(+), 18 deletions(-)
Index: linux/drivers/md/raid5.c
===================================================================
--- linux.orig/drivers/md/raid5.c 2014-05-06 17:19:13.868225752 +0800
+++ linux/drivers/md/raid5.c 2014-05-06 17:20:25.367326852 +0800
@@ -1633,7 +1633,7 @@ static void ops_run_check_pq(struct stri
&sh->ops.zero_sum_result, percpu->spare_page, &submit);
}
-static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
+static void raid_run_ops(struct stripe_head *sh, unsigned long *ops_request)
{
int overlap_clear = 0, i, disks = sh->disks;
struct dma_async_tx_descriptor *tx = NULL;
@@ -1644,12 +1644,12 @@ static void raid_run_ops(struct stripe_h
cpu = get_cpu();
percpu = per_cpu_ptr(conf->percpu, cpu);
- if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) {
+ if (test_and_clear_bit(STRIPE_OP_BIOFILL, ops_request)) {
ops_run_biofill(sh);
overlap_clear++;
}
- if (test_bit(STRIPE_OP_COMPUTE_BLK, &ops_request)) {
+ if (test_and_clear_bit(STRIPE_OP_COMPUTE_BLK, ops_request)) {
if (level < 6)
tx = ops_run_compute5(sh, percpu);
else {
@@ -1659,26 +1659,26 @@ static void raid_run_ops(struct stripe_h
tx = ops_run_compute6_2(sh, percpu);
}
/* terminate the chain if reconstruct is not set to be run */
- if (tx && !test_bit(STRIPE_OP_RECONSTRUCT, &ops_request))
+ if (tx && !test_and_clear_bit(STRIPE_OP_RECONSTRUCT, ops_request))
async_tx_ack(tx);
}
- if (test_bit(STRIPE_OP_PREXOR, &ops_request))
+ if (test_and_clear_bit(STRIPE_OP_PREXOR, ops_request))
tx = ops_run_prexor(sh, percpu, tx);
- if (test_bit(STRIPE_OP_BIODRAIN, &ops_request)) {
+ if (test_and_clear_bit(STRIPE_OP_BIODRAIN, ops_request)) {
tx = ops_run_biodrain(sh, tx);
overlap_clear++;
}
- if (test_bit(STRIPE_OP_RECONSTRUCT, &ops_request)) {
+ if (test_and_clear_bit(STRIPE_OP_RECONSTRUCT, ops_request)) {
if (level < 6)
ops_run_reconstruct5(sh, percpu, tx);
else
ops_run_reconstruct6(sh, percpu, tx);
}
- if (test_bit(STRIPE_OP_CHECK, &ops_request)) {
+ if (test_and_clear_bit(STRIPE_OP_CHECK, ops_request)) {
if (sh->check_state == check_state_run)
ops_run_check_p(sh, percpu);
else if (sh->check_state == check_state_run_q)
@@ -3780,6 +3780,41 @@ static void handle_stripe(struct stripe_
handle_failed_sync(conf, sh, &s);
}
+ /*
+ * might be able to return some write requests if the parity blocks
+ * are safe, or on a failed drive
+ */
+ pdev = &sh->dev[sh->pd_idx];
+ s.p_failed = (s.failed >= 1 && s.failed_num[0] == sh->pd_idx)
+ || (s.failed >= 2 && s.failed_num[1] == sh->pd_idx);
+ qdev = &sh->dev[sh->qd_idx];
+ s.q_failed = (s.failed >= 1 && s.failed_num[0] == sh->qd_idx)
+ || (s.failed >= 2 && s.failed_num[1] == sh->qd_idx)
+ || conf->level < 6;
+
+ if (s.written &&
+ (s.p_failed || ((test_bit(R5_Insync, &pdev->flags)
+ && !test_bit(R5_LOCKED, &pdev->flags)
+ && (test_bit(R5_UPTODATE, &pdev->flags) ||
+ test_bit(R5_Discard, &pdev->flags))))) &&
+ (s.q_failed || ((test_bit(R5_Insync, &qdev->flags)
+ && !test_bit(R5_LOCKED, &qdev->flags)
+ && (test_bit(R5_UPTODATE, &qdev->flags) ||
+ test_bit(R5_Discard, &qdev->flags))))))
+ handle_stripe_clean_event(conf, sh, disks, &s.return_bi);
+
+ /* Now to consider new write requests and what else, if anything
+ * should be read. We do not handle new writes when:
+ * 1/ A 'write' operation (copy+xor) is already in flight.
+ * 2/ A 'check' operation is in flight, as it may clobber the parity
+ * block.
+ */
+ if (s.to_write && !sh->reconstruct_state && !sh->check_state)
+ handle_stripe_dirtying(conf, sh, &s, disks);
+
+ if (s.ops_request)
+ raid_run_ops(sh, &s.ops_request);
+
/* Now we check to see if any write operations have recently
* completed
*/
@@ -3817,29 +3852,6 @@ static void handle_stripe(struct stripe_
s.dec_preread_active = 1;
}
- /*
- * might be able to return some write requests if the parity blocks
- * are safe, or on a failed drive
- */
- pdev = &sh->dev[sh->pd_idx];
- s.p_failed = (s.failed >= 1 && s.failed_num[0] == sh->pd_idx)
- || (s.failed >= 2 && s.failed_num[1] == sh->pd_idx);
- qdev = &sh->dev[sh->qd_idx];
- s.q_failed = (s.failed >= 1 && s.failed_num[0] == sh->qd_idx)
- || (s.failed >= 2 && s.failed_num[1] == sh->qd_idx)
- || conf->level < 6;
-
- if (s.written &&
- (s.p_failed || ((test_bit(R5_Insync, &pdev->flags)
- && !test_bit(R5_LOCKED, &pdev->flags)
- && (test_bit(R5_UPTODATE, &pdev->flags) ||
- test_bit(R5_Discard, &pdev->flags))))) &&
- (s.q_failed || ((test_bit(R5_Insync, &qdev->flags)
- && !test_bit(R5_LOCKED, &qdev->flags)
- && (test_bit(R5_UPTODATE, &qdev->flags) ||
- test_bit(R5_Discard, &qdev->flags))))))
- handle_stripe_clean_event(conf, sh, disks, &s.return_bi);
-
/* Now we might consider reading some blocks, either to check/generate
* parity, or to satisfy requests
* or to load a block that is being partially written.
@@ -3851,15 +3863,6 @@ static void handle_stripe(struct stripe_
|| s.expanding)
handle_stripe_fill(sh, &s, disks);
- /* Now to consider new write requests and what else, if anything
- * should be read. We do not handle new writes when:
- * 1/ A 'write' operation (copy+xor) is already in flight.
- * 2/ A 'check' operation is in flight, as it may clobber the parity
- * block.
- */
- if (s.to_write && !sh->reconstruct_state && !sh->check_state)
- handle_stripe_dirtying(conf, sh, &s, disks);
-
/* maybe we need to check and possibly fix the parity for this stripe
* Any reads will already have been scheduled, so we just see if enough
* data is available. The parity check is held off while parity
@@ -4014,7 +4017,7 @@ finish:
}
if (s.ops_request)
- raid_run_ops(sh, s.ops_request);
+ raid_run_ops(sh, &s.ops_request);
ops_run_io(sh, &s);
next reply other threads:[~2014-05-12 1:16 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-05-12 1:16 Shaohua Li [this message]
2014-05-19 2:21 ` [RFC]raid5: adjust operation order of handle_stripe Shaohua Li
2014-05-19 2:42 ` NeilBrown
2014-05-20 7:21 ` NeilBrown
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20140512011659.GA13135@kernel.org \
--to=shli@kernel.org \
--cc=linux-raid@vger.kernel.org \
--cc=neilb@suse.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.