From: Shaohua Li <shli@kernel.org>
To: linux-raid@vger.kernel.org
Cc: neilb@suse.de, axboe@kernel.dk, dan.j.williams@intel.com,
shli@fusionio.com
Subject: [patch 08/10 v3] raid5: make_request use batch stripe release
Date: Mon, 25 Jun 2012 15:24:55 +0800 [thread overview]
Message-ID: <20120625072702.921605418@kernel.org> (raw)
In-Reply-To: 20120625072447.268095276@kernel.org
[-- Attachment #1: raid5-make_request-relase_stripe-batch.patch --]
[-- Type: text/plain, Size: 4473 bytes --]
make_request() releases every stripe individually, and each stripe usually has
a count of 1, which defeats the earlier release_stripe() optimization. In my
tests, this release_stripe() call becomes the heaviest place that takes
conf->device_lock once the previous patches are applied.
The patch below batches stripe release: all the stripes are released together
at unplug time. The STRIPE_ON_UNPLUG_LIST bit protects the stripe lru against
concurrent access.
Signed-off-by: Shaohua Li <shli@fusionio.com>
---
drivers/md/raid5.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++-----
drivers/md/raid5.h | 1
2 files changed, 60 insertions(+), 5 deletions(-)
Index: linux/drivers/md/raid5.c
===================================================================
--- linux.orig/drivers/md/raid5.c 2012-06-25 14:38:33.110889008 +0800
+++ linux/drivers/md/raid5.c 2012-06-25 14:38:37.378835415 +0800
@@ -484,7 +484,8 @@ get_active_stripe(struct r5conf *conf, s
} else {
if (atomic_read(&sh->count)) {
BUG_ON(!list_empty(&sh->lru)
- && !test_bit(STRIPE_EXPANDING, &sh->state));
+ && !test_bit(STRIPE_EXPANDING, &sh->state)
+ && !test_bit(STRIPE_ON_UNPLUG_LIST, &sh->state));
} else {
if (!test_bit(STRIPE_HANDLE, &sh->state))
atomic_inc(&conf->active_stripes);
@@ -3984,6 +3985,51 @@ static struct stripe_head *__get_priorit
return sh;
}
+#define raid5_unplug_list(mdcb) (struct list_head *)(mdcb + 1)
+static void raid5_unplug(struct md_plug_cb *mdcb)
+{
+ struct list_head *list = raid5_unplug_list(mdcb);
+ struct stripe_head *sh;
+ struct r5conf *conf = mdcb->mddev->private;
+
+ if (list->next == NULL || list_empty(list))
+ return;
+ spin_lock_irq(&conf->device_lock);
+ while (!list_empty(list)) {
+ sh = list_entry(list->next, struct stripe_head, lru);
+ list_del_init(&sh->lru);
+ /*
+ * avoid race release_stripe_plug() sees STRIPE_ON_UNPLUG_LIST
+ * clear but the stripe is still in our list
+ */
+ smp_mb__before_clear_bit();
+ clear_bit(STRIPE_ON_UNPLUG_LIST, &sh->state);
+ __release_stripe(conf, sh);
+ }
+ spin_unlock_irq(&conf->device_lock);
+}
+
+static void release_stripe_plug(struct md_plug_cb *mdcb,
+ struct stripe_head *sh)
+{
+ struct list_head *list = raid5_unplug_list(mdcb);
+
+ if (!mdcb) {
+ release_stripe(sh);
+ return;
+ }
+
+ if (list->next == NULL) {
+ INIT_LIST_HEAD(list);
+ mdcb->unplug = raid5_unplug;
+ }
+
+ if (!test_and_set_bit(STRIPE_ON_UNPLUG_LIST, &sh->state))
+ list_add_tail(&sh->lru, list);
+ else
+ release_stripe(sh);
+}
+
static void make_request(struct mddev *mddev, struct bio * bi)
{
struct r5conf *conf = mddev->private;
@@ -3993,7 +4039,7 @@ static void make_request(struct mddev *m
struct stripe_head *sh;
const int rw = bio_data_dir(bi);
int remaining;
- int plugged;
+ struct md_plug_cb *mdcb;
if (unlikely(bi->bi_rw & REQ_FLUSH)) {
md_flush_request(mddev, bi);
@@ -4012,7 +4058,8 @@ static void make_request(struct mddev *m
bi->bi_next = NULL;
bi->bi_phys_segments = 1; /* over-loaded to count active stripes */
- plugged = !!mddev_check_plugged(mddev, NULL, 0);
+ mdcb = mddev_check_plugged(mddev, raid5_unplug,
+ sizeof(struct list_head));
for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
DEFINE_WAIT(w);
int previous;
@@ -4114,7 +4161,14 @@ static void make_request(struct mddev *m
if ((bi->bi_rw & REQ_SYNC) &&
!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
atomic_inc(&conf->preread_active_stripes);
- release_stripe(sh);
+ /*
+ * We must recheck here. schedule() might be called
+ * above which makes unplug invoked already, so the old
+ * mdcb is invalid
+ */
+ mdcb = mddev_check_plugged(mddev, raid5_unplug,
+ sizeof(struct list_head));
+ release_stripe_plug(mdcb, sh);
} else {
/* cannot get stripe for read-ahead, just give-up */
clear_bit(BIO_UPTODATE, &bi->bi_flags);
@@ -4123,7 +4177,7 @@ static void make_request(struct mddev *m
}
}
- if (!plugged)
+ if (!mdcb)
md_wakeup_thread(mddev->thread);
remaining = raid5_dec_bi_active_stripes(bi);
Index: linux/drivers/md/raid5.h
===================================================================
--- linux.orig/drivers/md/raid5.h 2012-06-25 14:37:13.651888057 +0800
+++ linux/drivers/md/raid5.h 2012-06-25 14:38:37.382835318 +0800
@@ -320,6 +320,7 @@ enum {
STRIPE_BIOFILL_RUN,
STRIPE_COMPUTE_RUN,
STRIPE_OPS_REQ_PENDING,
+ STRIPE_ON_UNPLUG_LIST,
};
/*
next prev parent reply other threads:[~2012-06-25 7:24 UTC|newest]
Thread overview: 32+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-06-25 7:24 [patch 00/10 v3] raid5: improve write performance for fast storage Shaohua Li
2012-06-25 7:24 ` [patch 01/10 v3] raid5: use wake_up_all for overlap waking Shaohua Li
2012-06-28 7:26 ` NeilBrown
2012-06-28 8:53 ` Shaohua Li
2012-06-25 7:24 ` [patch 02/10 v3] raid5: delayed stripe fix Shaohua Li
2012-07-02 0:46 ` NeilBrown
2012-07-02 0:49 ` Shaohua Li
2012-07-02 0:55 ` NeilBrown
2012-06-25 7:24 ` [patch 03/10 v3] raid5: add a per-stripe lock Shaohua Li
2012-07-02 0:50 ` NeilBrown
2012-07-02 3:16 ` Shaohua Li
2012-07-02 7:39 ` NeilBrown
2012-07-03 1:27 ` Shaohua Li
2012-07-03 12:16 ` majianpeng
2012-07-03 23:56 ` NeilBrown
2012-07-04 1:09 ` majianpeng
2012-06-25 7:24 ` [patch 04/10 v3] raid5: lockless access raid5 overrided bi_phys_segments Shaohua Li
2012-06-25 7:24 ` [patch 05/10 v3] raid5: remove some device_lock locking places Shaohua Li
2012-06-25 7:24 ` [patch 06/10 v3] raid5: reduce chance release_stripe() taking device_lock Shaohua Li
2012-07-02 0:57 ` NeilBrown
2012-06-25 7:24 ` [patch 07/10 v3] md: personality can provide unplug private data Shaohua Li
2012-07-02 1:06 ` NeilBrown
2012-06-25 7:24 ` Shaohua Li [this message]
2012-07-02 2:31 ` [patch 08/10 v3] raid5: make_request use batch stripe release NeilBrown
2012-07-02 2:59 ` Shaohua Li
2012-07-02 5:07 ` NeilBrown
2012-06-25 7:24 ` [patch 09/10 v3] raid5: raid5d handle stripe in batch way Shaohua Li
2012-07-02 2:32 ` NeilBrown
2012-06-25 7:24 ` [patch 10/10 v3] raid5: create multiple threads to handle stripes Shaohua Li
2012-07-02 2:39 ` NeilBrown
2012-07-02 20:03 ` Dan Williams
2012-07-03 8:04 ` Shaohua Li
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20120625072702.921605418@kernel.org \
--to=shli@kernel.org \
--cc=axboe@kernel.dk \
--cc=dan.j.williams@intel.com \
--cc=linux-raid@vger.kernel.org \
--cc=neilb@suse.de \
--cc=shli@fusionio.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.