From: Ming Lei <tom.leiming@gmail.com>
To: Shaohua Li <shli@kernel.org>, Jens Axboe <axboe@fb.com>,
linux-raid@vger.kernel.org, linux-block@vger.kernel.org,
Christoph Hellwig <hch@infradead.org>
Cc: Ming Lei <tom.leiming@gmail.com>
Subject: [PATCH v3 10/14] md: raid1: improve write behind
Date: Fri, 17 Mar 2017 00:12:31 +0800 [thread overview]
Message-ID: <20170316161235.27110-11-tom.leiming@gmail.com> (raw)
In-Reply-To: <20170316161235.27110-1-tom.leiming@gmail.com>
This patch improve handling of write behind in the following ways:
- introduce behind master bio to hold all write behind pages
- fast clone bios from behind master bio
- avoid to change bvec table directly
- use bio_copy_data() and make code more clean
Suggested-by: Shaohua Li <shli@fb.com>
Signed-off-by: Ming Lei <tom.leiming@gmail.com>
---
drivers/md/raid1.c | 118 ++++++++++++++++++++++++-----------------------------
drivers/md/raid1.h | 10 +++--
2 files changed, 61 insertions(+), 67 deletions(-)
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 2f3622c695ce..3c13286190c1 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -405,12 +405,9 @@ static void close_write(struct r1bio *r1_bio)
{
/* it really is the end of this request */
if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
- /* free extra copy of the data pages */
- int i = r1_bio->behind_page_count;
- while (i--)
- safe_put_page(r1_bio->behind_bvecs[i].bv_page);
- kfree(r1_bio->behind_bvecs);
- r1_bio->behind_bvecs = NULL;
+ bio_free_pages(r1_bio->behind_master_bio);
+ bio_put(r1_bio->behind_master_bio);
+ r1_bio->behind_master_bio = NULL;
}
/* clear the bitmap if all writes complete successfully */
bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
@@ -512,6 +509,10 @@ static void raid1_end_write_request(struct bio *bio)
}
if (behind) {
+ /* we release behind master bio when all write are done */
+ if (r1_bio->behind_master_bio == bio)
+ to_put = NULL;
+
if (test_bit(WriteMostly, &rdev->flags))
atomic_dec(&r1_bio->behind_remaining);
@@ -1096,39 +1097,46 @@ static void unfreeze_array(struct r1conf *conf)
wake_up(&conf->wait_barrier);
}
-/* duplicate the data pages for behind I/O
- */
-static void alloc_behind_pages(struct bio *bio, struct r1bio *r1_bio)
+static struct bio *alloc_behind_master_bio(struct r1bio *r1_bio,
+ struct bio *bio,
+ int offset, int size)
{
- int i;
- struct bio_vec *bvec;
- struct bio_vec *bvecs = kzalloc(bio->bi_vcnt * sizeof(struct bio_vec),
- GFP_NOIO);
- if (unlikely(!bvecs))
- return;
+ unsigned vcnt = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ int i = 0;
+ struct bio *behind_bio = NULL;
+
+ behind_bio = bio_alloc_mddev(GFP_NOIO, vcnt, r1_bio->mddev);
+ if (!behind_bio)
+ goto fail;
+
+ while (i < vcnt && size) {
+ struct page *page;
+ int len = min_t(int, PAGE_SIZE, size);
+
+ page = alloc_page(GFP_NOIO);
+ if (unlikely(!page))
+ goto free_pages;
+
+ bio_add_page(behind_bio, page, len, 0);
+
+ size -= len;
+ i++;
+ }
- bio_for_each_segment_all(bvec, bio, i) {
- bvecs[i] = *bvec;
- bvecs[i].bv_page = alloc_page(GFP_NOIO);
- if (unlikely(!bvecs[i].bv_page))
- goto do_sync_io;
- memcpy(kmap(bvecs[i].bv_page) + bvec->bv_offset,
- kmap(bvec->bv_page) + bvec->bv_offset, bvec->bv_len);
- kunmap(bvecs[i].bv_page);
- kunmap(bvec->bv_page);
- }
- r1_bio->behind_bvecs = bvecs;
- r1_bio->behind_page_count = bio->bi_vcnt;
+ bio_copy_data_partial(behind_bio, bio, offset,
+ behind_bio->bi_iter.bi_size);
+
+ r1_bio->behind_master_bio = behind_bio;;
set_bit(R1BIO_BehindIO, &r1_bio->state);
- return;
-do_sync_io:
- for (i = 0; i < bio->bi_vcnt; i++)
- if (bvecs[i].bv_page)
- put_page(bvecs[i].bv_page);
- kfree(bvecs);
+ return behind_bio;
+
+ free_pages:
pr_debug("%dB behind alloc failed, doing sync I/O\n",
bio->bi_iter.bi_size);
+ bio_free_pages(behind_bio);
+ fail:
+ return behind_bio;
}
struct raid1_plug_cb {
@@ -1499,11 +1507,9 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio)
(atomic_read(&bitmap->behind_writes)
< mddev->bitmap_info.max_write_behind) &&
!waitqueue_active(&bitmap->behind_wait)) {
- mbio = bio_clone_bioset_partial(bio, GFP_NOIO,
- mddev->bio_set,
- offset << 9,
- max_sectors << 9);
- alloc_behind_pages(mbio, r1_bio);
+ mbio = alloc_behind_master_bio(r1_bio, bio,
+ offset << 9,
+ max_sectors << 9);
}
bitmap_startwrite(bitmap, r1_bio->sector,
@@ -1514,26 +1520,17 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio)
}
if (!mbio) {
- if (r1_bio->behind_bvecs)
- mbio = bio_clone_bioset_partial(bio, GFP_NOIO,
- mddev->bio_set,
- offset << 9,
- max_sectors << 9);
+ if (r1_bio->behind_master_bio)
+ mbio = bio_clone_fast(r1_bio->behind_master_bio,
+ GFP_NOIO,
+ mddev->bio_set);
else {
mbio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set);
bio_trim(mbio, offset, max_sectors);
}
}
- if (r1_bio->behind_bvecs) {
- struct bio_vec *bvec;
- int j;
-
- /*
- * We trimmed the bio, so _all is legit
- */
- bio_for_each_segment_all(bvec, mbio, j)
- bvec->bv_page = r1_bio->behind_bvecs[j].bv_page;
+ if (r1_bio->behind_master_bio) {
if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags))
atomic_inc(&r1_bio->behind_remaining);
}
@@ -2405,18 +2402,11 @@ static int narrow_write_error(struct r1bio *r1_bio, int i)
/* Write at 'sector' for 'sectors'*/
if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
- unsigned vcnt = r1_bio->behind_page_count;
- struct bio_vec *vec = r1_bio->behind_bvecs;
-
- while (!vec->bv_page) {
- vec++;
- vcnt--;
- }
-
- wbio = bio_alloc_mddev(GFP_NOIO, vcnt, mddev);
- memcpy(wbio->bi_io_vec, vec, vcnt * sizeof(struct bio_vec));
-
- wbio->bi_vcnt = vcnt;
+ wbio = bio_clone_fast(r1_bio->behind_master_bio,
+ GFP_NOIO,
+ mddev->bio_set);
+ /* We really need a _all clone */
+ wbio->bi_iter = (struct bvec_iter){ 0 };
} else {
wbio = bio_clone_fast(r1_bio->master_bio, GFP_NOIO,
mddev->bio_set);
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h
index dd22a37d0d83..4271cd7ac2de 100644
--- a/drivers/md/raid1.h
+++ b/drivers/md/raid1.h
@@ -153,9 +153,13 @@ struct r1bio {
int read_disk;
struct list_head retry_list;
- /* Next two are only valid when R1BIO_BehindIO is set */
- struct bio_vec *behind_bvecs;
- int behind_page_count;
+
+ /*
+ * When R1BIO_BehindIO is set, we store pages for write behind
+ * in behind_master_bio.
+ */
+ struct bio *behind_master_bio;
+
/*
* if the IO is in WRITE direction, then multiple bios are used.
* We choose the number when they are allocated.
--
2.9.3
next prev parent reply other threads:[~2017-03-16 16:12 UTC|newest]
Thread overview: 35+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-03-16 16:12 [PATCH v3 00/14] md: cleanup on direct access to bvec table Ming Lei
2017-03-16 16:12 ` [PATCH v3 01/14] md: raid1/raid10: don't handle failure of bio_add_page() Ming Lei
2017-03-27 9:14 ` Christoph Hellwig
2017-03-16 16:12 ` [PATCH v3 02/14] md: move two macros into md.h Ming Lei
2017-03-24 5:57 ` NeilBrown
2017-03-24 6:30 ` Ming Lei
2017-03-24 16:53 ` Shaohua Li
2017-03-27 9:15 ` Christoph Hellwig
2017-03-27 9:52 ` NeilBrown
2017-03-16 16:12 ` [PATCH v3 03/14] md: prepare for managing resync I/O pages in clean way Ming Lei
2017-03-24 6:00 ` NeilBrown
2017-03-16 16:12 ` [PATCH v3 04/14] md: raid1: simplify r1buf_pool_free() Ming Lei
2017-03-16 16:12 ` [PATCH v3 05/14] md: raid1: don't use bio's vec table to manage resync pages Ming Lei
2017-07-09 23:09 ` NeilBrown
2017-07-10 3:35 ` Ming Lei
2017-07-10 4:13 ` Ming Lei
2017-07-10 4:38 ` NeilBrown
2017-07-10 7:25 ` Ming Lei
2017-07-10 19:05 ` Shaohua Li
2017-07-10 22:54 ` Ming Lei
2017-07-10 23:14 ` NeilBrown
2017-07-12 1:40 ` Ming Lei
2017-07-12 16:30 ` Shaohua Li
2017-07-13 1:22 ` Ming Lei
2017-03-16 16:12 ` [PATCH v3 06/14] md: raid1: retrieve page from pre-allocated resync page array Ming Lei
2017-03-16 16:12 ` [PATCH v3 07/14] md: raid1: use bio helper in process_checks() Ming Lei
2017-03-16 16:12 ` [PATCH v3 08/14] block: introduce bio_copy_data_partial Ming Lei
2017-03-24 5:34 ` Shaohua Li
2017-03-24 16:41 ` Jens Axboe
2017-03-16 16:12 ` [PATCH v3 09/14] md: raid1: move 'offset' out of loop Ming Lei
2017-03-16 16:12 ` Ming Lei [this message]
2017-03-16 16:12 ` [PATCH v3 11/14] md: raid10: refactor code of read reshape's .bi_end_io Ming Lei
2017-03-16 16:12 ` [PATCH v3 12/14] md: raid10: don't use bio's vec table to manage resync pages Ming Lei
2017-03-16 16:12 ` [PATCH v3 13/14] md: raid10: retrieve page from preallocated resync page array Ming Lei
2017-03-16 16:12 ` [PATCH v3 14/14] md: raid10: avoid direct access to bvec table in handle_reshape_read_error Ming Lei
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170316161235.27110-11-tom.leiming@gmail.com \
--to=tom.leiming@gmail.com \
--cc=axboe@fb.com \
--cc=hch@infradead.org \
--cc=linux-block@vger.kernel.org \
--cc=linux-raid@vger.kernel.org \
--cc=shli@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).