From: NeilBrown <neilb@suse.de>
To: linux-raid@vger.kernel.org
Subject: [md PATCH 11/16] md/raid1: improve handling of pages allocated for write-behind.
Date: Wed, 11 May 2011 16:30:31 +1000 [thread overview]
Message-ID: <20110511063031.21263.12794.stgit@notabene.brown> (raw)
In-Reply-To: <20110511062743.21263.72802.stgit@notabene.brown>
The current handling and freeing of these pages is a bit fragile.
We only keep the list of allocated pages in each bio, so we need to
still have a valid bio when freeing the pages, which is a bit clumsy.
So simply store the allocated page list in the r1_bio so it can easily
be found and freed when we are finished with the r1_bio.
Signed-off-by: NeilBrown <neilb@suse.de>
---
drivers/md/raid1.c | 55 +++++++++++++++++++++++++---------------------------
drivers/md/raid1.h | 4 +++-
2 files changed, 29 insertions(+), 30 deletions(-)
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index b9d6da1..779abbd 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -297,23 +297,24 @@ static void raid1_end_read_request(struct bio *bio, int error)
rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev);
}
-static void r1_bio_write_done(r1bio_t *r1_bio, int vcnt, struct bio_vec *bv,
- int behind)
+static void r1_bio_write_done(r1bio_t *r1_bio)
{
if (atomic_dec_and_test(&r1_bio->remaining))
{
/* it really is the end of this request */
if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
/* free extra copy of the data pages */
- int i = vcnt;
+ int i = r1_bio->behind_page_count;
while (i--)
- safe_put_page(bv[i].bv_page);
+ safe_put_page(r1_bio->behind_pages[i]);
+ kfree(r1_bio->behind_pages);
+ r1_bio->behind_pages = NULL;
}
/* clear the bitmap if all writes complete successfully */
bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
r1_bio->sectors,
!test_bit(R1BIO_Degraded, &r1_bio->state),
- behind);
+ test_bit(R1BIO_BehindIO, &r1_bio->state));
md_write_end(r1_bio->mddev);
raid_end_bio_io(r1_bio);
}
@@ -386,7 +387,7 @@ static void raid1_end_write_request(struct bio *bio, int error)
* Let's see if all mirrored write operations have finished
* already.
*/
- r1_bio_write_done(r1_bio, bio->bi_vcnt, bio->bi_io_vec, behind);
+ r1_bio_write_done(r1_bio);
if (to_put)
bio_put(to_put);
@@ -660,37 +661,36 @@ static void unfreeze_array(conf_t *conf)
/* duplicate the data pages for behind I/O
- * We return a list of bio_vec rather than just page pointers
- * as it makes freeing easier
*/
-static struct bio_vec *alloc_behind_pages(struct bio *bio)
+static void alloc_behind_pages(struct bio *bio, r1bio_t *r1_bio)
{
int i;
struct bio_vec *bvec;
- struct bio_vec *pages = kzalloc(bio->bi_vcnt * sizeof(struct bio_vec),
+ struct page **pages = kzalloc(bio->bi_vcnt * sizeof(struct page*),
GFP_NOIO);
if (unlikely(!pages))
- goto do_sync_io;
+ return;
bio_for_each_segment(bvec, bio, i) {
- pages[i].bv_page = alloc_page(GFP_NOIO);
- if (unlikely(!pages[i].bv_page))
+ pages[i] = alloc_page(GFP_NOIO);
+ if (unlikely(!pages[i]))
goto do_sync_io;
- memcpy(kmap(pages[i].bv_page) + bvec->bv_offset,
+ memcpy(kmap(pages[i]) + bvec->bv_offset,
kmap(bvec->bv_page) + bvec->bv_offset, bvec->bv_len);
- kunmap(pages[i].bv_page);
+ kunmap(pages[i]);
kunmap(bvec->bv_page);
}
-
- return pages;
+ r1_bio->behind_pages = pages;
+ r1_bio->behind_page_count = bio->bi_vcnt;
+ set_bit(R1BIO_BehindIO, &r1_bio->state);
+ return;
do_sync_io:
- if (pages)
- for (i = 0; i < bio->bi_vcnt && pages[i].bv_page; i++)
- put_page(pages[i].bv_page);
+ for (i = 0; i < bio->bi_vcnt; i++)
+ if (pages[i])
+ put_page(pages[i]);
kfree(pages);
PRINTK("%dB behind alloc failed, doing sync I/O\n", bio->bi_size);
- return NULL;
}
static int make_request(mddev_t *mddev, struct bio * bio)
@@ -702,7 +702,6 @@ static int make_request(mddev_t *mddev, struct bio * bio)
int i, targets = 0, disks;
struct bitmap *bitmap;
unsigned long flags;
- struct bio_vec *behind_pages = NULL;
const int rw = bio_data_dir(bio);
const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA));
@@ -855,9 +854,8 @@ static int make_request(mddev_t *mddev, struct bio * bio)
if (bitmap &&
(atomic_read(&bitmap->behind_writes)
< mddev->bitmap_info.max_write_behind) &&
- !waitqueue_active(&bitmap->behind_wait) &&
- (behind_pages = alloc_behind_pages(bio)) != NULL)
- set_bit(R1BIO_BehindIO, &r1_bio->state);
+ !waitqueue_active(&bitmap->behind_wait))
+ alloc_behind_pages(bio, r1_bio);
atomic_set(&r1_bio->remaining, 1);
atomic_set(&r1_bio->behind_remaining, 0);
@@ -878,7 +876,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
mbio->bi_rw = WRITE | do_flush_fua | do_sync;
mbio->bi_private = r1_bio;
- if (behind_pages) {
+ if (r1_bio->behind_pages) {
struct bio_vec *bvec;
int j;
@@ -890,7 +888,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
* them all
*/
__bio_for_each_segment(bvec, mbio, j, 0)
- bvec->bv_page = behind_pages[j].bv_page;
+ bvec->bv_page = r1_bio->behind_pages[j];
if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags))
atomic_inc(&r1_bio->behind_remaining);
}
@@ -900,8 +898,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
bio_list_add(&conf->pending_bio_list, mbio);
spin_unlock_irqrestore(&conf->device_lock, flags);
}
- r1_bio_write_done(r1_bio, bio->bi_vcnt, behind_pages, behind_pages != NULL);
- kfree(behind_pages); /* the behind pages are attached to the bios now */
+ r1_bio_write_done(r1_bio);
/* In case raid1d snuck in to freeze_array */
wake_up(&conf->wait_barrier);
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h
index cbfdf1a..5fc4ca1 100644
--- a/drivers/md/raid1.h
+++ b/drivers/md/raid1.h
@@ -94,7 +94,9 @@ struct r1bio_s {
int read_disk;
struct list_head retry_list;
- struct bitmap_update *bitmap_update;
+ /* Next two are only valid when R1BIO_BehindIO is set */
+ struct page **behind_pages;
+ int behind_page_count;
/*
* if the IO is in WRITE direction, then multiple bios are used.
* We choose the number when they are allocated.
next prev parent reply other threads:[~2011-05-11 6:30 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-05-11 6:30 [md PATCH 00/16] md patches for 2.6.40 NeilBrown
2011-05-11 6:30 ` [md PATCH 02/16] md: reject a re-add request that cannot be honoured NeilBrown
2011-05-11 6:30 ` [md PATCH 04/16] md: simplify raid10 read_balance NeilBrown
2011-05-11 6:30 ` [md PATCH 03/16] md/bitmap: fix saving of events_cleared and other state NeilBrown
2011-05-11 6:30 ` [md PATCH 01/16] md: Fix race when creating a new md device NeilBrown
2011-05-11 6:30 ` [md PATCH 05/16] md/raid1: clean up read_balance NeilBrown
2011-05-11 6:30 ` [md PATCH 08/16] md/raid1: split out two sub-functions from sync_request_write NeilBrown
2011-05-11 6:30 ` [md PATCH 13/16] md/raid10: make more use of 'slot' in raid10d NeilBrown
2011-05-11 6:30 ` [md PATCH 14/16] md/raid10: remove unused variable NeilBrown
2011-05-11 6:30 ` [md PATCH 10/16] md/raid1: try fix_sync_read_error before process_checks NeilBrown
2011-05-11 6:30 ` [md PATCH 15/16] md/raid10: reformat some loops with less indenting NeilBrown
2011-05-11 6:30 ` NeilBrown [this message]
2011-05-11 6:30 ` [md PATCH 07/16] md: make error_handler functions more uniform and correct NeilBrown
2011-05-11 6:30 ` [md PATCH 12/16] md/raid10: some tidying up in fix_read_error NeilBrown
2011-05-11 6:30 ` [md PATCH 16/16] md: allow resync_start to be set while an array is active NeilBrown
2011-05-11 6:30 ` [md PATCH 06/16] md/multipath: discard ->working_disks in favour of ->degraded NeilBrown
2011-05-11 6:30 ` [md PATCH 09/16] md/raid1: tidy up new functions: process_checks and fix_sync_read_error NeilBrown
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20110511063031.21263.12794.stgit@notabene.brown \
--to=neilb@suse.de \
--cc=linux-raid@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).