From: Kevin Corry <corryk@us.ibm.com>
To: Joe Thornber <joe@fib011235813.fsnet.co.uk>,
Linus Torvalds <torvalds@transmeta.com>
Cc: Kernel Mailing List <linux-kernel@vger.kernel.org>,
lvm-devel@sistina.com
Subject: [PATCH] dm.c - device-mapper I/O path fixes
Date: Tue, 10 Dec 2002 16:03:47 -0600 [thread overview]
Message-ID: <02121016034706.02220@boiler> (raw)
Joe, Linus,
This patch fixes problems with the device-mapper I/O path in 2.5.51. The
existing code does not properly split requests when necessary, and can
cause segfaults and/or data corruption. This can easily manifest itself
when running XFS on striped LVM volumes.
Notes:
- New bio's must be alloc'd instead of clone'd, since the bio vector may need
to be adjusted if the end of the split request does not fill a page.
- Fix reference counting of md->pending. This should only be incremented once
for each incoming bio, not for each split bio that is resubmitted.
- Copy the correct bvec when splitting the tail-end of a page.
--
Kevin Corry
corryk@us.ibm.com
http://evms.sourceforge.net/
--- linux-2.5.51a/drivers/md/dm.c Tue Dec 10 11:01:13 2002
+++ linux-2.5.51b/drivers/md/dm.c Tue Dec 10 11:03:55 2002
@@ -242,17 +242,18 @@
static spinlock_t _uptodate_lock = SPIN_LOCK_UNLOCKED;
unsigned long flags;
- spin_lock_irqsave(&_uptodate_lock, flags);
- if (error)
+ if (error) {
+ spin_lock_irqsave(&_uptodate_lock, flags);
io->error = error;
- spin_unlock_irqrestore(&_uptodate_lock, flags);
+ spin_unlock_irqrestore(&_uptodate_lock, flags);
+ }
if (atomic_dec_and_test(&io->io_count)) {
if (atomic_dec_and_test(&io->md->pending))
/* nudge anyone waiting on suspend queue */
wake_up(&io->md->wait);
- bio_endio(io->bio, io->error ? 0 : io->bio->bi_size, io->error);
+ bio_endio(io->bio, io->bio->bi_size, io->error);
free_io(io);
}
}
@@ -261,15 +262,15 @@
{
struct dm_io *io = bio->bi_private;
- /*
- * Only call dec_pending if the clone has completely
- * finished. If a partial io errors I'm assuming it won't
- * be requeued. FIXME: check this.
- */
- if (error || !bio->bi_size) {
- dec_pending(io, error);
- bio_put(bio);
+ if (bio->bi_size)
+ return 1;
+
+ if (error) {
+ struct gendisk *disk = dm_disk(io->md);
+ DMWARN("I/O error (%d) on device %s\n", error, disk->disk_name);
}
+ dec_pending(io, error);
+ bio_put(bio);
return 0;
}
@@ -313,7 +314,6 @@
* anything, the target has assumed ownership of
* this io.
*/
- atomic_inc(&io->md->pending);
atomic_inc(&io->io_count);
r = ti->type->map(ti, clone);
if (r > 0)
@@ -341,9 +341,7 @@
{
struct dm_target *ti = dm_table_find_target(ci->md->map, ci->sector);
struct bio *clone, *bio = ci->bio;
- struct bio_vec *bv = bio->bi_io_vec + (bio->bi_vcnt - 1);
-
- DMWARN("splitting page");
+ struct bio_vec *bv = bio->bi_io_vec + ci->idx;
if (len > ci->sector_count)
len = ci->sector_count;
@@ -353,11 +351,13 @@
clone->bi_sector = ci->sector;
clone->bi_bdev = bio->bi_bdev;
- clone->bi_flags = bio->bi_flags | (1 << BIO_SEG_VALID);
clone->bi_rw = bio->bi_rw;
+ clone->bi_vcnt = 1;
clone->bi_size = len << SECTOR_SHIFT;
clone->bi_end_io = clone_endio;
clone->bi_private = ci->io;
+ clone->bi_io_vec->bv_offset = clone->bi_io_vec->bv_len - clone->bi_size;
+ clone->bi_io_vec->bv_len = clone->bi_size;
ci->sector += len;
ci->sector_count -= len;
@@ -369,24 +369,48 @@
{
struct bio *clone, *bio = ci->bio;
struct dm_target *ti = dm_table_find_target(ci->md->map, ci->sector);
- sector_t len = max_io_len(ci->md, bio->bi_sector, ti);
+ sector_t bv_len, len = max_io_len(ci->md, ci->sector, ti);
+ struct bio_vec *bv;
+ int i, vcnt = bio->bi_vcnt - ci->idx;
/* shorter than current target ? */
if (ci->sector_count < len)
len = ci->sector_count;
/* create the clone */
- clone = bio_clone(ci->bio, GFP_NOIO);
+ clone = bio_alloc(GFP_NOIO, vcnt);
+ if (!clone) {
+ dec_pending(ci->io, -ENOMEM);
+ return;
+ }
clone->bi_sector = ci->sector;
- clone->bi_idx = ci->idx;
+ clone->bi_bdev = bio->bi_bdev;
+ clone->bi_rw = bio->bi_rw;
+ clone->bi_vcnt = vcnt;
clone->bi_size = len << SECTOR_SHIFT;
clone->bi_end_io = clone_endio;
clone->bi_private = ci->io;
+ /* copy the original vector and adjust if necessary. */
+ memcpy(clone->bi_io_vec, bio->bi_io_vec + ci->idx,
+ vcnt * sizeof(*clone->bi_io_vec));
+ bv_len = len << SECTOR_SHIFT;
+ bio_for_each_segment(bv, clone, i) {
+ if (bv_len >= bv->bv_len) {
+ bv_len -= bv->bv_len;
+ } else {
+ bv->bv_len = bv_len;
+ clone->bi_vcnt = i + 1;
+ break;
+ }
+ }
+
+ /* submit this io */
+ __map_bio(ti, clone);
+
/* adjust the remaining io */
ci->sector += len;
ci->sector_count -= len;
- __map_bio(ti, clone);
/*
* If we are not performing all remaining io in this
@@ -395,8 +419,8 @@
*/
if (ci->sector_count) {
while (len) {
- struct bio_vec *bv = clone->bi_io_vec + ci->idx;
- sector_t bv_len = bv->bv_len >> SECTOR_SHIFT;
+ bv = bio->bi_io_vec + ci->idx;
+ bv_len = bv->bv_len >> SECTOR_SHIFT;
if (bv_len <= len)
len -= bv_len;
@@ -427,6 +451,8 @@
ci.sector_count = bio_sectors(bio);
ci.idx = 0;
+ atomic_inc(&ci.io->md->pending);
+
while (ci.sector_count)
__clone_and_map(&ci);
@@ -457,13 +483,13 @@
up_read(&md->lock);
if (bio_rw(bio) == READA) {
- bio_io_error(bio, 0);
+ bio_io_error(bio, bio->bi_size);
return 0;
}
r = queue_io(md, bio);
if (r < 0) {
- bio_io_error(bio, 0);
+ bio_io_error(bio, bio->bi_size);
return 0;
} else if (r == 0)
next reply other threads:[~2002-12-10 22:42 UTC|newest]
Thread overview: 42+ messages / expand[flat|nested] mbox.gz Atom feed top
2002-12-10 22:03 Kevin Corry [this message]
2002-12-11 12:17 ` [PATCH] dm.c - device-mapper I/O path fixes Joe Thornber
2002-12-11 12:19 ` Joe Thornber
2002-12-11 18:19 ` Denis Vlasenko
2002-12-11 13:16 ` Kevin Corry
2002-12-11 14:18 ` Joe Thornber
2002-12-11 19:24 ` Denis Vlasenko
2002-12-11 14:06 ` Kevin Corry
2002-12-11 21:12 ` Paul Mackerras
2002-12-12 12:30 ` Kevin Corry
2002-12-11 19:19 ` Denis Vlasenko
2002-12-11 14:02 ` Kevin Corry
2002-12-11 15:12 ` [lvm-devel] " Joe Thornber
2002-12-11 14:58 ` Joe Thornber
2002-12-11 12:19 ` Joe Thornber
2002-12-11 12:20 ` Joe Thornber
2002-12-11 12:21 ` Joe Thornber
2002-12-11 12:52 ` [lvm-devel] " Kevin Corry
2002-12-16 0:50 ` Linus Torvalds
2002-12-16 10:04 ` Joe Thornber
2002-12-16 10:06 ` 1/19 Joe Thornber
2002-12-16 17:07 ` 1/19 Linus Torvalds
2002-12-16 10:06 ` 2/19 Joe Thornber
2002-12-16 10:07 ` 3/19 Joe Thornber
2002-12-16 10:08 ` 4/19 Joe Thornber
2002-12-16 10:09 ` 5/19 Joe Thornber
2002-12-16 10:09 ` 6/19 Joe Thornber
2002-12-16 10:35 ` 6/19 Tomas Szepe
2002-12-16 10:38 ` 6/19 Tomas Szepe
2002-12-16 10:10 ` 7/19 Joe Thornber
2002-12-16 10:11 ` 8/19 Joe Thornber
2002-12-16 10:11 ` 9/19 Joe Thornber
2002-12-16 10:12 ` 10/19 Joe Thornber
2002-12-16 10:13 ` 11/19 Joe Thornber
2002-12-16 10:14 ` 12/19 Joe Thornber
2002-12-16 10:14 ` 13/19 Joe Thornber
2002-12-16 10:15 ` 14/19 Joe Thornber
2002-12-16 10:16 ` 15/19 Joe Thornber
2002-12-16 10:16 ` 16/19 Joe Thornber
2002-12-16 10:17 ` 17/19 Joe Thornber
2002-12-16 10:18 ` 18/19 Joe Thornber
2002-12-16 10:19 ` 19/19 Joe Thornber
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=02121016034706.02220@boiler \
--to=corryk@us.ibm.com \
--cc=joe@fib011235813.fsnet.co.uk \
--cc=linux-kernel@vger.kernel.org \
--cc=lvm-devel@sistina.com \
--cc=torvalds@transmeta.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox