From: Christoph Hellwig <hch@lst.de>
To: Jan Kara <jack@suse.cz>,
jaxboe@fusionio.com, tj@kernel.org, James.Bottomley@suse.de,
linux-fsdevel@vger.kernel.org, linux-scsi@vger.kernel.org,
tytso@mit.edu, chris.maso
Subject: Re: [PATCH, RFC 2/2] dm: support REQ_FLUSH directly
Date: Tue, 3 Aug 2010 20:51:48 +0200
Message-ID: <20100803185148.GA12258@lst.de>
In-Reply-To: <20100803184939.GA12198@lst.de>
Adapt device-mapper to the new world order where even bio-based devices
get simple REQ_FLUSH requests for cache flushes, and need to submit
them downwards to implement barriers.
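(For illustration only, not part of the patch: the pattern the bio-based
targets below adopt is roughly the following, modeled on a linear target.
example_map, example_ctx, ec->dev and ec->start are made-up names.)

	static int example_map(struct dm_target *ti, struct bio *bio,
			       union map_info *map_context)
	{
		struct example_ctx *ec = ti->private; /* hypothetical context */

		/*
		 * Empty REQ_FLUSH bios carry no data.  Just point them at
		 * the underlying device and hand them back to the core,
		 * which submits them downwards so the device cache gets
		 * flushed.
		 */
		if (bio->bi_rw & REQ_FLUSH) {
			bio->bi_bdev = ec->dev->bdev;
			return DM_MAPIO_REMAPPED;
		}

		/* normal I/O: remap onto the underlying device */
		bio->bi_bdev = ec->dev->bdev;
		bio->bi_sector = ec->start + (bio->bi_sector - ti->begin);
		return DM_MAPIO_REMAPPED;
	}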
Note that I've removed the unlikely annotations around the REQ_FLUSH
checks.  While these generally aren't as common as normal reads and
writes, they are common enough that statically mispredicting them is a
really bad idea.
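(Background, illustration only: unlikely() is just a __builtin_expect()
hint, so a branch that fires regularly fights the code layout the
compiler chose on every execution.)

	/* kernel definition, for reference: */
	#define unlikely(x)	__builtin_expect(!!(x), 0)

	/*
	 * gcc moves the body off the hot path.  With flush-based
	 * barriers, REQ_FLUSH bios arrive steadily, so each one pays
	 * for the branch having been statically predicted as not taken.
	 */
	if (unlikely(bio->bi_rw & REQ_FLUSH)) {
		/* flush handling */
	}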
Tested with simple linear LVM volumes only so far.
Index: linux-2.6/drivers/md/dm-crypt.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-crypt.c 2010-08-03 20:26:49.629254174 +0200
+++ linux-2.6/drivers/md/dm-crypt.c 2010-08-03 20:36:59.279003929 +0200
@@ -1249,7 +1249,7 @@ static int crypt_map(struct dm_target *t
struct dm_crypt_io *io;
struct crypt_config *cc;
- if (unlikely(bio_empty_barrier(bio))) {
+ if (bio->bi_rw & REQ_FLUSH) {
cc = ti->private;
bio->bi_bdev = cc->dev->bdev;
return DM_MAPIO_REMAPPED;
Index: linux-2.6/drivers/md/dm-raid1.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-raid1.c 2010-08-03 20:26:49.641003999 +0200
+++ linux-2.6/drivers/md/dm-raid1.c 2010-08-03 20:36:59.280003649 +0200
@@ -629,7 +629,7 @@ static void do_write(struct mirror_set *
struct dm_io_region io[ms->nr_mirrors], *dest = io;
struct mirror *m;
struct dm_io_request io_req = {
- .bi_rw = WRITE | (bio->bi_rw & WRITE_BARRIER),
+ .bi_rw = WRITE | (bio->bi_rw & (WRITE_BARRIER|REQ_FLUSH)),
.mem.type = DM_IO_BVEC,
.mem.ptr.bvec = bio->bi_io_vec + bio->bi_idx,
.notify.fn = write_callback,
@@ -670,7 +670,7 @@ static void do_writes(struct mirror_set
bio_list_init(&requeue);
while ((bio = bio_list_pop(writes))) {
- if (unlikely(bio_empty_barrier(bio))) {
+ if (bio->bi_rw & REQ_FLUSH) {
bio_list_add(&sync, bio);
continue;
}
@@ -1199,12 +1199,14 @@ static int mirror_end_io(struct dm_targe
struct dm_bio_details *bd = NULL;
struct dm_raid1_read_record *read_record = map_context->ptr;
+ if (bio->bi_rw & REQ_FLUSH)
+ return error;
+
/*
* We need to dec pending if this was a write.
*/
if (rw == WRITE) {
- if (likely(!bio_empty_barrier(bio)))
- dm_rh_dec(ms->rh, map_context->ll);
+ dm_rh_dec(ms->rh, map_context->ll);
return error;
}
Index: linux-2.6/drivers/md/dm-region-hash.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-region-hash.c 2010-08-03 20:26:49.650023346 +0200
+++ linux-2.6/drivers/md/dm-region-hash.c 2010-08-03 20:36:59.285025649 +0200
@@ -399,7 +399,7 @@ void dm_rh_mark_nosync(struct dm_region_
region_t region = dm_rh_bio_to_region(rh, bio);
int recovering = 0;
- if (bio_empty_barrier(bio)) {
+ if (bio->bi_rw & REQ_FLUSH) {
rh->barrier_failure = 1;
return;
}
@@ -524,7 +524,7 @@ void dm_rh_inc_pending(struct dm_region_
struct bio *bio;
for (bio = bios->head; bio; bio = bio->bi_next) {
- if (bio_empty_barrier(bio))
+ if (bio->bi_rw & REQ_FLUSH)
continue;
rh_inc(rh, dm_rh_bio_to_region(rh, bio));
}
Index: linux-2.6/drivers/md/dm-snap.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-snap.c 2010-08-03 20:26:49.656003091 +0200
+++ linux-2.6/drivers/md/dm-snap.c 2010-08-03 20:36:59.290023135 +0200
@@ -1581,7 +1581,7 @@ static int snapshot_map(struct dm_target
chunk_t chunk;
struct dm_snap_pending_exception *pe = NULL;
- if (unlikely(bio_empty_barrier(bio))) {
+ if (bio->bi_rw & REQ_FLUSH) {
bio->bi_bdev = s->cow->bdev;
return DM_MAPIO_REMAPPED;
}
@@ -1685,7 +1685,7 @@ static int snapshot_merge_map(struct dm_
int r = DM_MAPIO_REMAPPED;
chunk_t chunk;
- if (unlikely(bio_empty_barrier(bio))) {
+ if (bio->bi_rw & REQ_FLUSH) {
if (!map_context->flush_request)
bio->bi_bdev = s->origin->bdev;
else
@@ -2123,7 +2123,7 @@ static int origin_map(struct dm_target *
struct dm_dev *dev = ti->private;
bio->bi_bdev = dev->bdev;
- if (unlikely(bio_empty_barrier(bio)))
+ if (bio->bi_rw & REQ_FLUSH)
return DM_MAPIO_REMAPPED;
/* Only tell snapshots if this is a write */
Index: linux-2.6/drivers/md/dm-stripe.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-stripe.c 2010-08-03 20:26:49.663003301 +0200
+++ linux-2.6/drivers/md/dm-stripe.c 2010-08-03 20:36:59.295005744 +0200
@@ -214,7 +214,7 @@ static int stripe_map(struct dm_target *
sector_t offset, chunk;
uint32_t stripe;
- if (unlikely(bio_empty_barrier(bio))) {
+ if (bio->bi_rw & REQ_FLUSH) {
BUG_ON(map_context->flush_request >= sc->stripes);
bio->bi_bdev = sc->stripe[map_context->flush_request].dev->bdev;
return DM_MAPIO_REMAPPED;
Index: linux-2.6/drivers/md/dm.c
===================================================================
--- linux-2.6.orig/drivers/md/dm.c 2010-08-03 20:26:49.676004139 +0200
+++ linux-2.6/drivers/md/dm.c 2010-08-03 20:36:59.301005325 +0200
@@ -633,7 +633,7 @@ static void dec_pending(struct dm_io *io
io_error = io->error;
bio = io->bio;
- if (bio->bi_rw & REQ_HARDBARRIER) {
+ if (bio == &md->barrier_bio) {
/*
* There can be just one barrier request so we use
* a per-device variable for error reporting.
@@ -851,7 +851,7 @@ void dm_requeue_unmapped_request(struct
struct request_queue *q = rq->q;
unsigned long flags;
- if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) {
+ if (clone->cmd_flags & REQ_HARDBARRIER) {
/*
* Barrier clones share an original request.
* Leave it to dm_end_request(), which handles this special
@@ -950,7 +950,7 @@ static void dm_complete_request(struct r
struct dm_rq_target_io *tio = clone->end_io_data;
struct request *rq = tio->orig;
- if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) {
+ if (clone->cmd_flags & REQ_HARDBARRIER) {
/*
* Barrier clones share an original request. So can't use
* softirq_done with the original.
@@ -979,7 +979,7 @@ void dm_kill_unmapped_request(struct req
struct dm_rq_target_io *tio = clone->end_io_data;
struct request *rq = tio->orig;
- if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) {
+ if (clone->cmd_flags & REQ_HARDBARRIER) {
/*
* Barrier clones share an original request.
* Leave it to dm_end_request(), which handles this special
@@ -1208,7 +1208,7 @@ static int __clone_and_map(struct clone_
sector_t len = 0, max;
struct dm_target_io *tio;
- if (unlikely(bio_empty_barrier(bio)))
+ if (bio->bi_rw & REQ_FLUSH)
return __clone_and_map_empty_barrier(ci);
ti = dm_table_find_target(ci->map, ci->sector);
@@ -1308,7 +1308,7 @@ static void __split_and_process_bio(stru
ci.map = dm_get_live_table(md);
if (unlikely(!ci.map)) {
- if (!(bio->bi_rw & REQ_HARDBARRIER))
+ if (bio != &md->barrier_bio)
bio_io_error(bio);
else
if (!md->barrier_error)
@@ -1326,7 +1326,7 @@ static void __split_and_process_bio(stru
spin_lock_init(&ci.io->endio_lock);
ci.sector = bio->bi_sector;
ci.sector_count = bio_sectors(bio);
- if (unlikely(bio_empty_barrier(bio)))
+ if (bio->bi_rw & REQ_FLUSH)
ci.sector_count = 1;
ci.idx = bio->bi_idx;
@@ -1421,7 +1421,7 @@ static int _dm_request(struct request_qu
* we have to queue this io for later.
*/
if (unlikely(test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) ||
- unlikely(bio->bi_rw & REQ_HARDBARRIER)) {
+ unlikely(bio->bi_rw & (REQ_HARDBARRIER|REQ_FLUSH))) {
up_read(&md->io_lock);
if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) &&
@@ -1462,14 +1462,6 @@ static int dm_request(struct request_que
return _dm_request(q, bio);
}
-static bool dm_rq_is_flush_request(struct request *rq)
-{
- if (rq->cmd_flags & REQ_FLUSH)
- return true;
- else
- return false;
-}
-
void dm_dispatch_request(struct request *rq)
{
int r;
@@ -1517,10 +1509,10 @@ static int setup_clone(struct request *c
{
int r;
- if (dm_rq_is_flush_request(rq)) {
+ if (rq->cmd_flags & REQ_FLUSH) {
blk_rq_init(NULL, clone);
clone->cmd_type = REQ_TYPE_FS;
- clone->cmd_flags |= (REQ_HARDBARRIER | WRITE);
+ clone->cmd_flags |= (WRITE_SYNC | REQ_FLUSH);
} else {
r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC,
dm_rq_bio_constructor, tio);
@@ -1573,7 +1565,7 @@ static int dm_prep_fn(struct request_que
struct mapped_device *md = q->queuedata;
struct request *clone;
- if (unlikely(dm_rq_is_flush_request(rq)))
+ if (rq->cmd_flags & REQ_FLUSH)
return BLKPREP_OK;
if (unlikely(rq->special)) {
@@ -1664,7 +1656,7 @@ static void dm_request_fn(struct request
if (!rq)
goto plug_and_out;
- if (unlikely(dm_rq_is_flush_request(rq))) {
+ if (rq->cmd_flags & REQ_FLUSH) {
BUG_ON(md->flush_request);
md->flush_request = rq;
blk_start_request(rq);
@@ -2239,7 +2231,7 @@ static void dm_flush(struct mapped_devic
bio_init(&md->barrier_bio);
md->barrier_bio.bi_bdev = md->bdev;
- md->barrier_bio.bi_rw = WRITE_BARRIER;
+ md->barrier_bio.bi_rw = WRITE_SYNC | REQ_FLUSH;
__split_and_process_bio(md, &md->barrier_bio);
dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
@@ -2250,19 +2242,8 @@ static void process_barrier(struct mappe
md->barrier_error = 0;
dm_flush(md);
-
- if (!bio_empty_barrier(bio)) {
- __split_and_process_bio(md, bio);
- dm_flush(md);
- }
-
- if (md->barrier_error != DM_ENDIO_REQUEUE)
- bio_endio(bio, md->barrier_error);
- else {
- spin_lock_irq(&md->deferred_lock);
- bio_list_add_head(&md->deferred, bio);
- spin_unlock_irq(&md->deferred_lock);
- }
+ __split_and_process_bio(md, bio);
+ dm_flush(md);
}
/*
Index: linux-2.6/include/linux/bio.h
===================================================================
--- linux-2.6.orig/include/linux/bio.h 2010-08-03 20:32:11.951274008 +0200
+++ linux-2.6/include/linux/bio.h 2010-08-03 20:36:59.303005325 +0200
@@ -241,10 +241,6 @@ enum rq_flag_bits {
#define bio_offset(bio) bio_iovec((bio))->bv_offset
#define bio_segments(bio) ((bio)->bi_vcnt - (bio)->bi_idx)
#define bio_sectors(bio) ((bio)->bi_size >> 9)
-#define bio_empty_barrier(bio) \
- ((bio->bi_rw & REQ_HARDBARRIER) && \
- !bio_has_data(bio) && \
- !(bio->bi_rw & REQ_DISCARD))
static inline unsigned int bio_cur_bytes(struct bio *bio)
{
Index: linux-2.6/drivers/md/dm-io.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-io.c 2010-08-03 20:26:49.685023485 +0200
+++ linux-2.6/drivers/md/dm-io.c 2010-08-03 20:36:59.308004417 +0200
@@ -364,7 +364,7 @@ static void dispatch_io(int rw, unsigned
*/
for (i = 0; i < num_regions; i++) {
*dp = old_pages;
- if (where[i].count || (rw & REQ_HARDBARRIER))
+ if (where[i].count || (rw & REQ_FLUSH))
do_region(rw, i, where + i, dp, io);
}
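(Usage sketch, not part of the patch: in the world this series creates,
a cache flush is just a data-less bio flagged REQ_FLUSH, roughly what
blkdev_issue_flush() submits; completion handling is elided here.)

	struct bio *bio = bio_alloc(GFP_KERNEL, 0);	/* no data pages */

	bio->bi_bdev = bdev;
	/* same flags dm_flush() above uses for its barrier_bio */
	submit_bio(WRITE_SYNC | REQ_FLUSH, bio);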