From: Samuel Thibault <samuel.thibault@eu.citrix.com>
To: xen-devel@lists.xensource.com
Subject: Re: Block WRITE_BARRIER / FLUSH_DISKCACHE operations and parameters
Date: Fri, 8 Feb 2008 14:14:47 +0000 [thread overview]
Message-ID: <20080208141447.GA9422@implementation.uk.xensource.com> (raw)
In-Reply-To: <20080207161442.GJ4310@implementation.uk.xensource.com>
Samuel Thibault, le Thu 07 Feb 2008 16:14:42 +0000, a écrit :
> Nothing is said about parameters to be given along with WRITE_BARRIER or
> FLUSH_DISKCACHE operations. I guess they are both implicitly supposed
> to be write operations? (it's the case for WRITE_BARRIER in the Linux
> implementation) If so, would it be fine to allow nr_segments to be 0?
> As part of IDE cache flush emulation, I need to issue WRITE_BARRIERs
> without issuing any actual write...
The patch below implements it for Linux:
block: backport Jens Axboe's commit from
Tue, 16 Oct 2007 09:03:56 +0000 (11:03 +0200)
bf2de6f5a4faf0197268f18d08969b003b87b6e8
Initial support for data-less (or empty) barrier support
blkback: permit and implement empty barriers.
Signed-off-by: Samuel Thibault <samuel.thibault@eu.citrix.com>
diff -r 5c61cd349b20 block/elevator.c
--- a/block/elevator.c Thu Feb 07 10:33:19 2008 +0000
+++ b/block/elevator.c Fri Feb 08 11:27:12 2008 +0000
@@ -493,6 +493,16 @@
int ret;
while ((rq = __elv_next_request(q)) != NULL) {
+ /*
+ * Kill the empty barrier place holder, the driver must
+ * not ever see it.
+ */
+ if (blk_empty_barrier(rq)) {
+ blkdev_dequeue_request(rq);
+ end_that_request_chunk(rq, 1, 0);
+ end_that_request_last(rq, 1);
+ continue;
+ }
if (!(rq->flags & REQ_STARTED)) {
elevator_t *e = q->elevator;
--- a/block/ll_rw_blk.c Thu Feb 07 10:33:19 2008 +0000
+++ b/block/ll_rw_blk.c Fri Feb 08 11:27:12 2008 +0000
@@ -483,9 +483,12 @@
* Queue ordered sequence. As we stack them at the head, we
* need to queue in reverse order. Note that we rely on that
* no fs request uses ELEVATOR_INSERT_FRONT and thus no fs
- * request gets inbetween ordered sequence.
+ * request gets inbetween ordered sequence. If this request is
+ * an empty barrier, we don't need to do a postflush ever since
+ * there will be no data written between the pre and post flush.
+ * Hence a single flush will suffice.
*/
- if (q->ordered & QUEUE_ORDERED_POSTFLUSH)
+ if ((q->ordered & QUEUE_ORDERED_POSTFLUSH) && !blk_empty_barrier(rq))
queue_flush(q, QUEUE_ORDERED_POSTFLUSH);
else
q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH;
@@ -2967,7 +2970,7 @@
{
struct block_device *bdev = bio->bi_bdev;
- if (bdev != bdev->bd_contains) {
+ if (bio_sectors(bio) && bdev != bdev->bd_contains) {
struct hd_struct *p = bdev->bd_part;
const int rw = bio_data_dir(bio);
@@ -3028,7 +3031,7 @@
might_sleep();
/* Test device or partition size, when known. */
maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
- if (maxsector) {
+ if (maxsector && nr_sectors) {
sector_t sector = bio->bi_sector;
if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
@@ -3094,7 +3097,7 @@
old_dev = bio->bi_bdev->bd_dev;
maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
- if (maxsector) {
+ if (maxsector && nr_sectors) {
sector_t sector = bio->bi_sector;
if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
@@ -3128,21 +3131,25 @@
{
int count = bio_sectors(bio);
- BIO_BUG_ON(!bio->bi_size);
- BIO_BUG_ON(!bio->bi_io_vec);
bio->bi_rw |= rw;
- if (rw & WRITE)
- count_vm_events(PGPGOUT, count);
- else
- count_vm_events(PGPGIN, count);
- if (unlikely(block_dump)) {
- char b[BDEVNAME_SIZE];
- printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n",
- current->comm, current->pid,
- (rw & WRITE) ? "WRITE" : "READ",
- (unsigned long long)bio->bi_sector,
- bdevname(bio->bi_bdev,b));
+ if (!bio_empty_barrier(bio)) {
+ BIO_BUG_ON(!bio->bi_size);
+ BIO_BUG_ON(!bio->bi_io_vec);
+
+ if (rw & WRITE)
+ count_vm_events(PGPGOUT, count);
+ else
+ count_vm_events(PGPGIN, count);
+
+ if (unlikely(block_dump)) {
+ char b[BDEVNAME_SIZE];
+ printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n",
+ current->comm, current->pid,
+ (rw & WRITE) ? "WRITE" : "READ",
+ (unsigned long long)bio->bi_sector,
+ bdevname(bio->bi_bdev,b));
+ }
}
generic_make_request(bio);
@@ -3259,6 +3266,13 @@
total_bytes = bio_nbytes = 0;
while ((bio = req->bio) != NULL) {
int nbytes;
+
+ /* For an empty barrier request, the low level driver must
+ * store a potential error location in ->sector. We pass
+ * that back up in ->bi_sector
+ */
+ if (blk_empty_barrier(req))
+ bio->bi_sector = req->sector;
if (nr_bytes >= bio->bi_size) {
req->bio = bio->bi_next;
--- a/drivers/xen/blkback/blkback.c Thu Feb 07 10:33:19 2008 +0000
+++ b/drivers/xen/blkback/blkback.c Fri Feb 08 11:27:13 2008 +0000
@@ -407,7 +407,7 @@
/* Check that number of segments is sane. */
nseg = req->nr_segments;
- if (unlikely(nseg == 0) ||
+ if (unlikely(nseg == 0 && operation != WRITE_BARRIER) ||
unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
DPRINTK("Bad number of segments in request (%d)\n", nseg);
goto fail_response;
@@ -500,6 +500,18 @@
preq.sector_number += seg[i].nsec;
}
+ if (!bio) {
+ BUG_ON(operation != WRITE_BARRIER);
+ bio = biolist[nbio++] = bio_alloc(GFP_KERNEL, 0);
+ if (unlikely(bio == NULL))
+ goto fail_put_bio;
+
+ bio->bi_bdev = preq.bdev;
+ bio->bi_private = pending_req;
+ bio->bi_end_io = end_block_io_op;
+ bio->bi_sector = -1;
+ }
+
plug_queue(blkif, bio);
atomic_set(&pending_req->pendcnt, nbio);
blkif_get(blkif);
--- a/fs/bio.c Thu Feb 07 10:33:19 2008 +0000
+++ b/fs/bio.c Fri Feb 08 11:27:13 2008 +0000
@@ -112,7 +112,8 @@
BIO_BUG_ON(pool_idx >= BIOVEC_NR_POOLS);
- mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]);
+ if (bio->bi_io_vec)
+ mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]);
mempool_free(bio, bio_set->bio_pool);
}
--- a/include/linux/bio.h Thu Feb 07 10:33:19 2008 +0000
+++ b/include/linux/bio.h Fri Feb 08 11:27:13 2008 +0000
@@ -172,12 +172,27 @@
#define bio_offset(bio) bio_iovec((bio))->bv_offset
#define bio_segments(bio) ((bio)->bi_vcnt - (bio)->bi_idx)
#define bio_sectors(bio) ((bio)->bi_size >> 9)
-#define bio_cur_sectors(bio) (bio_iovec(bio)->bv_len >> 9)
-#define bio_data(bio) (page_address(bio_page((bio))) + bio_offset((bio)))
#define bio_barrier(bio) ((bio)->bi_rw & (1 << BIO_RW_BARRIER))
#define bio_sync(bio) ((bio)->bi_rw & (1 << BIO_RW_SYNC))
#define bio_failfast(bio) ((bio)->bi_rw & (1 << BIO_RW_FAILFAST))
#define bio_rw_ahead(bio) ((bio)->bi_rw & (1 << BIO_RW_AHEAD))
+#define bio_empty_barrier(bio) (bio_barrier(bio) && !(bio)->bi_size)
+
+static inline unsigned int bio_cur_sectors(struct bio *bio)
+{
+ if (bio->bi_vcnt)
+ return bio_iovec(bio)->bv_len >> 9;
+
+ return 0;
+}
+
+static inline void *bio_data(struct bio *bio)
+{
+ if (bio->bi_vcnt)
+ return page_address(bio_page(bio)) + bio_offset(bio);
+
+ return NULL;
+}
/*
* will die
--- a/include/linux/blkdev.h Thu Feb 07 10:33:19 2008 +0000
+++ b/include/linux/blkdev.h Fri Feb 08 11:27:13 2008 +0000
@@ -506,6 +506,8 @@
#define blk_barrier_rq(rq) ((rq)->flags & REQ_HARDBARRIER)
#define blk_fua_rq(rq) ((rq)->flags & REQ_FUA)
+#define blk_empty_barrier(rq) (blk_barrier_rq(rq) && blk_fs_request(rq) && !(rq)->hard_nr_sectors)
+
#define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist)
#define rq_data_dir(rq) ((rq)->flags & 1)
--- a/mm/highmem.c Thu Feb 07 10:33:19 2008 +0000
+++ b/mm/highmem.c Fri Feb 08 11:27:13 2008 +0000
@@ -468,6 +468,12 @@
mempool_t *pool;
/*
+ * Data-less bio, nothing to bounce
+ */
+ if (bio_empty_barrier(*bio_orig))
+ return;
+
+ /*
* for non-isa bounce case, just check if the bounce pfn is equal
* to or bigger than the highest pfn in the system -- in that case,
* don't waste time iterating over bio segments
prev parent reply other threads:[~2008-02-08 14:14 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-02-07 16:14 Block WRITE_BARRIER / FLUSH_DISKCACHE operations and parameters Samuel Thibault
2008-02-08 14:14 ` Samuel Thibault [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20080208141447.GA9422@implementation.uk.xensource.com \
--to=samuel.thibault@eu.citrix.com \
--cc=xen-devel@lists.xensource.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.