All of lore.kernel.org
 help / color / mirror / Atom feed
* Block WRITE_BARRIER / FLUSH_DISKCACHE operations and parameters
@ 2008-02-07 16:14 Samuel Thibault
  2008-02-08 14:14 ` Samuel Thibault
  0 siblings, 1 reply; 2+ messages in thread
From: Samuel Thibault @ 2008-02-07 16:14 UTC (permalink / raw)
  To: xen-devel

Hello,

Nothing is said about the parameters to be given along with WRITE_BARRIER or
FLUSH_DISKCACHE operations.  I guess they are both implicitly supposed
to be write operations? (it's the case for WRITE_BARRIER in the Linux
implementation) If so, would it be fine to allow nr_segments to be 0?
As part of IDE cache flush emulation, I need to issue WRITE_BARRIERs
without issuing any actual write...

Samuel

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: Block WRITE_BARRIER / FLUSH_DISKCACHE operations and parameters
  2008-02-07 16:14 Block WRITE_BARRIER / FLUSH_DISKCACHE operations and parameters Samuel Thibault
@ 2008-02-08 14:14 ` Samuel Thibault
  0 siblings, 0 replies; 2+ messages in thread
From: Samuel Thibault @ 2008-02-08 14:14 UTC (permalink / raw)
  To: xen-devel

Samuel Thibault, le Thu 07 Feb 2008 16:14:42 +0000, a écrit :
> Nothing is said about parameters to be given along WRITE_BARRIER or
> FLUSH_DISKCACHE operations.  I guess they are both implicitly supposed
> to be write operations? (it's the case for WRITE_BARRIER in the Linux
> implementation) If so, would it be fine to allow nr_segments to be 0?
> As part of IDE cache flush emulation, I need to issue WRITE_BARRIERs
> without issuing any actual write...

The patch below implements it for Linux:



block: backport Jens Axboe's commit from
Tue, 16 Oct 2007 09:03:56 +0000 (11:03 +0200)
bf2de6f5a4faf0197268f18d08969b003b87b6e8
Initial support for data-less (or empty) barrier support

blkback: permit and implement empty barrier.

Signed-off-by: Samuel Thibault <samuel.thibault@eu.citrix.com>

diff -r 5c61cd349b20 block/elevator.c
--- a/block/elevator.c	Thu Feb 07 10:33:19 2008 +0000
+++ b/block/elevator.c	Fri Feb 08 11:27:12 2008 +0000
@@ -493,6 +493,16 @@
 	int ret;
 
 	while ((rq = __elv_next_request(q)) != NULL) {
+		/*
+		 * Kill the empty barrier place holder, the driver must
+		 * not ever see it.
+		 */
+		if (blk_empty_barrier(rq)) {
+			blkdev_dequeue_request(rq);
+			end_that_request_chunk(rq, 1, 0);
+			end_that_request_last(rq, 1);
+			continue;
+		}
 		if (!(rq->flags & REQ_STARTED)) {
 			elevator_t *e = q->elevator;
 
--- a/block/ll_rw_blk.c	Thu Feb 07 10:33:19 2008 +0000
+++ b/block/ll_rw_blk.c	Fri Feb 08 11:27:12 2008 +0000
@@ -483,9 +483,12 @@
 	 * Queue ordered sequence.  As we stack them at the head, we
 	 * need to queue in reverse order.  Note that we rely on that
 	 * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs
-	 * request gets inbetween ordered sequence.
+	 * request gets inbetween ordered sequence. If this request is
+	 * an empty barrier, we don't need to do a postflush ever since
+	 * there will be no data written between the pre and post flush.
+	 * Hence a single flush will suffice.
 	 */
-	if (q->ordered & QUEUE_ORDERED_POSTFLUSH)
+	if ((q->ordered & QUEUE_ORDERED_POSTFLUSH) && !blk_empty_barrier(rq))
 		queue_flush(q, QUEUE_ORDERED_POSTFLUSH);
 	else
 		q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH;
@@ -2967,7 +2970,7 @@
 {
 	struct block_device *bdev = bio->bi_bdev;
 
-	if (bdev != bdev->bd_contains) {
+	if (bio_sectors(bio) && bdev != bdev->bd_contains) {
 		struct hd_struct *p = bdev->bd_part;
 		const int rw = bio_data_dir(bio);
 
@@ -3028,7 +3031,7 @@
 	might_sleep();
 	/* Test device or partition size, when known. */
 	maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
-	if (maxsector) {
+	if (maxsector && nr_sectors) {
 		sector_t sector = bio->bi_sector;
 
 		if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
@@ -3094,7 +3097,7 @@
 		old_dev = bio->bi_bdev->bd_dev;
 
 		maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
-		if (maxsector) {
+		if (maxsector && nr_sectors) {
 			sector_t sector = bio->bi_sector;
 
 			if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
@@ -3128,21 +3131,25 @@
 {
 	int count = bio_sectors(bio);
 
-	BIO_BUG_ON(!bio->bi_size);
-	BIO_BUG_ON(!bio->bi_io_vec);
 	bio->bi_rw |= rw;
-	if (rw & WRITE)
-		count_vm_events(PGPGOUT, count);
-	else
-		count_vm_events(PGPGIN, count);
 
-	if (unlikely(block_dump)) {
-		char b[BDEVNAME_SIZE];
-		printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n",
-			current->comm, current->pid,
-			(rw & WRITE) ? "WRITE" : "READ",
-			(unsigned long long)bio->bi_sector,
-			bdevname(bio->bi_bdev,b));
+	if (!bio_empty_barrier(bio)) {
+		BIO_BUG_ON(!bio->bi_size);
+		BIO_BUG_ON(!bio->bi_io_vec);
+
+		if (rw & WRITE)
+			count_vm_events(PGPGOUT, count);
+		else
+			count_vm_events(PGPGIN, count);
+
+		if (unlikely(block_dump)) {
+			char b[BDEVNAME_SIZE];
+			printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n",
+				current->comm, current->pid,
+				(rw & WRITE) ? "WRITE" : "READ",
+				(unsigned long long)bio->bi_sector,
+				bdevname(bio->bi_bdev,b));
+		}
 	}
 
 	generic_make_request(bio);
@@ -3259,6 +3266,13 @@
 	total_bytes = bio_nbytes = 0;
 	while ((bio = req->bio) != NULL) {
 		int nbytes;
+
+		/* For an empty barrier request, the low level driver must
+		 * store a potential error location in ->sector. We pass
+		 * that back up in ->bi_sector
+		 */
+		if (blk_empty_barrier(req))
+			bio->bi_sector = req->sector;
 
 		if (nr_bytes >= bio->bi_size) {
 			req->bio = bio->bi_next;
--- a/drivers/xen/blkback/blkback.c	Thu Feb 07 10:33:19 2008 +0000
+++ b/drivers/xen/blkback/blkback.c	Fri Feb 08 11:27:13 2008 +0000
@@ -407,7 +407,7 @@
 
 	/* Check that number of segments is sane. */
 	nseg = req->nr_segments;
-	if (unlikely(nseg == 0) || 
+	if (unlikely(nseg == 0 && operation != WRITE_BARRIER) || 
 	    unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
 		DPRINTK("Bad number of segments in request (%d)\n", nseg);
 		goto fail_response;
@@ -500,6 +500,18 @@
 		preq.sector_number += seg[i].nsec;
 	}
 
+	if (!bio) {
+		BUG_ON(operation != WRITE_BARRIER);
+		bio = biolist[nbio++] = bio_alloc(GFP_KERNEL, 0);
+		if (unlikely(bio == NULL))
+			goto fail_put_bio;
+
+		bio->bi_bdev    = preq.bdev;
+		bio->bi_private = pending_req;
+		bio->bi_end_io  = end_block_io_op;
+		bio->bi_sector  = -1;
+	}
+
 	plug_queue(blkif, bio);
 	atomic_set(&pending_req->pendcnt, nbio);
 	blkif_get(blkif);
--- a/fs/bio.c	Thu Feb 07 10:33:19 2008 +0000
+++ b/fs/bio.c	Fri Feb 08 11:27:13 2008 +0000
@@ -112,7 +112,8 @@
 
 	BIO_BUG_ON(pool_idx >= BIOVEC_NR_POOLS);
 
-	mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]);
+	if (bio->bi_io_vec)
+		mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]);
 	mempool_free(bio, bio_set->bio_pool);
 }
 
--- a/include/linux/bio.h	Thu Feb 07 10:33:19 2008 +0000
+++ b/include/linux/bio.h	Fri Feb 08 11:27:13 2008 +0000
@@ -172,12 +172,27 @@
 #define bio_offset(bio)		bio_iovec((bio))->bv_offset
 #define bio_segments(bio)	((bio)->bi_vcnt - (bio)->bi_idx)
 #define bio_sectors(bio)	((bio)->bi_size >> 9)
-#define bio_cur_sectors(bio)	(bio_iovec(bio)->bv_len >> 9)
-#define bio_data(bio)		(page_address(bio_page((bio))) + bio_offset((bio)))
 #define bio_barrier(bio)	((bio)->bi_rw & (1 << BIO_RW_BARRIER))
 #define bio_sync(bio)		((bio)->bi_rw & (1 << BIO_RW_SYNC))
 #define bio_failfast(bio)	((bio)->bi_rw & (1 << BIO_RW_FAILFAST))
 #define bio_rw_ahead(bio)	((bio)->bi_rw & (1 << BIO_RW_AHEAD))
+#define bio_empty_barrier(bio)	(bio_barrier(bio) && !(bio)->bi_size)
+
+static inline unsigned int bio_cur_sectors(struct bio *bio)
+{
+	if (bio->bi_vcnt)
+		return bio_iovec(bio)->bv_len >> 9;
+
+	return 0;
+}
+
+static inline void *bio_data(struct bio *bio)
+{
+	if (bio->bi_vcnt)
+		return page_address(bio_page(bio)) + bio_offset(bio);
+
+	return NULL;
+}
 
 /*
  * will die
--- a/include/linux/blkdev.h	Thu Feb 07 10:33:19 2008 +0000
+++ b/include/linux/blkdev.h	Fri Feb 08 11:27:13 2008 +0000
@@ -506,6 +506,8 @@
 #define blk_barrier_rq(rq)	((rq)->flags & REQ_HARDBARRIER)
 #define blk_fua_rq(rq)		((rq)->flags & REQ_FUA)
 
+#define blk_empty_barrier(rq)   (blk_barrier_rq(rq) && blk_fs_request(rq) && !(rq)->hard_nr_sectors)
+
 #define list_entry_rq(ptr)	list_entry((ptr), struct request, queuelist)
 
 #define rq_data_dir(rq)		((rq)->flags & 1)
--- a/mm/highmem.c	Thu Feb 07 10:33:19 2008 +0000
+++ b/mm/highmem.c	Fri Feb 08 11:27:13 2008 +0000
@@ -468,6 +468,12 @@
 	mempool_t *pool;
 
 	/*
+	 * Data-less bio, nothing to bounce
+	 */
+	if (bio_empty_barrier(*bio_orig))
+		return;
+
+	/*
 	 * for non-isa bounce case, just check if the bounce pfn is equal
 	 * to or bigger than the highest pfn in the system -- in that case,
 	 * don't waste time iterating over bio segments

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2008-02-08 14:14 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-02-07 16:14 Block WRITE_BARRIER / FLUSH_DISKCACHE operations and parameters Samuel Thibault
2008-02-08 14:14 ` Samuel Thibault

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.