public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH, RFC] virtio_blk: add cache flush command
@ 2009-05-11  8:39 Christoph Hellwig
  2009-05-11 14:51 ` Anthony Liguori
  2009-05-12 13:54 ` Rusty Russell
  0 siblings, 2 replies; 21+ messages in thread
From: Christoph Hellwig @ 2009-05-11  8:39 UTC (permalink / raw)
  To: Rusty Russell; +Cc: kvm

Currently virtio-blk supports barriers for ordering requests, which
is enough to guarantee filesystem metadata integrity with writeback
caches, but it does not support any way to flush that writeback cache
to guarantee that data is stable on disk on an fsync.

This patch implements a new VIRTIO_BLK_T_FLUSH command to flush the
cache and exposes the functionality to the block layer by implementing
a prepare_flush method.

Do we need a new feature flag for this command or can we expect that
all previous barrier support was buggy enough anyway?


Signed-off-by: Christoph Hellwig <hch@lst.de>

Index: xfs/drivers/block/virtio_blk.c
===================================================================
--- xfs.orig/drivers/block/virtio_blk.c	2009-05-11 10:11:28.884784539 +0200
+++ xfs/drivers/block/virtio_blk.c	2009-05-11 10:31:16.642783620 +0200
@@ -65,13 +65,25 @@ static void blk_done(struct virtqueue *v
 			break;
 		}
 
-		if (blk_pc_request(vbr->req)) {
+		switch (vbr->req->cmd_type) {
+		case REQ_TYPE_FS:
+			nr_bytes = blk_rq_bytes(vbr->req);
+			break;
+		case REQ_TYPE_BLOCK_PC:
 			vbr->req->data_len = vbr->in_hdr.residual;
 			nr_bytes = vbr->in_hdr.data_len;
 			vbr->req->sense_len = vbr->in_hdr.sense_len;
 			vbr->req->errors = vbr->in_hdr.errors;
-		} else
-			nr_bytes = blk_rq_bytes(vbr->req);
+			break;
+		case REQ_TYPE_LINUX_BLOCK:
+			if (vbr->req->cmd[0] == REQ_LB_OP_FLUSH) {
+				nr_bytes = blk_rq_bytes(vbr->req);
+				break;
+			}
+			/*FALLTHRU*/
+		default:
+			BUG();
+		}
 
 		__blk_end_request(vbr->req, error, nr_bytes);
 		list_del(&vbr->list);
@@ -82,7 +94,7 @@ static void blk_done(struct virtqueue *v
 	spin_unlock_irqrestore(&vblk->lock, flags);
 }
 
-static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
+static noinline bool do_req(struct request_queue *q, struct virtio_blk *vblk,
 		   struct request *req)
 {
 	unsigned long num, out = 0, in = 0;
@@ -94,15 +106,27 @@ static bool do_req(struct request_queue 
 		return false;
 
 	vbr->req = req;
-	if (blk_fs_request(vbr->req)) {
+
+	switch (req->cmd_type) {
+	case REQ_TYPE_FS:
 		vbr->out_hdr.type = 0;
 		vbr->out_hdr.sector = vbr->req->sector;
 		vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
-	} else if (blk_pc_request(vbr->req)) {
+		break;
+	case REQ_TYPE_BLOCK_PC:
 		vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD;
 		vbr->out_hdr.sector = 0;
 		vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
-	} else {
+		break;
+	case REQ_TYPE_LINUX_BLOCK:
+		if (req->cmd[0] == REQ_LB_OP_FLUSH) {
+			vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
+			vbr->out_hdr.sector = 0;
+			vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
+			break;
+		}
+		/*FALLTHRU*/
+	default:
 		/* We don't put anything else in the queue. */
 		BUG();
 	}
@@ -174,6 +198,12 @@ static void do_virtblk_request(struct re
 		vblk->vq->vq_ops->kick(vblk->vq);
 }
 
+static void virtblk_prepare_flush(struct request_queue *q, struct request *req)
+{
+	req->cmd_type = REQ_TYPE_LINUX_BLOCK;
+	req->cmd[0] = REQ_LB_OP_FLUSH;
+}
+
 static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
 			 unsigned cmd, unsigned long data)
 {
@@ -310,7 +340,8 @@ static int virtblk_probe(struct virtio_d
 
 	/* If barriers are supported, tell block layer that queue is ordered */
 	if (virtio_has_feature(vdev, VIRTIO_BLK_F_BARRIER))
-		blk_queue_ordered(vblk->disk->queue, QUEUE_ORDERED_TAG, NULL);
+		blk_queue_ordered(vblk->disk->queue, QUEUE_ORDERED_TAG_FLUSH,
+				  virtblk_prepare_flush);
 
 	/* If disk is read-only in the host, the guest should obey */
 	if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
Index: xfs/include/linux/virtio_blk.h
===================================================================
--- xfs.orig/include/linux/virtio_blk.h	2009-05-11 10:18:39.933666519 +0200
+++ xfs/include/linux/virtio_blk.h	2009-05-11 10:22:14.396660919 +0200
@@ -35,6 +35,17 @@ struct virtio_blk_config
 	__u32 blk_size;
 } __attribute__((packed));
 
+/*
+ * Command types
+ *
+ * Usage is a bit tricky as some bits are used as flags and some not.
+ *
+ * Rules:
+ *   VIRTIO_BLK_T_OUT may be combined with VIRTIO_BLK_T_SCSI_CMD or
+ *   VIRTIO_BLK_T_BARRIER.  VIRTIO_BLK_T_FLUSH is a command of its own
+ *   and may not be combined with any of the other flags.
+ */
+
 /* These two define direction. */
 #define VIRTIO_BLK_T_IN		0
 #define VIRTIO_BLK_T_OUT	1
@@ -42,6 +53,9 @@ struct virtio_blk_config
 /* This bit says it's a scsi command, not an actual read or write. */
 #define VIRTIO_BLK_T_SCSI_CMD	2
 
+/* Cache flush command */
+#define VIRTIO_BLK_T_FLUSH	4
+
 /* Barrier before this op. */
 #define VIRTIO_BLK_T_BARRIER	0x80000000
 

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2009-05-18 12:07 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-05-11  8:39 [PATCH, RFC] virtio_blk: add cache flush command Christoph Hellwig
2009-05-11 14:51 ` Anthony Liguori
2009-05-11 15:40   ` Christoph Hellwig
2009-05-11 15:45     ` Avi Kivity
2009-05-11 16:28       ` Christoph Hellwig
2009-05-11 16:49         ` Avi Kivity
2009-05-11 17:47           ` Anthony Liguori
2009-05-11 18:00             ` Avi Kivity
2009-05-11 18:29               ` Anthony Liguori
2009-05-11 18:40                 ` Avi Kivity
2009-05-18 12:03                 ` Christoph Hellwig
2009-05-12  7:23             ` Christoph Hellwig
2009-05-12  7:19           ` Christoph Hellwig
2009-05-12  8:35             ` Avi Kivity
2009-05-18 12:06               ` Christoph Hellwig
2009-05-11 16:38     ` Anthony Liguori
2009-05-12  7:26       ` Christoph Hellwig
2009-05-12 13:54 ` Rusty Russell
2009-05-12 14:18   ` Christian Borntraeger
2009-05-13  1:52     ` Rusty Russell
2009-05-18 12:07     ` Christoph Hellwig

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox