From: Marcelo Tosatti <mtosatti@redhat.com>
To: Anthony Liguori <aliguori@us.ibm.com>, Avi Kivity <avi@qumranet.com>
Cc: kvm-devel@lists.sourceforge.net, Gerd von Egidy <lists@egidy.de>,
Marcelo Tosatti <mtosatti@redhat.com>
Subject: [patch 2/2] QEMU/KVM: virtio-blk async IO
Date: Fri, 18 Apr 2008 19:20:42 -0300
Message-ID: <20080418222407.375428761@localhost.localdomain>
In-Reply-To: 20080418222040.988332630@localhost.localdomain
[-- Attachment #1: virtio-blk-async --]
[-- Type: text/plain, Size: 4632 bytes --]
virtio-blk should not use synchronous requests, as that can block vcpus
outside of guest mode for long periods of time for no reason.
Ideally, AIO completions would be checked before re-entering guest mode,
so that cached reads and writes are reported as soon as possible; that is
a job for the generic block layer.
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Index: kvm-userspace.aio/qemu/hw/virtio-blk.c
===================================================================
--- kvm-userspace.aio.orig/qemu/hw/virtio-blk.c
+++ kvm-userspace.aio/qemu/hw/virtio-blk.c
@@ -77,54 +77,117 @@ static VirtIOBlock *to_virtio_blk(VirtIO
return (VirtIOBlock *)vdev;
}
+typedef struct VirtIOBlockReq
+{
+ VirtIODevice *vdev;
+ VirtQueue *vq;
+ struct iovec in_sg_status;
+ unsigned int pending;
+ unsigned int len;
+ unsigned int elem_idx;
+ int status;
+} VirtIOBlockReq;
+
+static void virtio_blk_rw_complete(void *opaque, int ret)
+{
+ VirtIOBlockReq *req = opaque;
+ struct virtio_blk_inhdr *in;
+ VirtQueueElement elem;
+
+ req->status |= ret;
+ if (--req->pending > 0)
+ return;
+
+ elem.index = req->elem_idx;
+ in = (void *)req->in_sg_status.iov_base;
+
+ in->status = req->status ? VIRTIO_BLK_S_IOERR : VIRTIO_BLK_S_OK;
+ virtqueue_push(req->vq, &elem, req->len);
+ virtio_notify(req->vdev, req->vq);
+ qemu_free(req);
+}
+
static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
{
VirtIOBlock *s = to_virtio_blk(vdev);
VirtQueueElement elem;
+ VirtIOBlockReq *req;
unsigned int count;
while ((count = virtqueue_pop(vq, &elem)) != 0) {
struct virtio_blk_inhdr *in;
struct virtio_blk_outhdr *out;
- unsigned int wlen;
off_t off;
int i;
+ /*
+ * FIXME: limit the number of in-flight requests
+ */
+ req = qemu_malloc(sizeof(VirtIOBlockReq));
+ if (!req)
+ return;
+ memset(req, 0, sizeof(*req));
+ memcpy(&req->in_sg_status, &elem.in_sg[elem.in_num - 1],
+ sizeof(req->in_sg_status));
+ req->vdev = vdev;
+ req->vq = vq;
+ req->elem_idx = elem.index;
+
out = (void *)elem.out_sg[0].iov_base;
in = (void *)elem.in_sg[elem.in_num - 1].iov_base;
off = out->sector;
if (out->type & VIRTIO_BLK_T_SCSI_CMD) {
- wlen = sizeof(*in);
+ unsigned int len = sizeof(*in);
+
in->status = VIRTIO_BLK_S_UNSUPP;
+ virtqueue_push(vq, &elem, len);
+ virtio_notify(vdev, vq);
+ qemu_free(req);
+
} else if (out->type & VIRTIO_BLK_T_OUT) {
- wlen = sizeof(*in);
+ req->pending = elem.out_num - 1;
for (i = 1; i < elem.out_num; i++) {
- bdrv_write(s->bs, off,
+ bdrv_aio_write(s->bs, off,
elem.out_sg[i].iov_base,
- elem.out_sg[i].iov_len / 512);
+ elem.out_sg[i].iov_len / 512,
+ virtio_blk_rw_complete,
+ req);
off += elem.out_sg[i].iov_len / 512;
+ req->len += elem.out_sg[i].iov_len;
}
- in->status = VIRTIO_BLK_S_OK;
} else {
- wlen = sizeof(*in);
+ req->pending = elem.in_num - 1;
for (i = 0; i < elem.in_num - 1; i++) {
- bdrv_read(s->bs, off,
+ bdrv_aio_read(s->bs, off,
elem.in_sg[i].iov_base,
- elem.in_sg[i].iov_len / 512);
+ elem.in_sg[i].iov_len / 512,
+ virtio_blk_rw_complete,
+ req);
off += elem.in_sg[i].iov_len / 512;
- wlen += elem.in_sg[i].iov_len;
+ req->len += elem.in_sg[i].iov_len;
}
-
- in->status = VIRTIO_BLK_S_OK;
}
-
- virtqueue_push(vq, &elem, wlen);
- virtio_notify(vdev, vq);
}
+ /*
+ * FIXME: Want to check for completions before returning to guest mode,
+ * so cached reads and writes are reported as quickly as possible. But
+ * that should be done in the generic block layer.
+ */
+}
+
+static void virtio_blk_reset(VirtIODevice *vdev)
+{
+ VirtIOBlock *s = to_virtio_blk(vdev);
+
+ /*
+ * This should cancel pending requests, but can't do nicely until there
+ * are per-device request lists.
+ */
+ qemu_aio_flush();
}
static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config)
@@ -156,6 +219,7 @@ void *virtio_blk_init(PCIBus *bus, uint1
s->vdev.update_config = virtio_blk_update_config;
s->vdev.get_features = virtio_blk_get_features;
+ s->vdev.reset = virtio_blk_reset;
s->bs = bs;
bs->devfn = s->vdev.pci_dev.devfn;
--
Thread overview (4 messages):
2008-04-18 22:20 [patch 0/2] virtio-blk async IO Marcelo Tosatti
2008-04-18 22:20 ` [patch 1/2] QEMU/KVM: provide a reset method for virtio Marcelo Tosatti
2008-04-18 22:20 ` Marcelo Tosatti [this message]
2008-04-18 22:57 ` [patch 0/2] virtio-blk async IO Gerd von Egidy