From: Badari Pulavarty <pbadari@us.ibm.com>
To: Christoph Hellwig <hch@infradead.org>
Cc: kvm@vger.kernel.org
Subject: Re: [RFC] vhost-blk implementation
Date: Wed, 24 Mar 2010 13:27:31 -0700 [thread overview]
Message-ID: <4BAA75B3.6040808@us.ibm.com> (raw)
In-Reply-To: <20100324200402.GA22272@infradead.org>
Christoph Hellwig wrote:
>> Inspired by vhost-net implementation, I did initial prototype
>> of vhost-blk to see if it provides any benefits over QEMU virtio-blk.
>> I haven't handled all the error cases, fixed naming conventions etc.,
>> but the implementation is stable to play with. I tried not to deviate
>> from vhost-net implementation where possible.
>>
>
> Can you also send the qemu side of it?
>
It's pretty hacky; I based it on an old patch (vhost-net) from MST for
simplicity.
I haven't focused on cleaning it up, and I will rebase it on MST's
latest code
once it gets into QEMU.
Thanks,
Badari
---
hw/virtio-blk.c | 199 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 199 insertions(+)
Index: vhost/hw/virtio-blk.c
===================================================================
--- vhost.orig/hw/virtio-blk.c 2010-02-25 16:47:04.000000000 -0500
+++ vhost/hw/virtio-blk.c 2010-03-17 14:07:26.477430740 -0400
@@ -18,6 +18,7 @@
#ifdef __linux__
# include <scsi/sg.h>
#endif
+#include <kvm.h>
typedef struct VirtIOBlock
{
@@ -28,8 +29,13 @@
char serial_str[BLOCK_SERIAL_STRLEN + 1];
QEMUBH *bh;
size_t config_size;
+ uint8_t vhost_started;
} VirtIOBlock;
+typedef struct BDRVRawState { /* Local declaration exposing only a file descriptor.
+   * NOTE(review): presumably mirrors the private state of the raw block
+   * driver, whose real definition is not visible here -- confirm that fd
+   * is its first member, otherwise the read below picks up garbage.
+   */
+ int fd; /* read by vhost_blk_start() through vb->bs->opaque and handed to the vhost backend ioctl */
+} BDRVRawState;
+
static VirtIOBlock *to_virtio_blk(VirtIODevice *vdev)
{
return (VirtIOBlock *)vdev;
@@ -501,6 +507,198 @@
return 0;
}
+#if 1
+#include "linux/vhost.h"
+#include <sys/ioctl.h>
+#include <sys/eventfd.h>
+#include <unistd.h>
+#include "vhost.h"
+
+int vhost_blk_fd; /* descriptor for /dev/vhost-blk, opened by vhost_blk_start() */
+
+struct slot_info { /* One memory slot as consumed by vhost_blk_start().
+   * NOTE(review): redeclared locally; presumably has to match the layout
+   * used by whichever file defines slots[] -- confirm.
+   */
+ unsigned long phys_addr; /* copied into vhost_memory_region.guest_phys_addr */
+ unsigned long len; /* copied into memory_size; a zero length marks a slot that is skipped */
+ unsigned long userspace_addr; /* copied into vhost_memory_region.userspace_addr */
+ unsigned flags; /* slots with KVM_MEM_LOG_DIRTY_PAGES set are skipped */
+ int logging_count; /* not referenced by the code in this patch */
+};
+
+extern struct slot_info slots[KVM_MAX_NUM_MEM_REGIONS]; /* defined outside this file */
+
+/*
+ * Tell whether a memory slot should be exported to vhost: it must be
+ * populated (non-zero length) and must not have dirty-page logging enabled.
+ */
+static int vhost_blk_slot_usable(const struct slot_info *slot)
+{
+    return slot->len && !(slot->flags & KVM_MEM_LOG_DIRTY_PAGES);
+}
+
+/*
+ * Hand virtqueue 0 of @vdev over to the kernel vhost-blk device.
+ *
+ * Opens /dev/vhost-blk into the global vhost_blk_fd, claims ownership,
+ * uploads a memory table built from slots[], programs the ring size, base
+ * index and ring addresses, binds the guest/host notifiers, and finally
+ * sets the backing file descriptor taken from the block driver state.
+ *
+ * Returns 0 on success or a negative errno value on failure.  On failure
+ * the notifiers bound so far are released, vhost_blk_fd is closed and reset
+ * to -1.
+ *
+ * NOTE(review): the ring mappings obtained from cpu_physical_memory_map()
+ * are still not released on the failure paths; presumably they should be
+ * unmapped there -- confirm against the memory API in this tree.
+ */
+static int vhost_blk_start(struct VirtIODevice *vdev)
+{
+    target_phys_addr_t s, l, a;
+    int r, num, idx = 0;
+    struct vhost_vring_state state;
+    struct vhost_vring_file file;
+    struct vhost_vring_addr addr;
+    unsigned long long used_phys;
+    void *desc, *avail, *used;
+    int i, n = 0;
+    struct VirtQueue *q = virtio_queue(vdev, idx);
+    VirtIOBlock *vb = to_virtio_blk(vdev);
+    struct vhost_memory *mem;
+    BDRVRawState *st = vb->bs->opaque;
+
+    vhost_blk_fd = open("/dev/vhost-blk", O_RDWR);
+    if (vhost_blk_fd < 0) {
+        /* Capture errno before fprintf() gets a chance to overwrite it. */
+        r = -errno;
+        fprintf(stderr, "unable to open vhost-blk\n");
+        return r;
+    }
+
+    r = ioctl(vhost_blk_fd, VHOST_SET_OWNER, NULL);
+    if (r < 0) {
+        r = -errno;
+        fprintf(stderr, "ioctl VHOST_SET_OWNER failed\n");
+        goto fail_fd;
+    }
+
+    /* First pass: count the slots that will go into the memory table. */
+    for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i) {
+        if (vhost_blk_slot_usable(&slots[i])) {
+            ++n;
+        }
+    }
+
+    mem = qemu_mallocz(offsetof(struct vhost_memory, regions) +
+                       n * sizeof(struct vhost_memory_region));
+    if (!mem) {
+        r = -ENOMEM;
+        goto fail_fd;
+    }
+
+    /* Second pass: fill in one region per usable slot. */
+    mem->nregions = n;
+    n = 0;
+    for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i) {
+        if (!vhost_blk_slot_usable(&slots[i])) {
+            continue;
+        }
+        mem->regions[n].guest_phys_addr = slots[i].phys_addr;
+        mem->regions[n].memory_size = slots[i].len;
+        mem->regions[n].userspace_addr = slots[i].userspace_addr;
+        ++n;
+    }
+
+    r = ioctl(vhost_blk_fd, VHOST_SET_MEM_TABLE, mem);
+    if (r < 0) {
+        r = -errno;
+    }
+    /*
+     * The table is not referenced again in this function, so release it
+     * regardless of the ioctl outcome.
+     * NOTE(review): assumes the kernel copies the table during the ioctl
+     * rather than keeping the user pointer -- confirm.
+     */
+    qemu_free(mem);
+    if (r < 0) {
+        fprintf(stderr, "ioctl VHOST_SET_MEM_TABLE failed\n");
+        goto fail_fd;
+    }
+
+    state.index = idx;
+    num = state.num = virtio_queue_get_num(vdev, idx);
+    r = ioctl(vhost_blk_fd, VHOST_SET_VRING_NUM, &state);
+    if (r < 0) {
+        r = -errno;
+        fprintf(stderr, "ioctl VHOST_SET_VRING_NUM failed\n");
+        goto fail_fd;
+    }
+
+    state.num = virtio_queue_last_avail_idx(vdev, idx);
+    r = ioctl(vhost_blk_fd, VHOST_SET_VRING_BASE, &state);
+    if (r < 0) {
+        r = -errno;
+        fprintf(stderr, "ioctl VHOST_SET_VRING_BASE failed\n");
+        goto fail_fd;
+    }
+
+    /* Map the descriptor table; a short mapping counts as failure. */
+    s = l = sizeof(struct vring_desc) * num;
+    a = virtio_queue_get_desc(vdev, idx);
+    desc = cpu_physical_memory_map(a, &l, 0);
+    if (!desc || l != s) {
+        r = -ENOMEM;
+        goto fail_fd;
+    }
+
+    /* Avail ring entries are 16-bit descriptor indexes, not 64-bit words. */
+    s = l = offsetof(struct vring_avail, ring) +
+        sizeof(uint16_t) * num;
+    a = virtio_queue_get_avail(vdev, idx);
+    avail = cpu_physical_memory_map(a, &l, 0);
+    if (!avail || l != s) {
+        r = -ENOMEM;
+        goto fail_fd;
+    }
+
+    /* The used ring is mapped with the write flag set (last argument 1). */
+    s = l = offsetof(struct vring_used, ring) +
+        sizeof(struct vring_used_elem) * num;
+    used_phys = a = virtio_queue_get_used(vdev, idx);
+    used = cpu_physical_memory_map(a, &l, 1);
+    if (!used || l != s) {
+        r = -ENOMEM;
+        goto fail_fd;
+    }
+
+    addr.index = idx;
+    addr.desc_user_addr = (u_int64_t)(unsigned long)desc;
+    addr.avail_user_addr = (u_int64_t)(unsigned long)avail;
+    addr.used_user_addr = (u_int64_t)(unsigned long)used;
+    addr.log_guest_addr = used_phys;
+    addr.flags = 0;
+    r = ioctl(vhost_blk_fd, VHOST_SET_VRING_ADDR, &addr);
+    if (r < 0) {
+        r = -errno;
+        fprintf(stderr, "ioctl VHOST_SET_VRING_ADDR failed\n");
+        goto fail_fd;
+    }
+
+    if (!vdev->binding->guest_notifier || !vdev->binding->host_notifier) {
+        fprintf(stderr, "binding does not support irqfd/queuefd\n");
+        r = -ENOSYS;
+        goto fail_fd;
+    }
+
+    r = vdev->binding->guest_notifier(vdev->binding_opaque, idx, true);
+    if (r < 0) {
+        fprintf(stderr, "Error binding guest notifier: %d\n", -r);
+        goto fail_fd;
+    }
+
+    r = vdev->binding->host_notifier(vdev->binding_opaque, idx, true);
+    if (r < 0) {
+        fprintf(stderr, "Error binding host notifier: %d\n", -r);
+        goto fail_host_notifier;
+    }
+
+    file.index = idx;
+    file.fd = event_notifier_get_fd(virtio_queue_host_notifier(q));
+    r = ioctl(vhost_blk_fd, VHOST_SET_VRING_KICK, &file);
+    if (r < 0) {
+        r = -errno;
+        goto fail_notifiers;
+    }
+
+    file.fd = event_notifier_get_fd(virtio_queue_guest_notifier(q));
+    r = ioctl(vhost_blk_fd, VHOST_SET_VRING_CALL, &file);
+    if (r < 0) {
+        r = -errno;
+        goto fail_notifiers;
+    }
+
+    /*
+     * Pass the image's file descriptor as the backend.
+     * NOTE(review): this reuses the vhost-net ioctl name; presumably the
+     * vhost-blk prototype shares the request number -- confirm.
+     */
+    file.fd = st->fd;
+    r = ioctl(vhost_blk_fd, VHOST_NET_SET_BACKEND, &file);
+    if (r < 0) {
+        r = -errno;
+        goto fail_notifiers;
+    }
+    return 0;
+
+fail_notifiers:
+    vdev->binding->host_notifier(vdev->binding_opaque, idx, false);
+fail_host_notifier:
+    vdev->binding->guest_notifier(vdev->binding_opaque, idx, false);
+fail_fd:
+    /* Do not leak the vhost descriptor or leave a stale value behind. */
+    close(vhost_blk_fd);
+    vhost_blk_fd = -1;
+    return r;
+}
+
+/*
+ * Status-change hook: the first time the DRIVER_OK bit is observed in
+ * vdev->status, try to start vhost-blk for the device.  Once a start has
+ * succeeded, vhost_started is set and later calls return immediately.  A
+ * failed start is only reported on stderr, so it is attempted again on the
+ * next call that sees DRIVER_OK.
+ */
+static void virtio_blk_set_status(struct VirtIODevice *vdev)
+{
+    VirtIOBlock *blk = to_virtio_blk(vdev);
+    int rc;
+
+    /* Nothing to do if already running or the driver is not ready yet. */
+    if (blk->vhost_started || !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
+        return;
+    }
+
+    rc = vhost_blk_start(vdev);
+    if (rc < 0) {
+        fprintf(stderr, "unable to start vhost blk: %d\n", rc);
+        return;
+    }
+
+    blk->vhost_started = 1;
+}
+
+#endif
+
VirtIODevice *virtio_blk_init(DeviceState *dev, DriveInfo *dinfo)
{
VirtIOBlock *s;
@@ -517,6 +715,7 @@
s->config_size = size;
s->vdev.get_config = virtio_blk_update_config;
s->vdev.get_features = virtio_blk_get_features;
+ s->vdev.set_status = virtio_blk_set_status;
s->vdev.reset = virtio_blk_reset;
s->bs = dinfo->bdrv;
s->rq = NULL;
next prev parent reply other threads:[~2010-03-24 20:27 UTC|newest]
Thread overview: 36+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-03-23 1:00 [RFC] vhost-blk implementation Badari Pulavarty
2010-03-23 1:16 ` Anthony Liguori
2010-03-23 1:45 ` Badari Pulavarty
2010-03-23 2:00 ` Anthony Liguori
2010-03-23 2:50 ` Badari Pulavarty
2010-03-23 10:05 ` Avi Kivity
2010-03-23 14:48 ` Badari Pulavarty
2010-03-23 10:03 ` Avi Kivity
2010-03-23 14:55 ` Badari Pulavarty
2010-03-23 16:53 ` Avi Kivity
2010-03-24 20:05 ` Christoph Hellwig
2010-03-25 6:29 ` Avi Kivity
2010-03-25 15:48 ` Christoph Hellwig
2010-03-25 15:51 ` Avi Kivity
2010-03-25 15:00 ` Asdo
2010-04-05 19:59 ` Christoph Hellwig
2010-04-07 0:36 ` [RFC] vhost-blk implementation (v2) Badari Pulavarty
2010-03-23 10:09 ` [RFC] vhost-blk implementation Eran Rom
2010-03-24 20:04 ` Christoph Hellwig
2010-03-24 20:22 ` Badari Pulavarty
2010-03-25 7:57 ` Avi Kivity
2010-03-25 14:36 ` Badari Pulavarty
2010-03-25 15:57 ` Christoph Hellwig
2010-03-26 18:53 ` Eran Rom
2010-04-08 16:17 ` Stefan Hajnoczi
2010-04-05 19:23 ` Christoph Hellwig
2010-04-05 23:17 ` Badari Pulavarty
2010-03-24 20:27 ` Badari Pulavarty [this message]
2010-03-29 15:41 ` Badari Pulavarty
2010-03-29 18:20 ` Chris Wright
2010-03-29 20:37 ` Avi Kivity
2010-03-29 22:51 ` Badari Pulavarty
2010-03-29 23:56 ` Chris Wright
2010-03-30 12:43 ` Avi Kivity
2010-04-05 14:22 ` Stefan Hajnoczi
2010-04-06 2:27 ` Badari Pulavarty
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4BAA75B3.6040808@us.ibm.com \
--to=pbadari@us.ibm.com \
--cc=hch@infradead.org \
--cc=kvm@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox