From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from mailman by lists.gnu.org with tmda-scanned (Exim 4.43) id 1L8KFj-0004xr-Ls for qemu-devel@nongnu.org; Thu, 04 Dec 2008 14:52:47 -0500 Received: from exim by lists.gnu.org with spam-scanned (Exim 4.43) id 1L8KFi-0004xN-8j for qemu-devel@nongnu.org; Thu, 04 Dec 2008 14:52:46 -0500 Received: from [199.232.76.173] (port=52759 helo=monty-python.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1L8KFi-0004xD-01 for qemu-devel@nongnu.org; Thu, 04 Dec 2008 14:52:46 -0500 Received: from savannah.gnu.org ([199.232.41.3]:52080 helo=sv.gnu.org) by monty-python.gnu.org with esmtps (TLS-1.0:RSA_AES_256_CBC_SHA1:32) (Exim 4.60) (envelope-from ) id 1L8KFh-0001Cg-D7 for qemu-devel@nongnu.org; Thu, 04 Dec 2008 14:52:45 -0500 Received: from cvs.savannah.gnu.org ([199.232.41.69]) by sv.gnu.org with esmtp (Exim 4.63) (envelope-from ) id 1L8KFg-0006bJ-ST for qemu-devel@nongnu.org; Thu, 04 Dec 2008 19:52:44 +0000 Received: from aliguori by cvs.savannah.gnu.org with local (Exim 4.63) (envelope-from ) id 1L8KFg-0006bF-Hy for qemu-devel@nongnu.org; Thu, 04 Dec 2008 19:52:44 +0000 MIME-Version: 1.0 Errors-To: aliguori Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From: Anthony Liguori Message-Id: Date: Thu, 04 Dec 2008 19:52:44 +0000 Subject: [Qemu-devel] [5870] Add virtio-blk support Reply-To: qemu-devel@nongnu.org List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: qemu-devel@nongnu.org Revision: 5870 http://svn.sv.gnu.org/viewvc/?view=rev&root=qemu&revision=5870 Author: aliguori Date: 2008-12-04 19:52:44 +0000 (Thu, 04 Dec 2008) Log Message: ----------- Add virtio-blk support Virtio-blk is a paravirtual block device based on VirtIO. It can be used by specifying the if=virtio parameter to the -drive parameter. When using -enable-kvm, it can achieve very good performance compared to IDE or SCSI. Signed-off-by: Anthony Liguori Modified Paths: -------------- trunk/Makefile.target trunk/hw/pc.c trunk/qemu-doc.texi trunk/sysemu.h trunk/vl.c Added Paths: ----------- trunk/hw/virtio-blk.c trunk/hw/virtio-blk.h Modified: trunk/Makefile.target =================================================================== --- trunk/Makefile.target 2008-12-04 19:38:57 UTC (rev 5869) +++ trunk/Makefile.target 2008-12-04 19:52:44 UTC (rev 5870) @@ -665,7 +665,7 @@ OBJS+= cirrus_vga.o apic.o parallel.o acpi.o piix_pci.o OBJS+= usb-uhci.o vmmouse.o vmport.o vmware_vga.o # virtio support -OBJS+= virtio.o +OBJS+= virtio.o virtio-blk.o CPPFLAGS += -DHAS_AUDIO -DHAS_AUDIO_CHOICE endif ifeq ($(TARGET_BASE_ARCH), ppc) @@ -684,7 +684,7 @@ # PowerPC 4xx boards OBJS+= pflash_cfi02.o ppc4xx_devs.o ppc4xx_pci.o ppc405_uc.o ppc405_boards.o # virtio support -OBJS+= virtio.o +OBJS+= virtio.o virtio-blk.o endif ifeq ($(TARGET_BASE_ARCH), mips) OBJS+= mips_r4k.o mips_jazz.o mips_malta.o mips_mipssim.o Modified: trunk/hw/pc.c =================================================================== --- trunk/hw/pc.c 2008-12-04 19:38:57 UTC (rev 5869) +++ trunk/hw/pc.c 2008-12-04 19:52:44 UTC (rev 5870) @@ -33,6 +33,7 @@ #include "boards.h" #include "console.h" #include "fw_cfg.h" +#include "virtio-blk.h" /* output Bochs bios info messages */ //#define DEBUG_BIOS @@ -1092,6 +1093,18 @@ } } } + + /* Add virtio block devices */ + if (pci_enabled) { + int index; + int unit_id = 0; + + while ((index = drive_get_index(IF_VIRTIO, 0, unit_id)) != -1) { + virtio_blk_init(pci_bus, 0x1AF4, 0x1001, + drives_table[index].bdrv); + unit_id++; + } + } } static void pc_init_pci(ram_addr_t ram_size, int vga_ram_size, Added: trunk/hw/virtio-blk.c =================================================================== --- trunk/hw/virtio-blk.c (rev 0) +++ trunk/hw/virtio-blk.c 2008-12-04 19:52:44 UTC (rev 5870) @@ -0,0 +1,248 @@ +/* + * Virtio Block Device + * + * Copyright IBM, Corp. 2007 + * + * Authors: + * Anthony Liguori + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#include "virtio-blk.h" +#include "block_int.h" + +typedef struct VirtIOBlock +{ + VirtIODevice vdev; + BlockDriverState *bs; + VirtQueue *vq; +} VirtIOBlock; + +static VirtIOBlock *to_virtio_blk(VirtIODevice *vdev) +{ + return (VirtIOBlock *)vdev; +} + +typedef struct VirtIOBlockReq +{ + VirtIOBlock *dev; + VirtQueueElement elem; + struct virtio_blk_inhdr *in; + struct virtio_blk_outhdr *out; + size_t size; + uint8_t *buffer; +} VirtIOBlockReq; + +static void virtio_blk_rw_complete(void *opaque, int ret) +{ + VirtIOBlockReq *req = opaque; + VirtIOBlock *s = req->dev; + + /* Copy read data to the guest */ + if (!ret && !(req->out->type & VIRTIO_BLK_T_OUT)) { + size_t offset = 0; + int i; + + for (i = 0; i < req->elem.in_num - 1; i++) { + size_t len; + + /* Be pretty defensive wrt malicious guests */ + len = MIN(req->elem.in_sg[i].iov_len, + req->size - offset); + + memcpy(req->elem.in_sg[i].iov_base, + req->buffer + offset, + len); + offset += len; + } + } + + req->in->status = ret ? VIRTIO_BLK_S_IOERR : VIRTIO_BLK_S_OK; + virtqueue_push(s->vq, &req->elem, req->size + sizeof(*req->in)); + virtio_notify(&s->vdev, s->vq); + + qemu_free(req->buffer); + qemu_free(req); +} + +static VirtIOBlockReq *virtio_blk_get_request(VirtIOBlock *s) +{ + VirtIOBlockReq *req; + + req = qemu_mallocz(sizeof(*req)); + if (req == NULL) + return NULL; + + req->dev = s; + if (!virtqueue_pop(s->vq, &req->elem)) { + qemu_free(req); + return NULL; + } + + return req; +} + +static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) +{ + VirtIOBlock *s = to_virtio_blk(vdev); + VirtIOBlockReq *req; + + while ((req = virtio_blk_get_request(s))) { + int i; + + if (req->elem.out_num < 1 || req->elem.in_num < 1) { + fprintf(stderr, "virtio-blk missing headers\n"); + exit(1); + } + + if (req->elem.out_sg[0].iov_len < sizeof(*req->out) || + req->elem.in_sg[req->elem.in_num - 1].iov_len < sizeof(*req->in)) { + fprintf(stderr, "virtio-blk header not in correct element\n"); + exit(1); + } + + req->out = (void *)req->elem.out_sg[0].iov_base; + req->in = (void *)req->elem.in_sg[req->elem.in_num - 1].iov_base; + + if (req->out->type & VIRTIO_BLK_T_SCSI_CMD) { + unsigned int len = sizeof(*req->in); + + req->in->status = VIRTIO_BLK_S_UNSUPP; + virtqueue_push(vq, &req->elem, len); + virtio_notify(vdev, vq); + qemu_free(req); + } else if (req->out->type & VIRTIO_BLK_T_OUT) { + size_t offset; + + for (i = 1; i < req->elem.out_num; i++) + req->size += req->elem.out_sg[i].iov_len; + + req->buffer = qemu_memalign(512, req->size); + if (req->buffer == NULL) { + qemu_free(req); + break; + } + + /* We copy the data from the SG list to avoid splitting up the request. This helps + performance a lot until we can pass full sg lists as AIO operations */ + offset = 0; + for (i = 1; i < req->elem.out_num; i++) { + size_t len; + + len = MIN(req->elem.out_sg[i].iov_len, + req->size - offset); + memcpy(req->buffer + offset, + req->elem.out_sg[i].iov_base, + len); + offset += len; + } + + bdrv_aio_write(s->bs, req->out->sector, + req->buffer, + req->size / 512, + virtio_blk_rw_complete, + req); + } else { + for (i = 0; i < req->elem.in_num - 1; i++) + req->size += req->elem.in_sg[i].iov_len; + + req->buffer = qemu_memalign(512, req->size); + if (req->buffer == NULL) { + qemu_free(req); + break; + } + + bdrv_aio_read(s->bs, req->out->sector, + req->buffer, + req->size / 512, + virtio_blk_rw_complete, + req); + } + } + /* + * FIXME: Want to check for completions before returning to guest mode, + * so cached reads and writes are reported as quickly as possible. But + * that should be done in the generic block layer. + */ +} + +static void virtio_blk_reset(VirtIODevice *vdev) +{ + /* + * This should cancel pending requests, but can't do nicely until there + * are per-device request lists. + */ + qemu_aio_flush(); +} + +static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config) +{ + VirtIOBlock *s = to_virtio_blk(vdev); + struct virtio_blk_config blkcfg; + uint64_t capacity; + int cylinders, heads, secs; + + bdrv_get_geometry(s->bs, &capacity); + bdrv_get_geometry_hint(s->bs, &cylinders, &heads, &secs); + stq_raw(&blkcfg.capacity, capacity); + stl_raw(&blkcfg.seg_max, 128 - 2); + stw_raw(&blkcfg.cylinders, cylinders); + blkcfg.heads = heads; + blkcfg.sectors = secs; + memcpy(config, &blkcfg, sizeof(blkcfg)); +} + +static uint32_t virtio_blk_get_features(VirtIODevice *vdev) +{ + return (1 << VIRTIO_BLK_F_SEG_MAX | 1 << VIRTIO_BLK_F_GEOMETRY); +} + +static void virtio_blk_save(QEMUFile *f, void *opaque) +{ + VirtIOBlock *s = opaque; + virtio_save(&s->vdev, f); +} + +static int virtio_blk_load(QEMUFile *f, void *opaque, int version_id) +{ + VirtIOBlock *s = opaque; + + if (version_id != 1) + return -EINVAL; + + virtio_load(&s->vdev, f); + + return 0; +} + +void *virtio_blk_init(PCIBus *bus, uint16_t vendor, uint16_t device, + BlockDriverState *bs) +{ + VirtIOBlock *s; + int cylinders, heads, secs; + static int virtio_blk_id; + + s = (VirtIOBlock *)virtio_init_pci(bus, "virtio-blk", vendor, device, + 0, VIRTIO_ID_BLOCK, + 0x01, 0x80, 0x00, + sizeof(struct virtio_blk_config), sizeof(VirtIOBlock)); + if (!s) + return NULL; + + s->vdev.get_config = virtio_blk_update_config; + s->vdev.get_features = virtio_blk_get_features; + s->vdev.reset = virtio_blk_reset; + s->bs = bs; + bdrv_guess_geometry(s->bs, &cylinders, &heads, &secs); + bdrv_set_geometry_hint(s->bs, cylinders, heads, secs); + + s->vq = virtio_add_queue(&s->vdev, 128, virtio_blk_handle_output); + + register_savevm("virtio-blk", virtio_blk_id++, 1, + virtio_blk_save, virtio_blk_load, s); + + return s; +} Added: trunk/hw/virtio-blk.h =================================================================== --- trunk/hw/virtio-blk.h (rev 0) +++ trunk/hw/virtio-blk.h 2008-12-04 19:52:44 UTC (rev 5870) @@ -0,0 +1,76 @@ +/* + * Virtio Block Device + * + * Copyright IBM, Corp. 2007 + * + * Authors: + * Anthony Liguori + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#ifndef _QEMU_VIRTIO_BLK_H +#define _QEMU_VIRTIO_BLK_H + +#include "virtio.h" +#include "block.h" +#include "pci.h" + +/* from Linux's linux/virtio_blk.h */ + +/* The ID for virtio_block */ +#define VIRTIO_ID_BLOCK 2 + +/* Feature bits */ +#define VIRTIO_BLK_F_BARRIER 0 /* Does host support barriers? */ +#define VIRTIO_BLK_F_SIZE_MAX 1 /* Indicates maximum segment size */ +#define VIRTIO_BLK_F_SEG_MAX 2 /* Indicates maximum # of segments */ +#define VIRTIO_BLK_F_GEOMETRY 4 /* Indicates support of legacy geometry */ + +struct virtio_blk_config +{ + uint64_t capacity; + uint32_t size_max; + uint32_t seg_max; + uint16_t cylinders; + uint8_t heads; + uint8_t sectors; +} __attribute__((packed)); + +/* These two define direction. */ +#define VIRTIO_BLK_T_IN 0 +#define VIRTIO_BLK_T_OUT 1 + +/* This bit says it's a scsi command, not an actual read or write. */ +#define VIRTIO_BLK_T_SCSI_CMD 2 + +/* Barrier before this op. */ +#define VIRTIO_BLK_T_BARRIER 0x80000000 + +/* This is the first element of the read scatter-gather list. */ +struct virtio_blk_outhdr +{ + /* VIRTIO_BLK_T* */ + uint32_t type; + /* io priority. */ + uint32_t ioprio; + /* Sector (ie. 512 byte offset) */ + uint64_t sector; +}; + +#define VIRTIO_BLK_S_OK 0 +#define VIRTIO_BLK_S_IOERR 1 +#define VIRTIO_BLK_S_UNSUPP 2 + +/* This is the first element of the write scatter-gather list */ +struct virtio_blk_inhdr +{ + unsigned char status; +}; + +void *virtio_blk_init(PCIBus *bus, uint16_t vendor, uint16_t device, + BlockDriverState *bs); + +#endif Modified: trunk/qemu-doc.texi =================================================================== --- trunk/qemu-doc.texi 2008-12-04 19:38:57 UTC (rev 5869) +++ trunk/qemu-doc.texi 2008-12-04 19:52:44 UTC (rev 5870) @@ -253,7 +253,7 @@ (for instance, "file=my,,file" to use file "my,file"). @item if=@var{interface} This option defines on which type on interface the drive is connected. -Available types are: ide, scsi, sd, mtd, floppy, pflash. +Available types are: ide, scsi, sd, mtd, floppy, pflash, virtio. @item bus=@var{bus},unit=@var{unit} These options define where is connected the drive by defining the bus number and the unit id. Modified: trunk/sysemu.h =================================================================== --- trunk/sysemu.h 2008-12-04 19:38:57 UTC (rev 5869) +++ trunk/sysemu.h 2008-12-04 19:52:44 UTC (rev 5870) @@ -123,7 +123,7 @@ #endif typedef enum { - IF_IDE, IF_SCSI, IF_FLOPPY, IF_PFLASH, IF_MTD, IF_SD + IF_IDE, IF_SCSI, IF_FLOPPY, IF_PFLASH, IF_MTD, IF_SD, IF_VIRTIO } BlockInterfaceType; typedef struct DriveInfo { Modified: trunk/vl.c =================================================================== --- trunk/vl.c 2008-12-04 19:38:57 UTC (rev 5869) +++ trunk/vl.c 2008-12-04 19:52:44 UTC (rev 5870) @@ -2267,7 +2267,10 @@ } else if (!strcmp(buf, "sd")) { type = IF_SD; max_devs = 0; - } else { + } else if (!strcmp(buf, "virtio")) { + type = IF_VIRTIO; + max_devs = 0; + } else { fprintf(stderr, "qemu: '%s' unsupported bus type '%s'\n", str, buf); return -1; } @@ -2474,6 +2477,7 @@ break; case IF_PFLASH: case IF_MTD: + case IF_VIRTIO: break; } if (!file[0])