From: Anthony Liguori <anthony@codemonkey.ws>
To: qemu-devel@nongnu.org
Subject: [Qemu-devel] [6073] virtio-net support
Date: Wed, 17 Dec 2008 19:13:11 +0000 [thread overview]
Message-ID: <E1LD1pX-0007YR-DH@cvs.savannah.gnu.org> (raw)
Revision: 6073
http://svn.sv.gnu.org/viewvc/?view=rev&root=qemu&revision=6073
Author: aliguori
Date: 2008-12-17 19:13:11 +0000 (Wed, 17 Dec 2008)
Log Message:
-----------
virtio-net support
This adds virtio-net support. This is based on the virtio-net driver
that exists in kvm-userspace. This also adds a new qemu_sendv_packet
which virtio-net requires.
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
Modified Paths:
--------------
trunk/Makefile.target
trunk/hw/pci.c
trunk/net.c
trunk/net.h
Added Paths:
-----------
trunk/hw/virtio-net.c
trunk/hw/virtio-net.h
Modified: trunk/Makefile.target
===================================================================
--- trunk/Makefile.target 2008-12-17 19:00:18 UTC (rev 6072)
+++ trunk/Makefile.target 2008-12-17 19:13:11 UTC (rev 6073)
@@ -637,7 +637,7 @@
OBJS+= cirrus_vga.o apic.o parallel.o acpi.o piix_pci.o
OBJS+= usb-uhci.o vmmouse.o vmport.o vmware_vga.o
# virtio support
-OBJS+= virtio.o virtio-blk.o virtio-balloon.o
+OBJS+= virtio.o virtio-blk.o virtio-balloon.o virtio-net.o
CPPFLAGS += -DHAS_AUDIO -DHAS_AUDIO_CHOICE
endif
ifeq ($(TARGET_BASE_ARCH), ppc)
@@ -664,7 +664,7 @@
OBJS+= kvm_ppc.o
endif
# virtio support
-OBJS+= virtio.o virtio-blk.o virtio-balloon.o
+OBJS+= virtio.o virtio-blk.o virtio-balloon.o virtio-net.o
endif
ifeq ($(TARGET_BASE_ARCH), mips)
OBJS+= mips_r4k.o mips_jazz.o mips_malta.o mips_mipssim.o
Modified: trunk/hw/pci.c
===================================================================
--- trunk/hw/pci.c 2008-12-17 19:00:18 UTC (rev 6072)
+++ trunk/hw/pci.c 2008-12-17 19:13:11 UTC (rev 6073)
@@ -25,6 +25,7 @@
#include "pci.h"
#include "console.h"
#include "net.h"
+#include "virtio-net.h"
//#define DEBUG_PCI
@@ -654,9 +655,11 @@
pci_e1000_init(bus, nd, devfn);
} else if (strcmp(nd->model, "pcnet") == 0) {
pci_pcnet_init(bus, nd, devfn);
+ } else if (strcmp(nd->model, "virtio") == 0) {
+ virtio_net_init(bus, nd, devfn);
} else if (strcmp(nd->model, "?") == 0) {
fprintf(stderr, "qemu: Supported PCI NICs: i82551 i82557b i82559er"
- " ne2k_pci pcnet rtl8139 e1000\n");
+ " ne2k_pci pcnet rtl8139 e1000 virtio\n");
exit (1);
} else {
fprintf(stderr, "qemu: Unsupported NIC: %s\n", nd->model);
Added: trunk/hw/virtio-net.c
===================================================================
--- trunk/hw/virtio-net.c (rev 0)
+++ trunk/hw/virtio-net.c 2008-12-17 19:13:11 UTC (rev 6073)
@@ -0,0 +1,329 @@
+/*
+ * Virtio Network Device
+ *
+ * Copyright IBM, Corp. 2007
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "virtio.h"
+#include "net.h"
+#include "qemu-timer.h"
+#include "virtio-net.h"
+
+typedef struct VirtIONet
+{
+ VirtIODevice vdev;
+ uint8_t mac[6];
+ VirtQueue *rx_vq;
+ VirtQueue *tx_vq;
+ VLANClientState *vc;
+ QEMUTimer *tx_timer;
+ int tx_timer_active;
+ int mergeable_rx_bufs;
+} VirtIONet;
+
+/* TODO
+ * - we could suppress RX interrupt if we were so inclined.
+ */
+
+static VirtIONet *to_virtio_net(VirtIODevice *vdev)
+{
+ return (VirtIONet *)vdev;
+}
+
+static void virtio_net_update_config(VirtIODevice *vdev, uint8_t *config)
+{
+ VirtIONet *n = to_virtio_net(vdev);
+ struct virtio_net_config netcfg;
+
+ memcpy(netcfg.mac, n->mac, 6);
+ memcpy(config, &netcfg, sizeof(netcfg));
+}
+
+static uint32_t virtio_net_get_features(VirtIODevice *vdev)
+{
+ uint32_t features = (1 << VIRTIO_NET_F_MAC);
+
+ return features;
+}
+
+static void virtio_net_set_features(VirtIODevice *vdev, uint32_t features)
+{
+ VirtIONet *n = to_virtio_net(vdev);
+
+ n->mergeable_rx_bufs = !!(features & (1 << VIRTIO_NET_F_MRG_RXBUF));
+}
+
+/* RX */
+
+static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
+{
+}
+
+static int do_virtio_net_can_receive(VirtIONet *n, int bufsize)
+{
+ if (!virtio_queue_ready(n->rx_vq) ||
+ !(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
+ return 0;
+
+ if (virtio_queue_empty(n->rx_vq) ||
+ (n->mergeable_rx_bufs &&
+ !virtqueue_avail_bytes(n->rx_vq, bufsize, 0))) {
+ virtio_queue_set_notification(n->rx_vq, 1);
+ return 0;
+ }
+
+ virtio_queue_set_notification(n->rx_vq, 0);
+ return 1;
+}
+
+static int virtio_net_can_receive(void *opaque)
+{
+ VirtIONet *n = opaque;
+
+ return do_virtio_net_can_receive(n, VIRTIO_NET_MAX_BUFSIZE);
+}
+
+static int iov_fill(struct iovec *iov, int iovcnt, const void *buf, int count)
+{
+ int offset, i;
+
+ offset = i = 0;
+ while (offset < count && i < iovcnt) {
+ int len = MIN(iov[i].iov_len, count - offset);
+ memcpy(iov[i].iov_base, buf + offset, len);
+ offset += len;
+ i++;
+ }
+
+ return offset;
+}
+
+static int receive_header(VirtIONet *n, struct iovec *iov, int iovcnt,
+ const void *buf, int size, int hdr_len)
+{
+ struct virtio_net_hdr *hdr = iov[0].iov_base;
+ int offset = 0;
+
+ hdr->flags = 0;
+ hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
+
+ /* We only ever receive a struct virtio_net_hdr from the tapfd,
+ * but we may be passing along a larger header to the guest.
+ */
+ iov[0].iov_base += hdr_len;
+ iov[0].iov_len -= hdr_len;
+
+ return offset;
+}
+
+static void virtio_net_receive(void *opaque, const uint8_t *buf, int size)
+{
+ VirtIONet *n = opaque;
+ struct virtio_net_hdr_mrg_rxbuf *mhdr = NULL;
+ int hdr_len, offset, i;
+
+ if (!do_virtio_net_can_receive(n, size))
+ return;
+
+ /* hdr_len refers to the header we supply to the guest */
+ hdr_len = n->mergeable_rx_bufs ?
+ sizeof(struct virtio_net_hdr_mrg_rxbuf) : sizeof(struct virtio_net_hdr);
+
+ offset = i = 0;
+
+ while (offset < size) {
+ VirtQueueElement elem;
+ int len, total;
+ struct iovec sg[VIRTQUEUE_MAX_SIZE];
+
+ len = total = 0;
+
+ if ((i != 0 && !n->mergeable_rx_bufs) ||
+ virtqueue_pop(n->rx_vq, &elem) == 0) {
+ if (i == 0)
+ return;
+ fprintf(stderr, "virtio-net truncating packet\n");
+ exit(1);
+ }
+
+ if (elem.in_num < 1) {
+ fprintf(stderr, "virtio-net receive queue contains no in buffers\n");
+ exit(1);
+ }
+
+ if (!n->mergeable_rx_bufs && elem.in_sg[0].iov_len != hdr_len) {
+ fprintf(stderr, "virtio-net header not in first element\n");
+ exit(1);
+ }
+
+ memcpy(&sg, &elem.in_sg[0], sizeof(sg[0]) * elem.in_num);
+
+ if (i == 0) {
+ if (n->mergeable_rx_bufs)
+ mhdr = (struct virtio_net_hdr_mrg_rxbuf *)sg[0].iov_base;
+
+ offset += receive_header(n, sg, elem.in_num,
+ buf + offset, size - offset, hdr_len);
+ total += hdr_len;
+ }
+
+ /* copy in packet. ugh */
+ len = iov_fill(sg, elem.in_num,
+ buf + offset, size - offset);
+ total += len;
+
+ /* signal other side */
+ virtqueue_fill(n->rx_vq, &elem, total, i++);
+
+ offset += len;
+ }
+
+ if (mhdr)
+ mhdr->num_buffers = i;
+
+ virtqueue_flush(n->rx_vq, i);
+ virtio_notify(&n->vdev, n->rx_vq);
+}
+
+/* TX */
+static void virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
+{
+ VirtQueueElement elem;
+ int has_vnet_hdr = 0;
+
+ if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
+ return;
+
+ while (virtqueue_pop(vq, &elem)) {
+ ssize_t len = 0;
+ unsigned int out_num = elem.out_num;
+ struct iovec *out_sg = &elem.out_sg[0];
+ unsigned hdr_len;
+
+ /* hdr_len refers to the header received from the guest */
+ hdr_len = n->mergeable_rx_bufs ?
+ sizeof(struct virtio_net_hdr_mrg_rxbuf) :
+ sizeof(struct virtio_net_hdr);
+
+ if (out_num < 1 || out_sg->iov_len != hdr_len) {
+ fprintf(stderr, "virtio-net header not in first element\n");
+ exit(1);
+ }
+
+ /* ignore the header if GSO is not supported */
+ if (!has_vnet_hdr) {
+ out_num--;
+ out_sg++;
+ len += hdr_len;
+ } else if (n->mergeable_rx_bufs) {
+ /* tapfd expects a struct virtio_net_hdr */
+ hdr_len -= sizeof(struct virtio_net_hdr);
+ out_sg->iov_len -= hdr_len;
+ len += hdr_len;
+ }
+
+ len += qemu_sendv_packet(n->vc, out_sg, out_num);
+
+ virtqueue_push(vq, &elem, len);
+ virtio_notify(&n->vdev, vq);
+ }
+}
+
+static void virtio_net_handle_tx(VirtIODevice *vdev, VirtQueue *vq)
+{
+ VirtIONet *n = to_virtio_net(vdev);
+
+ if (n->tx_timer_active) {
+ virtio_queue_set_notification(vq, 1);
+ qemu_del_timer(n->tx_timer);
+ n->tx_timer_active = 0;
+ virtio_net_flush_tx(n, vq);
+ } else {
+ qemu_mod_timer(n->tx_timer,
+ qemu_get_clock(vm_clock) + TX_TIMER_INTERVAL);
+ n->tx_timer_active = 1;
+ virtio_queue_set_notification(vq, 0);
+ }
+}
+
+static void virtio_net_tx_timer(void *opaque)
+{
+ VirtIONet *n = opaque;
+
+ n->tx_timer_active = 0;
+
+ /* Just in case the driver is not ready any more */
+ if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
+ return;
+
+ virtio_queue_set_notification(n->tx_vq, 1);
+ virtio_net_flush_tx(n, n->tx_vq);
+}
+
+static void virtio_net_save(QEMUFile *f, void *opaque)
+{
+ VirtIONet *n = opaque;
+
+ virtio_save(&n->vdev, f);
+
+ qemu_put_buffer(f, n->mac, 6);
+ qemu_put_be32(f, n->tx_timer_active);
+}
+
+static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
+{
+ VirtIONet *n = opaque;
+
+ if (version_id != 1)
+ return -EINVAL;
+
+ virtio_load(&n->vdev, f);
+
+ qemu_get_buffer(f, n->mac, 6);
+ n->tx_timer_active = qemu_get_be32(f);
+
+ if (n->tx_timer_active) {
+ qemu_mod_timer(n->tx_timer,
+ qemu_get_clock(vm_clock) + TX_TIMER_INTERVAL);
+ }
+
+ return 0;
+}
+
+PCIDevice *virtio_net_init(PCIBus *bus, NICInfo *nd, int devfn)
+{
+ VirtIONet *n;
+ static int virtio_net_id;
+
+ n = (VirtIONet *)virtio_init_pci(bus, "virtio-net", 6900, 0x1000,
+ 0, VIRTIO_ID_NET,
+ 0x02, 0x00, 0x00,
+ 6, sizeof(VirtIONet));
+ if (!n)
+ return NULL;
+
+ n->vdev.get_config = virtio_net_update_config;
+ n->vdev.get_features = virtio_net_get_features;
+ n->vdev.set_features = virtio_net_set_features;
+ n->rx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_rx);
+ n->tx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_tx);
+ memcpy(n->mac, nd->macaddr, 6);
+ n->vc = qemu_new_vlan_client(nd->vlan, virtio_net_receive,
+ virtio_net_can_receive, n);
+
+ n->tx_timer = qemu_new_timer(vm_clock, virtio_net_tx_timer, n);
+ n->tx_timer_active = 0;
+ n->mergeable_rx_bufs = 0;
+
+ register_savevm("virtio-net", virtio_net_id++, 1,
+ virtio_net_save, virtio_net_load, n);
+
+ return (PCIDevice *)n;
+}
Added: trunk/hw/virtio-net.h
===================================================================
--- trunk/hw/virtio-net.h (rev 0)
+++ trunk/hw/virtio-net.h 2008-12-17 19:13:11 UTC (rev 6073)
@@ -0,0 +1,80 @@
+/*
+ * Virtio Network Device
+ *
+ * Copyright IBM, Corp. 2007
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef _QEMU_VIRTIO_NET_H
+#define _QEMU_VIRTIO_NET_H
+
+#include "virtio.h"
+#include "net.h"
+#include "pci.h"
+
+/* from Linux's virtio_net.h */
+
+/* The ID for virtio_net */
+#define VIRTIO_ID_NET 1
+
+/* The feature bitmap for virtio net */
+#define VIRTIO_NET_F_CSUM 0 /* Host handles pkts w/ partial csum */
+#define VIRTIO_NET_F_GUEST_CSUM 1 /* Guest handles pkts w/ partial csum */
+#define VIRTIO_NET_F_MAC 5 /* Host has given MAC address. */
+#define VIRTIO_NET_F_GSO 6 /* Host handles pkts w/ any GSO type */
+#define VIRTIO_NET_F_GUEST_TSO4 7 /* Guest can handle TSOv4 in. */
+#define VIRTIO_NET_F_GUEST_TSO6 8 /* Guest can handle TSOv6 in. */
+#define VIRTIO_NET_F_GUEST_ECN 9 /* Guest can handle TSO[6] w/ ECN in. */
+#define VIRTIO_NET_F_GUEST_UFO 10 /* Guest can handle UFO in. */
+#define VIRTIO_NET_F_HOST_TSO4 11 /* Host can handle TSOv4 in. */
+#define VIRTIO_NET_F_HOST_TSO6 12 /* Host can handle TSOv6 in. */
+#define VIRTIO_NET_F_HOST_ECN 13 /* Host can handle TSO[6] w/ ECN in. */
+#define VIRTIO_NET_F_HOST_UFO 14 /* Host can handle UFO in. */
+#define VIRTIO_NET_F_MRG_RXBUF 15 /* Host can merge receive buffers. */
+
+#define TX_TIMER_INTERVAL 150000 /* 150 us */
+
+/* Maximum packet size we can receive from tap device: header + 64k */
+#define VIRTIO_NET_MAX_BUFSIZE (sizeof(struct virtio_net_hdr) + (64 << 10))
+
+/* The config defining mac address (6 bytes) */
+struct virtio_net_config
+{
+ uint8_t mac[6];
+} __attribute__((packed));
+
+/* This is the first element of the scatter-gather list. If you don't
+ * specify GSO or CSUM features, you can simply ignore the header. */
+struct virtio_net_hdr
+{
+#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 // Use csum_start, csum_offset
+ uint8_t flags;
+#define VIRTIO_NET_HDR_GSO_NONE 0 // Not a GSO frame
+#define VIRTIO_NET_HDR_GSO_TCPV4 1 // GSO frame, IPv4 TCP (TSO)
+#define VIRTIO_NET_HDR_GSO_UDP 3 // GSO frame, IPv4 UDP (UFO)
+#define VIRTIO_NET_HDR_GSO_TCPV6 4 // GSO frame, IPv6 TCP
+#define VIRTIO_NET_HDR_GSO_ECN 0x80 // TCP has ECN set
+ uint8_t gso_type;
+ uint16_t hdr_len;
+ uint16_t gso_size;
+ uint16_t csum_start;
+ uint16_t csum_offset;
+};
+
+/* This is the version of the header to use when the MRG_RXBUF
+ * feature has been negotiated. */
+struct virtio_net_hdr_mrg_rxbuf
+{
+ struct virtio_net_hdr hdr;
+ uint16_t num_buffers; /* Number of merged rx buffers */
+};
+
+PCIDevice *virtio_net_init(PCIBus *bus, NICInfo *nd, int devfn);
+
+#endif
Modified: trunk/net.c
===================================================================
--- trunk/net.c 2008-12-17 19:00:18 UTC (rev 6072)
+++ trunk/net.c 2008-12-17 19:13:11 UTC (rev 6073)
@@ -369,6 +369,50 @@
}
}
+static ssize_t vc_sendv_compat(VLANClientState *vc, const struct iovec *iov,
+ int iovcnt)
+{
+ uint8_t buffer[4096];
+ size_t offset = 0;
+ int i;
+
+ for (i = 0; i < iovcnt; i++) {
+ size_t len;
+
+ len = MIN(sizeof(buffer) - offset, iov[i].iov_len);
+ memcpy(buffer + offset, iov[i].iov_base, len);
+ offset += len;
+ }
+
+ vc->fd_read(vc->opaque, buffer, offset);
+
+ return offset;
+}
+
+ssize_t qemu_sendv_packet(VLANClientState *vc1, const struct iovec *iov,
+ int iovcnt)
+{
+ VLANState *vlan = vc1->vlan;
+ VLANClientState *vc;
+ ssize_t max_len = 0;
+
+ for (vc = vlan->first_client; vc != NULL; vc = vc->next) {
+ ssize_t len = 0;
+
+ if (vc == vc1)
+ continue;
+
+ if (vc->fd_readv)
+ len = vc->fd_readv(vc->opaque, iov, iovcnt);
+ else if (vc->fd_read)
+ len = vc_sendv_compat(vc, iov, iovcnt);
+
+ max_len = MAX(max_len, len);
+ }
+
+ return max_len;
+}
+
#if defined(CONFIG_SLIRP)
/* slirp network adapter */
Modified: trunk/net.h
===================================================================
--- trunk/net.h 2008-12-17 19:00:18 UTC (rev 6072)
+++ trunk/net.h 2008-12-17 19:13:11 UTC (rev 6073)
@@ -1,12 +1,17 @@
#ifndef QEMU_NET_H
#define QEMU_NET_H
+#include "qemu-common.h"
+
/* VLANs support */
+typedef ssize_t (IOReadvHandler)(void *, const struct iovec *, int);
+
typedef struct VLANClientState VLANClientState;
struct VLANClientState {
IOReadHandler *fd_read;
+ IOReadvHandler *fd_readv;
/* Packets may still be sent if this returns zero. It's used to
rate-limit the slirp code. */
IOCanRWHandler *fd_can_read;
@@ -30,6 +35,8 @@
void *opaque);
void qemu_del_vlan_client(VLANClientState *vc);
int qemu_can_send_packet(VLANClientState *vc);
+ssize_t qemu_sendv_packet(VLANClientState *vc, const struct iovec *iov,
+ int iovcnt);
void qemu_send_packet(VLANClientState *vc, const uint8_t *buf, int size);
void qemu_handler_true(void *opaque);
next reply other threads:[~2008-12-17 19:13 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-12-17 19:13 Anthony Liguori [this message]
2008-12-18 0:16 ` [Qemu-devel] Re: [6073] virtio-net support Consul
2008-12-18 1:57 ` Anthony Liguori
2008-12-18 16:54 ` Consul
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=E1LD1pX-0007YR-DH@cvs.savannah.gnu.org \
--to=anthony@codemonkey.ws \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.