From: Asias He <asias.hejun@gmail.com>
To: Pekka Enberg <penberg@kernel.org>,
Cyrill Gorcunov <gorcunov@gmail.com>, Ingo Molnar <mingo@elte.hu>
Cc: kvm@vger.kernel.org, Asias He <asias.hejun@gmail.com>
Subject: [PATCH] kvm tools: Implement virtio network device
Date: Wed, 13 Apr 2011 19:48:48 +0800 [thread overview]
Message-ID: <1302695328-3503-1-git-send-email-asias.hejun@gmail.com> (raw)
This patch implements a virtio network device.
Use '-n virtio or --network=virtio' to enable it.
The current implementation uses tap which needs root privileges to create a
virtual network device (tap0) on host side. Actually, what we need is
CAP_NET_ADMIN.
The host side tap0 is set to 192.168.33.2/24.
You need to configure the guest side eth0 to any ip address in
192.168.33.0/24.
Here are some scp performance tests for different implementations:
None of rx and tx as thread:
guest to host 3.2MB/s
host to guest 3.1MB/s
Only rx as thread:
guest to host 14.7MB/s
host to guest 33.4MB/s
Both rx and tx as threads (this patch works this way):
guest to host 19.8MB/s
host to guest 32.5MB/s
Signed-off-by: Asias He <asias.hejun@gmail.com>
---
tools/kvm/Makefile | 1 +
tools/kvm/include/kvm/ioport.h | 2 +
tools/kvm/include/kvm/types.h | 7 +
tools/kvm/include/kvm/virtio-net.h | 7 +
tools/kvm/kvm-run.c | 11 ++
tools/kvm/virtio-net.c | 318 ++++++++++++++++++++++++++++++++++++
6 files changed, 346 insertions(+), 0 deletions(-)
create mode 100644 tools/kvm/include/kvm/types.h
create mode 100644 tools/kvm/include/kvm/virtio-net.h
create mode 100644 tools/kvm/virtio-net.c
diff --git a/tools/kvm/Makefile b/tools/kvm/Makefile
index 7a2863d..6895113 100644
--- a/tools/kvm/Makefile
+++ b/tools/kvm/Makefile
@@ -14,6 +14,7 @@ TAGS = ctags
OBJS += 8250-serial.o
OBJS += virtio-blk.o
+OBJS += virtio-net.o
OBJS += virtio-console.o
OBJS += cpuid.o
OBJS += read-write.o
diff --git a/tools/kvm/include/kvm/ioport.h b/tools/kvm/include/kvm/ioport.h
index 0218329..2fdcca4 100644
--- a/tools/kvm/include/kvm/ioport.h
+++ b/tools/kvm/include/kvm/ioport.h
@@ -10,6 +10,8 @@
#define IOPORT_VIRTIO_BLK_SIZE 256
#define IOPORT_VIRTIO_CONSOLE 0xd200 /* Virtio console device */
#define IOPORT_VIRTIO_CONSOLE_SIZE 256
+#define IOPORT_VIRTIO_NET 0xe200 /* Virtio network device */
+#define IOPORT_VIRTIO_NET_SIZE 256
struct kvm;
diff --git a/tools/kvm/include/kvm/types.h b/tools/kvm/include/kvm/types.h
new file mode 100644
index 0000000..0cbc5fb
--- /dev/null
+++ b/tools/kvm/include/kvm/types.h
@@ -0,0 +1,7 @@
+#ifndef KVM_TYPES_H
+#define KVM_TYPES_H
+
+/* FIXME: include/linux/if_tun.h and include/linux/if_ether.h complains */
+#define __be16 u16
+
+#endif /* KVM_TYPES_H */
diff --git a/tools/kvm/include/kvm/virtio-net.h b/tools/kvm/include/kvm/virtio-net.h
new file mode 100644
index 0000000..a1cab15
--- /dev/null
+++ b/tools/kvm/include/kvm/virtio-net.h
@@ -0,0 +1,7 @@
+#ifndef KVM__VIRTIO_NET_H
+#define KVM__VIRTIO_NET_H
+
+struct kvm;
+void virtio_net__init(struct kvm *self);
+
+#endif /* KVM__VIRTIO_NET_H */
diff --git a/tools/kvm/kvm-run.c b/tools/kvm/kvm-run.c
index 65c4787..6046a0a 100644
--- a/tools/kvm/kvm-run.c
+++ b/tools/kvm/kvm-run.c
@@ -16,6 +16,7 @@
#include <kvm/kvm-cpu.h>
#include <kvm/8250-serial.h>
#include <kvm/virtio-blk.h>
+#include <kvm/virtio-net.h>
#include <kvm/virtio-console.h>
#include <kvm/disk-image.h>
#include <kvm/util.h>
@@ -29,6 +30,7 @@
#define DEFAULT_KVM_DEV "/dev/kvm"
#define DEFAULT_CONSOLE "serial"
+#define DEFAULT_NETWORK "none"
#define MB_SHIFT (20)
#define MIN_RAM_SIZE_MB (64ULL)
@@ -63,6 +65,7 @@ static const char *initrd_filename;
static const char *image_filename;
static const char *console;
static const char *kvm_dev;
+static const char *network;
static bool single_step;
static bool readonly_image;
extern bool ioport_debug;
@@ -84,6 +87,8 @@ static const struct option options[] = {
"Don't write changes back to disk image"),
OPT_STRING('c', "console", &console, "serial or virtio",
"Console to use"),
+ OPT_STRING('n', "network", &network, "virtio",
+ "Network to use"),
OPT_GROUP("Kernel options:"),
OPT_STRING('k', "kernel", &kernel_filename, "kernel",
@@ -250,6 +255,12 @@ int kvm_cmd_run(int argc, const char **argv, const char *prefix)
virtio_console__init(kvm);
+ if (!network)
+ network = DEFAULT_NETWORK;
+
+ if (!strncmp(network, "virtio", 6))
+ virtio_net__init(kvm);
+
kvm__start_timer(kvm);
for (i = 0; i < nrcpus; i++) {
diff --git a/tools/kvm/virtio-net.c b/tools/kvm/virtio-net.c
new file mode 100644
index 0000000..ec70d5c
--- /dev/null
+++ b/tools/kvm/virtio-net.c
@@ -0,0 +1,318 @@
+#include "kvm/virtio-net.h"
+#include "kvm/virtio-pci.h"
+#include "kvm/virtio.h"
+#include "kvm/ioport.h"
+#include "kvm/types.h"
+#include "kvm/mutex.h"
+#include "kvm/util.h"
+#include "kvm/kvm.h"
+#include "kvm/pci.h"
+
+#include <linux/virtio_net.h>
+#include <linux/if_tun.h>
+#include <net/if.h>
+#include <sys/ioctl.h>
+#include <assert.h>
+#include <fcntl.h>
+
+#define VIRTIO_NET_IRQ 14
+#define VIRTIO_NET_QUEUE_SIZE 128
+#define VIRTIO_NET_NUM_QUEUES 2
+#define VIRTIO_NET_RX_QUEUE 0
+#define VIRTIO_NET_TX_QUEUE 1
+#define PCI_VIRTIO_NET_DEVNUM 3
+
+/*
+ * State for the single emulated virtio-net PCI device: virtqueues,
+ * negotiated features, config-space shadow and the host tap backend.
+ */
+struct net_device {
+ pthread_mutex_t mutex; /* serializes guest ioport accesses */
+
+ struct virt_queue vqs[VIRTIO_NET_NUM_QUEUES]; /* [0] = rx, [1] = tx */
+ struct virtio_net_config net_config; /* device config space (mac, link status) */
+ uint32_t host_features; /* feature bits offered to the guest */
+ uint32_t guest_features; /* feature bits acked by the guest */
+ uint16_t config_vector; /* MSI config vector (MSI unused here) */
+ uint8_t status; /* virtio device status byte */
+ uint16_t queue_selector; /* queue addressed via VIRTIO_PCI_QUEUE_SEL */
+
+ /* rx worker, woken when the guest posts receive buffers */
+ pthread_t io_rx_thread;
+ pthread_mutex_t io_rx_mutex;
+ pthread_cond_t io_rx_cond;
+
+ /* tx worker, woken when the guest queues outgoing frames */
+ pthread_t io_tx_thread;
+ pthread_mutex_t io_tx_mutex;
+ pthread_cond_t io_tx_cond;
+
+ int tap_fd; /* host tap device backing this NIC */
+ char tap_name[IFNAMSIZ]; /* tap interface name reported by TUNSETIFF */
+};
+
+/* Locally-administered placeholder MAC; link is always reported up. */
+static struct net_device net_device = {
+ .mutex = PTHREAD_MUTEX_INITIALIZER,
+
+ .net_config = {
+ .mac = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55},
+ .status = VIRTIO_NET_S_LINK_UP,
+ },
+
+ /* only F_MAC is offered: no CSUM/GSO, so frames carry no offload info */
+ .host_features = 1UL << VIRTIO_NET_F_MAC,
+};
+
+/*
+ * Receive-side worker: blocks until the guest makes rx buffers
+ * available, then copies frames from the tap device into them,
+ * interrupting the guest per frame to keep latency low.
+ */
+static void *virtio_net_rx_thread(void *p)
+{
+	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
+	struct virt_queue *vq;
+	struct kvm *self;
+	uint16_t out, in;
+	uint16_t head;
+	int len;
+
+	self = p;
+	vq = &net_device.vqs[VIRTIO_NET_RX_QUEUE];
+
+	while (1) {
+		mutex_lock(&net_device.io_rx_mutex);
+		if (!virt_queue__available(vq))
+			pthread_cond_wait(&net_device.io_rx_cond, &net_device.io_rx_mutex);
+		mutex_unlock(&net_device.io_rx_mutex);
+
+		while (virt_queue__available(vq)) {
+			head = virt_queue__get_iov(vq, iov, &out, &in, self);
+
+			/* We do not offer GSO or CSUM features, so the leading virtio_net_hdr iovec is skipped. */
+			len = readv(net_device.tap_fd, iov + 1, in - 1);
+			if (len < 0)
+				/* tap read failed: report zero payload bytes instead of folding -1 into the length */
+				len = 0;
+
+			/* Tell the guest the virtio_net_hdr was "written" too. */
+			virt_queue__set_used_elem(vq, head, sizeof(struct virtio_net_hdr) + len);
+
+			/* Interrupt the guest right away, otherwise rx latency is huge. */
+			kvm__irq_line(self, VIRTIO_NET_IRQ, 1);
+		}
+	}
+
+	pthread_exit(NULL);
+	return NULL;
+}
+
+/*
+ * Transmit-side worker: blocks until the guest notifies the tx queue,
+ * writes each queued frame to the tap device, then raises a single
+ * interrupt for the whole batch.
+ */
+static void *virtio_net_tx_thread(void *p)
+{
+	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
+	struct virt_queue *vq;
+	struct kvm *self;
+	uint16_t out, in;
+	uint16_t head;
+	int len;
+
+	self = p;
+	vq = &net_device.vqs[VIRTIO_NET_TX_QUEUE];
+
+	while (1) {
+		mutex_lock(&net_device.io_tx_mutex);
+		if (!virt_queue__available(vq))
+			pthread_cond_wait(&net_device.io_tx_cond, &net_device.io_tx_mutex);
+		mutex_unlock(&net_device.io_tx_mutex);
+
+		while (virt_queue__available(vq)) {
+			head = virt_queue__get_iov(vq, iov, &out, &in, self);
+			/* skip the leading virtio_net_hdr iovec; no offloads are negotiated */
+			len = writev(net_device.tap_fd, iov + 1, out - 1);
+			if (len < 0)
+				/* tap write failed: report zero bytes used instead of -1 */
+				len = 0;
+			virt_queue__set_used_elem(vq, head, len);
+		}
+
+		/* one interrupt per batch is sufficient on the tx side */
+		kvm__irq_line(self, VIRTIO_NET_IRQ, 1);
+	}
+
+	pthread_exit(NULL);
+	return NULL;
+}
+/*
+ * Read one byte of the device-specific config space (struct
+ * virtio_net_config: mac + status) located after the common virtio-pci
+ * header at VIRTIO_PCI_CONFIG_NOMSI.
+ */
+static bool virtio_net_pci_io_device_specific_in(void *data, unsigned long offset, int size, uint32_t count)
+{
+	uint8_t *config_space = (uint8_t *) &net_device.net_config;
+
+	if (size != 1 || count != 1)
+		return false;
+
+	/* >= : an offset equal to sizeof() is already one byte past the end */
+	if ((offset - VIRTIO_PCI_CONFIG_NOMSI) >= sizeof(struct virtio_net_config))
+		error("config offset is too big: %li", offset - VIRTIO_PCI_CONFIG_NOMSI);
+
+	ioport__write8(data, config_space[offset - VIRTIO_PCI_CONFIG_NOMSI]);
+
+	return true;
+}
+
+/*
+ * Handle guest port reads on the virtio-net I/O BAR (legacy virtio-pci
+ * register layout).  Returns false for registers we do not emulate.
+ */
+static bool virtio_net_pci_io_in(struct kvm *self, uint16_t port, void *data, int size, uint32_t count)
+{
+ unsigned long offset = port - IOPORT_VIRTIO_NET;
+ bool ret = true;
+
+ mutex_lock(&net_device.mutex);
+
+ switch (offset) {
+ case VIRTIO_PCI_HOST_FEATURES:
+ /* feature bits the device offers (currently only VIRTIO_NET_F_MAC) */
+ ioport__write32(data, net_device.host_features);
+ break;
+ case VIRTIO_PCI_GUEST_FEATURES:
+ /* write-only register: reading it is not supported */
+ ret = false;
+ break;
+ case VIRTIO_PCI_QUEUE_PFN:
+ ioport__write32(data, net_device.vqs[net_device.queue_selector].pfn);
+ break;
+ case VIRTIO_PCI_QUEUE_NUM:
+ ioport__write16(data, VIRTIO_NET_QUEUE_SIZE);
+ break;
+ case VIRTIO_PCI_QUEUE_SEL:
+ case VIRTIO_PCI_QUEUE_NOTIFY:
+ /* write-only registers */
+ ret = false;
+ break;
+ case VIRTIO_PCI_STATUS:
+ ioport__write8(data, net_device.status);
+ break;
+ case VIRTIO_PCI_ISR:
+ /* reading ISR acknowledges and deasserts the interrupt line */
+ ioport__write8(data, 0x1);
+ kvm__irq_line(self, VIRTIO_NET_IRQ, 0);
+ break;
+ case VIRTIO_MSI_CONFIG_VECTOR:
+ ioport__write16(data, net_device.config_vector);
+ break;
+ default:
+ /* offsets past the common header fall into device-specific config space */
+ ret = virtio_net_pci_io_device_specific_in(data, offset, size, count);
+ };
+
+ mutex_unlock(&net_device.mutex);
+
+ return ret;
+}
+
+/* Wake the worker thread that services the queue the guest notified. */
+static void virtio_net_handle_callback(struct kvm *self, uint16_t queue_index)
+{
+	switch (queue_index) {
+	case VIRTIO_NET_TX_QUEUE:
+		mutex_lock(&net_device.io_tx_mutex);
+		pthread_cond_signal(&net_device.io_tx_cond);
+		mutex_unlock(&net_device.io_tx_mutex);
+		break;
+	case VIRTIO_NET_RX_QUEUE:
+		mutex_lock(&net_device.io_rx_mutex);
+		pthread_cond_signal(&net_device.io_rx_cond);
+		mutex_unlock(&net_device.io_rx_mutex);
+		break;
+	default:
+		/* unknown queue index: ignore, as the original did */
+		break;
+	}
+}
+
+/*
+ * Handle guest port writes on the virtio-net I/O BAR (legacy
+ * virtio-pci register layout).
+ */
+static bool virtio_net_pci_io_out(struct kvm *self, uint16_t port, void *data, int size, uint32_t count)
+{
+ unsigned long offset = port - IOPORT_VIRTIO_NET;
+ bool ret = true;
+
+ mutex_lock(&net_device.mutex);
+
+ switch (offset) {
+ case VIRTIO_PCI_GUEST_FEATURES:
+ net_device.guest_features = ioport__read32(data);
+ break;
+ case VIRTIO_PCI_QUEUE_PFN: {
+ /* guest supplies the page frame number of the selected queue's vring */
+ struct virt_queue *queue;
+ void *p;
+
+ assert(net_device.queue_selector < VIRTIO_NET_NUM_QUEUES);
+
+ queue = &net_device.vqs[net_device.queue_selector];
+ queue->pfn = ioport__read32(data);
+ p = guest_flat_to_host(self, queue->pfn << 12);
+
+ /* legacy virtio: 4096-byte alignment, fixed queue size */
+ vring_init(&queue->vring, VIRTIO_NET_QUEUE_SIZE, p, 4096);
+
+ break;
+ }
+ case VIRTIO_PCI_QUEUE_SEL:
+ net_device.queue_selector = ioport__read16(data);
+ break;
+ case VIRTIO_PCI_QUEUE_NOTIFY: {
+ uint16_t queue_index;
+ queue_index = ioport__read16(data);
+ virtio_net_handle_callback(self, queue_index);
+ break;
+ }
+ case VIRTIO_PCI_STATUS:
+ net_device.status = ioport__read8(data);
+ break;
+ case VIRTIO_MSI_CONFIG_VECTOR:
+ /* MSI unsupported: force "no vector" regardless of the value written */
+ net_device.config_vector = VIRTIO_MSI_NO_VECTOR;
+ break;
+ case VIRTIO_MSI_QUEUE_VECTOR:
+ /* silently accepted; per-queue MSI vectors are not implemented */
+ break;
+ default:
+ ret = false;
+ };
+
+ mutex_unlock(&net_device.mutex);
+ return ret;
+}
+
+/* Guest ioport accessors registered for the virtio-net BAR range. */
+static struct ioport_operations virtio_net_io_ops = {
+ .io_in = virtio_net_pci_io_in,
+ .io_out = virtio_net_pci_io_out,
+};
+
+#define PCI_VENDOR_ID_REDHAT_QUMRANET 0x1af4
+#define PCI_DEVICE_ID_VIRTIO_NET 0x1000
+#define PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET 0x1af4
+#define PCI_SUBSYSTEM_ID_VIRTIO_NET 0x0001
+
+/*
+ * PCI config header: Red Hat/Qumranet vendor id with the virtio-net
+ * device id; class 0x020000 identifies an ethernet controller.
+ */
+static struct pci_device_header virtio_net_pci_device = {
+ .vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET,
+ .device_id = PCI_DEVICE_ID_VIRTIO_NET,
+ .header_type = PCI_HEADER_TYPE_NORMAL,
+ .revision_id = 0,
+ .class = 0x020000, /* network / ethernet controller */
+ .subsys_vendor_id = PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET,
+ .subsys_id = PCI_SUBSYSTEM_ID_VIRTIO_NET,
+ .bar[0] = IOPORT_VIRTIO_NET | PCI_BASE_ADDRESS_SPACE_IO,
+ .irq_pin = 3,
+ .irq_line = VIRTIO_NET_IRQ,
+};
+
+/*
+ * Open /dev/net/tun and create the host-side tap interface that backs
+ * the virtio NIC.  Requires CAP_NET_ADMIN (typically root); dies on
+ * failure to open or configure the device.
+ */
+static void virtio_net__tap_init(void)
+{
+	struct ifreq ifr;
+
+	net_device.tap_fd = open("/dev/net/tun", O_RDWR);
+	if (net_device.tap_fd < 0)
+		die("Unable to open /dev/net/tun\n");
+
+	memset(&ifr, 0, sizeof(ifr));
+	/* IFF_NO_PI: raw ethernet frames with no packet-info header on the fd */
+	ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
+
+	if (ioctl(net_device.tap_fd, TUNSETIFF, &ifr) < 0)
+		die("Config tap device error. Are you root?");
+
+	/* strncpy does not guarantee termination: NUL-terminate explicitly */
+	strncpy(net_device.tap_name, ifr.ifr_name, sizeof(net_device.tap_name) - 1);
+	net_device.tap_name[sizeof(net_device.tap_name) - 1] = '\0';
+
+	ioctl(net_device.tap_fd, TUNSETNOCSUM, 1);
+
+	/* FIXME: remove this after the user can specify ip address and netmask */
+	/* system() returns the child's exit status; "< 0" only caught fork failure */
+	if (system("ifconfig tap0 192.168.33.2") != 0)
+		warning("Can not set ip address on tap0");
+}
+
+/*
+ * Initialize the rx/tx wakeup primitives and start both worker threads.
+ *
+ * Fixes a copy-paste bug: the original initialized io_rx_mutex and
+ * io_tx_cond twice, leaving io_tx_mutex and io_rx_cond uninitialized.
+ */
+static void virtio_net__io_thread_init(struct kvm *self)
+{
+	pthread_mutex_init(&net_device.io_rx_mutex, NULL);
+	pthread_cond_init(&net_device.io_rx_cond, NULL);
+
+	pthread_mutex_init(&net_device.io_tx_mutex, NULL);
+	pthread_cond_init(&net_device.io_tx_cond, NULL);
+
+	pthread_create(&net_device.io_rx_thread, NULL, virtio_net_rx_thread, (void *)self);
+	pthread_create(&net_device.io_tx_thread, NULL, virtio_net_tx_thread, (void *)self);
+}
+
+/*
+ * Public entry point: register the virtio-net PCI device and its
+ * ioport range, create the host tap backend, then start the rx/tx
+ * worker threads.
+ */
+void virtio_net__init(struct kvm *self)
+{
+ pci__register(&virtio_net_pci_device, PCI_VIRTIO_NET_DEVNUM);
+ ioport__register(IOPORT_VIRTIO_NET, &virtio_net_io_ops, IOPORT_VIRTIO_NET_SIZE);
+
+ virtio_net__tap_init();
+ virtio_net__io_thread_init(self);
+}
--
1.7.4.1
next reply other threads:[~2011-04-13 11:50 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-04-13 11:48 Asias He [this message]
2011-04-13 11:51 ` [PATCH] kvm tools: Implement virtio network device Pekka Enberg
2011-04-13 12:00 ` Asias He
2011-04-13 13:02 ` [transparent networking] " Ingo Molnar
2011-04-13 13:33 ` Avi Kivity
2011-04-13 13:38 ` Avi Kivity
2011-04-13 13:39 ` Asias He
2011-04-13 16:21 ` Stefan Hajnoczi
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1302695328-3503-1-git-send-email-asias.hejun@gmail.com \
--to=asias.hejun@gmail.com \
--cc=gorcunov@gmail.com \
--cc=kvm@vger.kernel.org \
--cc=mingo@elte.hu \
--cc=penberg@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox